Source code for

"""This module contains functions to read from QBox output files"""

from ase import Atom, Atoms
from ase.calculators.singlepoint import SinglePointCalculator
from ase.utils import reader

import re
import xml.etree.ElementTree as ET

# Compile regexs for fixing XML
re_find_bad_xml = re.compile(r'<(/?)([A-z]+) expectation ([a-z]+)')

[docs]@reader def read_qbox(f, index=-1): """Read data from QBox output file Inputs: f - str or fileobj, path to file or file object to read from index - int or slice, which frames to return Returns: list of Atoms or atoms, requested frame(s) """ # Check whether this is a QB@all output version = None for line in f: if '<release>' in line: version = ET.fromstring(line) break if version is None: raise Exception('Parse Error: Version not found') is_qball = 'qb@LL' in version.text or 'qball' in version.text # Load in atomic species species = dict() if is_qball: # Read all of the lines between release and the first call to `run` species_data = [] for line in f: if '<run' in line: break species_data.append(line) species_data = '\n'.join(species_data) # Read out the species information with regular expressions symbols = re.findall('symbol_ = ([A-Z][a-z]?)', species_data) masses = re.findall('mass_ = ([0-9.]+)', species_data) names = re.findall('name_ = ([a-z]+)', species_data) numbers = re.findall('atomic_number_ = ([0-9]+)', species_data) # Compile them into a dictionary for name, symbol, mass, number in zip(names, symbols, masses, numbers): spec_data = dict( symbol=symbol, mass=float(mass), number=float(number) ) species[name] = spec_data else: # Find all species species_blocks = _find_blocks(f, 'species', '<cmd>run') for spec in species_blocks: name = spec.get('name') spec_data = dict( symbol=spec.find('symbol').text, mass=float(spec.find('mass').text), number=int(spec.find('atomic_number').text)) species[name] = spec_data # Find all of the frames frames = _find_blocks(f, 'iteration', None) # If index is an int, return one frame if isinstance(index, int): return _parse_frame(frames[index], species) else: return [_parse_frame(frame, species) for frame in frames[index]]
def _find_blocks(fp, tag, stopwords='[qbox]'): """Find and parse a certain block of the file. Reads a file sequentially and stops when it either encounters the end of the file, or until the it encounters a line that contains a user-defined string *after it has already found at least one desired block*. Use the stopwords ``[qbox]`` to read until the next command is issued. Groups the text between the first line that contains <tag> and the next line that contains </tag>, inclusively. The function then parses the XML and returns the Element object. Inputs: fp - file-like object, file to be read from tag - str, tag to search for (e.g., 'iteration'). `None` if you want to read until the end of the file stopwords - str, halt parsing if a line containing this string is encountered Returns: list of xml.ElementTree, parsed XML blocks found by this class """ start_tag = '<%s' % tag end_tag = '</%s>' % tag blocks = [] # Stores all blocks cur_block = [] # Block being filled in_block = False # Whether we are currently parsing for line in fp: # Check if the block has started if start_tag in line: if in_block: raise Exception('Parsing failed: Encountered nested block') else: in_block = True # Add data to block if in_block: cur_block.append(line) # Check for stopping conditions if stopwords is not None: if stopwords in line and len(blocks) > 0: break if end_tag in line: if in_block: blocks.append(cur_block) cur_block = [] in_block = False else: raise Exception('Parsing failed: End tag found before start ' 'tag') # Join strings in a block into a single string blocks = [''.join(b) for b in blocks] # Ensure XML compatibility. There are two specific tags in QBall that are # not valid XML, so we need to run a blocks = [re_find_bad_xml.sub(r'<\1\2_expectation_\3', b) for b in blocks] # Parse the blocks return [ET.fromstring(b) for b in blocks] def _parse_frame(tree, species): """Parse a certain frame from QBOX output Inputs: tree - ElementTree, <iteration> block from output file species - dict, data about species. Key is name of atom type, value is data about that type Return: Atoms object describing this iteration""" # Load in data about the system energy = float(tree.find("etotal").text) # Load in data about the cell unitcell = tree.find('atomset').find('unit_cell') cell = [] for d in ['a', 'b', 'c']: cell.append([float(x) for x in unitcell.get(d).split()]) stress_tree = tree.find('stress_tensor') if stress_tree is None: stresses = None else: stresses = [float(stress_tree.find('sigma_%s' % x).text) for x in ['xx', 'yy', 'zz', 'yz', 'xz', 'xy']] # Create the Atoms object atoms = Atoms(pbc=True, cell=cell) # Load in the atom information forces = [] for atom in tree.find('atomset').findall('atom'): # Load data about the atom type spec = atom.get('species') symbol = species[spec]['symbol'] mass = species[spec]['mass'] # Get data about position / velocity / force pos = [float(x) for x in atom.find('position').text.split()] force = [float(x) for x in atom.find('force').text.split()] momentum = [float(x) * mass for x in atom.find('velocity').text.split()] # Create the objects atom = Atom(symbol=symbol, mass=mass, position=pos, momentum=momentum) atoms += atom forces.append(force) # Create the calculator object that holds energy/forces calc = SinglePointCalculator(atoms, energy=energy, forces=forces, stress=stresses) atoms.calc = calc return atoms