# Source code for ase.formula

import re
from functools import lru_cache
from math import gcd
from typing import Dict, List, Sequence, Tuple, Union

from ase.data import atomic_numbers, chemical_symbols

# Recursive type used in annotations for parsed formula trees.  A tree is
# one of:
#   * a str                  -- a single symbol, e.g. 'A'
#   * a (tree, int) tuple    -- a subtree with a multiplier, e.g. 'A2'
#   * a list of trees        -- several subtrees side by side, e.g. 'A+B'
Tree = Union[str, Tuple['Tree', int], List['Tree']]  # type: ignore


class Formula:
    def __init__(self,
                 formula: str = '',
                 *,
                 strict: bool = False,
                 format: str = '',
                 _tree: 'Union[Tree, None]' = None,
                 _count: Union[Dict[str, int], None] = None):
        """Chemical formula object.

        Parameters
        ----------
        formula: str
            Text string representation of formula.  Examples: ``'6CO2'``,
            ``'30Cu+2CO'``, ``'Pt(CO)6'``.
        strict: bool
            Only allow real chemical symbols.
        format: str
            Reorder according to *format*.  Must be one of hill, metal,
            ab2 (or its old name abc), a2b, periodic or reduce.
        _tree, _count:
            Private: pre-computed parse tree and symbol counts.  Used by
            the alternative constructors to skip parsing; must not be
            combined with *format*.

        Examples
        --------
        >>> from ase.formula import Formula
        >>> w = Formula('H2O')
        >>> w.count()
        {'H': 2, 'O': 1}
        >>> 'H' in w
        True
        >>> w == 'HOH'
        True
        >>> f'{w:latex}'
        'H$_{2}$O'
        >>> w.format('latex')
        'H$_{2}$O'
        >>> divmod(6 * w + 'Cu', w)
        (6, Formula('Cu'))

        Raises
        ------
        ValueError
            on malformed formula, unknown *format*, or (with *strict*)
            unknown chemical symbol.
        """
        if format:
            # Reordering starts from the formula string, so a pre-parsed
            # tree/count makes no sense here (internal invariant).
            assert _tree is None and _count is None
            if format not in {'hill', 'metal', 'abc', 'reduce', 'ab2',
                              'a2b', 'periodic'}:
                raise ValueError(f'Illegal format: {format}')
            formula = Formula(formula).format(format)

        self._formula = formula
        # Falsy (None or empty) private arguments fall back to parsing.
        self._tree = _tree or parse(formula)
        self._count = _count or count_tree(self._tree)
        if strict:
            for symbol in self._count:
                if symbol not in atomic_numbers:
                    raise ValueError('Unknown chemical symbol: ' + symbol)

    def convert(self, fmt: str) -> 'Formula':
        """Reformat this formula as a new Formula.

        Same formatting rules as Formula(format=...) keyword.
        """
        return Formula(self._formula, format=fmt)

    def count(self) -> Dict[str, int]:
        """Return dictionary mapping chemical symbol to number of atoms.

        A copy is returned, so the caller may modify it freely.

        Example
        -------
        >>> Formula('H2O').count()
        {'H': 2, 'O': 1}
        """
        return self._count.copy()

    def reduce(self) -> Tuple['Formula', int]:
        """Reduce formula.

        Returns
        -------
        formula: Formula
            Reduced formula.
        n: int
            Number of reduced formula units.

        Example
        -------
        >>> Formula('2H2O').reduce()
        (Formula('H2O'), 2)
        """
        dct, N = self._reduce()
        return self.from_dict(dct), N

    def stoichiometry(self) -> Tuple['Formula', 'Formula', int]:
        """Reduce to unique stoichiometry using "chemical symbols" A, B, C, ...

        Symbols are ordered by reduced count first and alphabetically
        second.  The placeholder letters are consecutive characters
        starting at ``'A'``.

        Returns
        -------
        A tuple ``(abstract, reduced, n)``: the placeholder formula, the
        reduced formula in the same order, and the number of reduced
        formula units.

        Examples
        --------
        >>> Formula('CO2').stoichiometry()
        (Formula('AB2'), Formula('CO2'), 1)
        >>> Formula('(H2O)4').stoichiometry()
        (Formula('AB2'), Formula('OH2'), 4)
        """
        count1, N = self._reduce()
        c = ord('A')
        count2 = {}
        count3 = {}
        for n, symb in sorted((n, symb) for symb, n in count1.items()):
            count2[chr(c)] = n
            count3[symb] = n
            c += 1
        return self.from_dict(count2), self.from_dict(count3), N

    def format(self, fmt: str = '') -> str:
        """Format formula as string.

        Formats:

        * ``'hill'``: alphabetically ordered with C and H first
        * ``'metal'``: alphabetically ordered with metals first
        * ``'ab2'``: count-ordered first then alphabetically ordered
        * ``'abc'``: old name for ``'ab2'``
        * ``'a2b'``: reverse count-ordered first then alphabetically ordered
        * ``'periodic'``: periodic-table ordered: period first then group
        * ``'reduce'``: Reduce and keep order (ABBBC -> AB3C)
        * ``'latex'``: LaTeX representation
        * ``'html'``: HTML representation
        * ``'rest'``: reStructuredText representation

        Example
        -------
        >>> Formula('H2O').format('html')
        'H<sub>2</sub>O'
        """
        return format(self, fmt)

    def __format__(self, fmt: str) -> str:
        """Format Formula as str.

        Possible formats: ``'hill'``, ``'metal'``, ``'abc'``, ``'ab2'``,
        ``'a2b'``, ``'periodic'``, ``'reduce'``, ``'latex'``, ``'html'``,
        ``'rest'`` and ``''`` (the formula string as given).

        Raises
        ------
        ValueError
            if *fmt* is not one of the formats above.

        Example
        -------
        >>> f = Formula('OH2')
        >>> '{f}, {f:hill}, {f:latex}'.format(f=f)
        'OH2, H2O, OH$_{2}$'
        """
        if fmt == 'hill':
            count = self.count()
            count2 = {}
            # C and H go first (in that order), the rest alphabetically.
            for symb in 'CH':
                if symb in count:
                    count2[symb] = count.pop(symb)
            for symb, n in sorted(count.items()):
                count2[symb] = n
            return dict2str(count2)

        if fmt == 'metal':
            count = self.count()
            # Pull the non-metals out; whatever remains counts as metal.
            result2 = [(s, count.pop(s)) for s in non_metals if s in count]
            result = [(s, count[s]) for s in sorted(count)]
            result += sorted(result2)
            return dict2str(dict(result))

        if fmt == 'abc' or fmt == 'ab2':
            _, f, N = self.stoichiometry()
            return dict2str({symb: n * N for symb, n in f._count.items()})

        if fmt == 'a2b':
            _, f, N = self.stoichiometry()
            # Largest count first; ties broken alphabetically.
            ordered = sorted(f._count.items(),
                             key=lambda item: (-item[1], item[0]))
            return dict2str({symb: n * N for symb, n in ordered})

        if fmt == 'periodic':
            count = self.count()
            order = periodic_table_order()
            # Symbols missing from the table sort with key 0.
            items = sorted(count.items(),
                           key=lambda item: order.get(item[0], 0))
            return dict2str(dict(items))

        if fmt == 'reduce':
            # Run-length encode consecutive identical symbols.
            symbols = list(self)
            nsymb = len(symbols)
            parts = []
            i1 = 0
            for i2, symbol in enumerate(symbols):
                if i2 == nsymb - 1 or symbol != symbols[i2 + 1]:
                    parts.append(symbol)
                    m = i2 + 1 - i1
                    if m > 1:
                        parts.append(str(m))
                    i1 = i2 + 1
            return ''.join(parts)

        if fmt == 'latex':
            return self._tostr('$_{', '}$')

        if fmt == 'html':
            return self._tostr('<sub>', '</sub>')

        if fmt == 'rest':
            return self._tostr(r'\ :sub:`', r'`\ ')

        if fmt == '':
            return self._formula

        raise ValueError('Invalid format specifier')

    @staticmethod
    def from_dict(dct: Dict[str, int]) -> 'Formula':
        """Convert dict to Formula.

        Symbols with a count of zero are dropped.

        Raises
        ------
        ValueError
            if a key is not a str or a value is not a non-negative int.

        >>> Formula.from_dict({'H': 2})
        Formula('H2')
        """
        dct2 = {}
        for symb, n in dct.items():
            if not (isinstance(symb, str) and isinstance(n, int) and n >= 0):
                raise ValueError('Bad dictionary: {dct}'.format(dct=dct))
            if n > 0:  # filter out n=0 symbols
                dct2[symb] = n
        return Formula(dict2str(dct2),
                       _tree=[(list(dct2.items()), 1)],
                       _count=dct2)

    @staticmethod
    def from_list(symbols: Sequence[str]) -> 'Formula':
        """Convert list of chemical symbols to Formula.

        Any sequence of symbols is accepted.  The symbols are copied into
        a list because tuples have a special meaning (subtree, multiplier)
        inside the parse tree.

        >>> Formula.from_list(('H', 'H', 'O')).count()
        {'H': 2, 'O': 1}
        """
        symbols = list(symbols)
        counts: Dict[str, int] = {}
        for symb in symbols:
            counts[symb] = counts.get(symb, 0) + 1
        return Formula(''.join(symbols),
                       _tree=[(symbols, 1)],
                       _count=counts)

    def __len__(self) -> int:
        """Number of atoms."""
        return sum(self._count.values())

    def __getitem__(self, symb: str) -> int:
        """Number of atoms with chemical symbol *symb* (0 if absent)."""
        return self._count.get(symb, 0)

    def __contains__(self, f: Union[str, 'Formula']) -> bool:
        """Check if formula contains chemical symbols in *f*.

        Type of *f* must be str or Formula.

        Examples
        --------
        >>> 'OH' in Formula('H2O')
        True
        >>> 'O2' in Formula('H2O')
        False
        """
        if isinstance(f, str):
            f = Formula(f)
        return all(self[symb] >= n for symb, n in f._count.items())

    def __eq__(self, other) -> bool:
        """Equality check.

        Note that order is not important.  Strings are parsed first; a
        string that is not a valid formula compares unequal.  Objects of
        any other type compare unequal.  Since ``__eq__`` is defined
        without ``__hash__``, instances are not hashable.

        Example
        -------
        >>> Formula('CO') == Formula('OC')
        True
        """
        if isinstance(other, str):
            try:
                other = Formula(other)
            except ValueError:
                # Not a formula at all, hence certainly not equal.
                return False
        elif not isinstance(other, Formula):
            return False
        return self._count == other._count

    def __add__(self, other: Union[str, 'Formula']) -> 'Formula':
        """Add two formulas.

        The result is built from the two formula strings joined by ``'+'``.
        An empty operand is skipped so that no dangling ``'+'`` appears.
        """
        if not isinstance(other, str):
            other = other._formula
        if not other:
            return Formula(self._formula)
        if not self._formula:
            return Formula(other)
        return Formula(self._formula + '+' + other)

    def __radd__(self, other: str):  # -> Formula
        """Add with a str on the left-hand side."""
        return Formula(other) + self

    def __mul__(self, N: int) -> 'Formula':
        """Repeat formula `N` times."""
        if N == 0:
            return Formula('')
        return self.from_dict({symb: n * N
                               for symb, n in self._count.items()})

    def __rmul__(self, N: int):  # -> Formula
        """Repeat formula `N` times (``N * formula``)."""
        return self * N

    def __divmod__(self,
                   other: Union['Formula', str]) -> Tuple[int, 'Formula']:
        """Return the tuple (self // other, self % other).

        Invariant::

            div, mod = divmod(self, other)
            div * other + mod == self

        Raises
        ------
        ValueError
            if *other* is an empty formula.

        Example
        -------
        >>> divmod(Formula('H2O'), 'H')
        (2, Formula('O'))
        """
        if isinstance(other, str):
            other = Formula(other)
        if not other._count:
            raise ValueError('Cannot divide by an empty formula')
        # How many whole copies of *other* fit into *self*.
        N = min(self[symb] // n for symb, n in other._count.items())
        dct = self.count()
        if N:
            for symb, n in other._count.items():
                dct[symb] -= n * N
                if dct[symb] == 0:
                    del dct[symb]
        return N, self.from_dict(dct)

    def __rdivmod__(self, other):
        return divmod(Formula(other), self)

    def __mod__(self, other):
        return divmod(self, other)[1]

    def __rmod__(self, other):
        return Formula(other) % self

    def __floordiv__(self, other):
        return divmod(self, other)[0]

    def __rfloordiv__(self, other):
        return Formula(other) // self

    def __iter__(self, tree=None):
        """Yield the chemical symbols one atom at a time.

        *tree* is used for the recursion; callers leave it as None.
        """
        if tree is None:
            tree = self._tree
        if isinstance(tree, str):
            yield tree
        elif isinstance(tree, tuple):
            subtree, N = tree
            for _ in range(N):
                yield from self.__iter__(subtree)
        else:
            for subtree in tree:
                yield from self.__iter__(subtree)

    def __str__(self):
        return self._formula

    def __repr__(self):
        return 'Formula({!r})'.format(self._formula)

    def _reduce(self):
        """Divide all counts by their greatest common divisor.

        Returns the reduced count dict and the divisor (0 for an empty
        formula).
        """
        N = 0
        for n in self._count.values():
            if N == 0:
                N = n
            else:
                N = gcd(n, N)
        dct = {symb: n // N for symb, n in self._count.items()}
        return dct, N

    def _tostr(self, sub1, sub2):
        """Render with numbers wrapped in *sub1* ... *sub2* markup."""
        parts = []
        for tree, n in self._tree:
            s = tree2str(tree, sub1, sub2)
            # startswith/endswith are safe for the empty string, which an
            # empty group such as '()' produces.
            if s.startswith('(') and s.endswith(')'):
                s = s[1:-1]
            if n > 1:
                s = str(n) + s
            parts.append(s)
        return '+'.join(parts)
# One chemical symbol (capital letter plus optional lowercase letter)
# followed by an optional count.
_SYMBOL_RE = re.compile(r'([A-Z][a-z]?)([0-9]*)')
# A possibly empty run of leading digits.
_NUMBER_RE = re.compile(r'[0-9]*')


def dict2str(dct: Dict[str, int]) -> str:
    """Convert symbol-to-number dict to str.

    Counts of 1 (or less) are not written out.

    >>> dict2str({'A': 1, 'B': 2})
    'AB2'
    """
    return ''.join(symb + (str(n) if n > 1 else '')
                   for symb, n in dct.items())


def parse(f: str) -> 'Tree':
    """Convert formula string to tree structure.

    The string is split at "+" and every term becomes a
    ``(subtree, multiplier)`` tuple.  An empty string gives ``[]``.

    Raises ValueError on malformed input.

    >>> parse('2A+BC2')
    [('A', 2), (['B', ('C', 2)], 1)]
    >>> parse('10(H2O)')
    [(([('H', 2), 'O'], 1), 10)]
    """
    if not f:
        return []
    result = []
    for part in f.split('+'):
        n, rest = strip_number(part)
        result.append((parse2(rest), n))
    return result


def parse2(f: str) -> 'Tree':
    """Convert formula string to tree structure (no "+" symbols).

    A single unit is returned as is; several units are returned as a list.

    Raises ValueError on unbalanced parentheses or when the text is not a
    chemical symbol.

    >>> parse2('H2O')
    [('H', 2), 'O']
    >>> parse2('(H2O)')
    ([('H', 2), 'O'], 1)
    """
    units = []
    while f:
        if f[0] == '(':
            # Find the parenthesis matching the one at position 0.
            level = 0
            for i, c in enumerate(f[1:], 1):
                if c == '(':
                    level += 1
                elif c == ')':
                    if level == 0:
                        break
                    level -= 1
            else:
                raise ValueError(
                    f'Unbalanced parenthesis in formula: {f!r}')
            inner = f[1:i]
            # A number right after the group is its multiplier.
            n, f = strip_number(f[i + 1:])
            unit = (parse2(inner), n)
        else:
            m = _SYMBOL_RE.match(f)
            if m is None:
                raise ValueError(f'Cannot parse formula at: {f!r}')
            symb, number = m.groups()
            unit = (symb, int(number)) if number else symb
            f = f[m.end():]
        units.append(unit)
    if len(units) == 1:
        return units[0]
    return units


def strip_number(s: str) -> Tuple[int, str]:
    """Strip leading number.

    Returns the number (1 if there is none) and the rest of the string.

    >>> strip_number('10AB2')
    (10, 'AB2')
    >>> strip_number('AB2')
    (1, 'AB2')
    """
    m = _NUMBER_RE.match(s)
    assert m is not None  # the pattern also matches the empty string
    return int(m.group() or 1), s[m.end():]


def tree2str(tree: 'Tree',
             sub1: str, sub2: str) -> str:
    """Helper function for html, latex and rest formats.

    Numbers are wrapped as ``sub1 + number + sub2``.  Lists are wrapped in
    parentheses, which are dropped again for groups with multiplier 1.
    """
    if isinstance(tree, str):
        return tree
    if isinstance(tree, tuple):
        subtree, N = tree
        s = tree2str(subtree, sub1, sub2)
        if N == 1:
            # startswith/endswith also work when s is empty, which happens
            # for an empty group such as the tree of '()'.
            if s.startswith('(') and s.endswith(')'):
                return s[1:-1]
            return s
        return s + sub1 + str(N) + sub2
    return '(' + ''.join(tree2str(subtree, sub1, sub2)
                         for subtree in tree) + ')'


def count_tree(tree: 'Tree') -> Dict[str, int]:
    """Count the atoms of every chemical symbol in *tree*.

    >>> count_tree([('A', 2), (['B', ('C', 2)], 1)])
    {'A': 2, 'B': 1, 'C': 2}
    """
    if isinstance(tree, str):
        return {tree: 1}
    if isinstance(tree, tuple):
        subtree, N = tree
        return {symb: n * N for symb, n in count_tree(subtree).items()}
    dct = {}  # type: Dict[str, int]
    for subtree in tree:
        for symb, n in count_tree(subtree).items():
            dct[symb] = dct.get(symb, 0) + n
    return dct


# non metals, half-metals/metalloid, halogen, noble gas:
non_metals = ['H', 'He', 'B', 'C', 'N', 'O', 'F', 'Ne',
              'Si', 'P', 'S', 'Cl', 'Ar',
              'Ge', 'As', 'Se', 'Br', 'Kr',
              'Sb', 'Te', 'I', 'Xe',
              'Po', 'At', 'Rn']


@lru_cache()
def periodic_table_order() -> Dict[str, int]:
    """Create dict for sorting by period first, then position in period.

    Maps chemical symbol to sort key.  The slices of ``chemical_symbols``
    are concatenated from the last one (index 87 onwards) down to the
    first (indices 1-2), so symbols late in the table get the smallest
    keys; inside a slice the original order is kept.  Index 0 is skipped.
    The result is cached.
    """
    return {symbol: n
            for n, symbol in enumerate(chemical_symbols[87:] +
                                       chemical_symbols[55:87] +
                                       chemical_symbols[37:55] +
                                       chemical_symbols[19:37] +
                                       chemical_symbols[11:19] +
                                       chemical_symbols[3:11] +
                                       chemical_symbols[1:3])}


# Backwards compatibility:
def formula_hill(numbers, empirical=False):
    """Convert list of atomic numbers to a chemical formula as a string.

    Elements are alphabetically ordered with C and H first.

    If argument `empirical`, element counts will be divided by greatest
    common divisor to yield an empirical formula"""
    # One flat group holding a symbol per atom, with multiplier 1.
    tree = [([chemical_symbols[number] for number in numbers], 1)]
    formula = Formula('', _tree=tree)
    if not empirical:
        return formula.format('hill')
    reduced, _ = formula.reduce()
    return reduced.format('hill')
# Backwards compatibility:
def formula_metal(numbers, empirical=False):
    """Convert list of atomic numbers to a chemical formula as a string.

    Elements are alphabetically ordered with metals first.

    If argument `empirical`, element counts will be divided by greatest
    common divisor to yield an empirical formula"""
    # One flat group holding a symbol per atom, with multiplier 1.
    tree = [([chemical_symbols[number] for number in numbers], 1)]
    formula = Formula('', _tree=tree)
    if not empirical:
        return formula.format('metal')
    reduced, _ = formula.reduce()
    return reduced.format('metal')