Source code for ase.io.nwchem.nwreader

import re
from collections import OrderedDict

import numpy as np

from ase import Atoms
from ase.calculators.singlepoint import (
    SinglePointDFTCalculator,
    SinglePointKPoint,
)
from ase.units import Bohr, Hartree

from .parser import _define_pattern

# Note to the reader of this code: Here and below we use the function
# _define_pattern from parser.py in this same directory to compile
# regular expressions. These compiled expressions are stored along with
# an example string that the expression should match in a list that
# is used during tests (test/nwchem/nwchem_parser.py) to ensure that
# the regular expressions are still working correctly.

# Matches the beginning of a GTO calculation
_gauss_block = _define_pattern(
    r'^[\s]+NWChem (?:SCF|DFT) Module\n$',
    "                                 NWChem SCF Module\n",
)


# Matches the beginning of a plane wave calculation
_pw_block = _define_pattern(
    r'^[\s]+\*[\s]+NWPW (?:PSPW|BAND|PAW|Band Structure) Calculation'
    r'[\s]+\*[\s]*\n$',
    "          *               NWPW PSPW Calculation              *\n",
)


# Top-level parser
[docs] def read_nwchem_out(fobj, index=-1): """Splits an NWChem output file into chunks corresponding to individual single point calculations.""" lines = fobj.readlines() if index == slice(-1, None, None): for line in lines: if _gauss_block.match(line): return [parse_gto_chunk(''.join(lines))] if _pw_block.match(line): return [parse_pw_chunk(''.join(lines))] raise ValueError('This does not appear to be a valid NWChem ' 'output file.') # First, find each SCF block group = [] atomslist = [] header = True lastgroup = [] lastparser = None parser = None for line in lines: group.append(line) if _gauss_block.match(line): next_parser = parse_gto_chunk elif _pw_block.match(line): next_parser = parse_pw_chunk else: continue if header: header = False else: atoms = parser(''.join(group)) if atoms is None and parser is lastparser: atoms = parser(''.join(lastgroup + group)) if atoms is not None: atomslist[-1] = atoms lastgroup += group else: atomslist.append(atoms) lastgroup = group lastparser = parser group = [] parser = next_parser if not header: atoms = parser(''.join(group)) if atoms is not None: atomslist.append(atoms) return atomslist[index]
# Matches a geometry block and returns the geometry specification lines _geom = _define_pattern( r'\n[ \t]+Geometry \"[ \t\S]+\" -> \"[ \t\S]*\"[ \t]*\n' r'^[ \t-]+\n' r'(?:^[ \t\S]*\n){3}' r'^[ \t]+No\.[ \t]+Tag[ \t]+Charge[ \t]+X[ \t]+Y[ \t]+Z\n' r'^[ \t-]+\n' r'((?:^(?:[ \t]+[\S]+){6}[ \t]*\n)+)', """\ Geometry "geometry" -> "" ------------------------- Output coordinates in angstroms (scale by 1.889725989 to convert to a.u.) No. Tag Charge X Y Z ---- ---------------- ---------- -------------- -------------- -------------- 1 C 6.0000 0.00000000 0.00000000 0.00000000 2 H 1.0000 0.62911800 0.62911800 0.62911800 3 H 1.0000 -0.62911800 -0.62911800 0.62911800 4 H 1.0000 0.62911800 -0.62911800 -0.62911800 """, re.M) # Unit cell parser _cell_block = _define_pattern(r'^[ \t]+Lattice Parameters[ \t]*\n' r'^(?:[ \t\S]*\n){4}' r'((?:^(?:[ \t]+[\S]+){5}\n){3})', """\ Lattice Parameters ------------------ lattice vectors in angstroms (scale by 1.889725989 to convert to a.u.) a1=< 4.000 0.000 0.000 > a2=< 0.000 5.526 0.000 > a3=< 0.000 0.000 4.596 > a= 4.000 b= 5.526 c= 4.596 alpha= 90.000 beta= 90.000 gamma= 90.000 omega= 101.6 """, re.M) # Parses the geometry and returns the corresponding Atoms object def _parse_geomblock(chunk): geomblocks = _geom.findall(chunk) if not geomblocks: return None geomblock = geomblocks[-1].strip().split('\n') natoms = len(geomblock) symbols = [] pos = np.zeros((natoms, 3)) for i, line in enumerate(geomblock): line = line.strip().split() symbols.append(line[1]) pos[i] = [float(x) for x in line[3:6]] cellblocks = _cell_block.findall(chunk) if cellblocks: cellblock = cellblocks[-1].strip().split('\n') cell = np.zeros((3, 3)) for i, line in enumerate(cellblock): line = line.strip().split() cell[i] = [float(x) for x in line[1:4]] else: cell = None return Atoms(symbols, positions=pos, cell=cell) # GTO-specific parser stuff # Matches gradient block from a GTO calculation _gto_grad = _define_pattern( r'^[ \t]+[\S]+[ \t]+ENERGY GRADIENTS[ \t]*[\n]+' r'^[ \t]+atom[ \t]+coordinates[ \t]+gradient[ \t]*\n' r'^(?:[ \t]+x[ \t]+y[ \t]+z){2}[ \t]*\n' r'((?:^(?:[ \t]+[\S]+){8}\n)+)[ \t]*\n', """\ UHF ENERGY GRADIENTS atom coordinates gradient x y z x y z 1 C 0.293457 -0.293457 0.293457 -0.000083 0.000083 -0.000083 2 H 1.125380 1.355351 1.125380 0.000086 0.000089 0.000086 3 H -1.355351 -1.125380 1.125380 -0.000089 -0.000086 0.000086 4 H 1.125380 -1.125380 -1.355351 0.000086 -0.000086 -0.000089 """, re.M) # Energy parsers for a variety of different GTO calculations _e_gto = OrderedDict() _e_gto['tce'] = _define_pattern( r'^[\s]+[\S]+[\s]+total energy \/ hartree[\s]+' r'=[\s]+([\S]+)[\s]*\n', " CCD total energy / hartree " "= -75.715332545665888\n", re.M, ) _e_gto['ccsd'] = _define_pattern( r'^[\s]+Total CCSD energy:[\s]+([\S]+)[\s]*\n', " Total CCSD energy: -75.716168566598569\n", re.M, ) _e_gto['tddft'] = _define_pattern( r'^[\s]+Excited state energy =[\s]+([\S]+)[\s]*\n', " Excited state energy = -75.130134499965\n", re.M, ) _e_gto['mp2'] = _define_pattern( r'^[\s]+Total MP2 energy[\s]+([\S]+)[\s]*\n', " Total MP2 energy -75.708800087578\n", re.M, ) _e_gto['mf'] = _define_pattern( r'^[\s]+Total (?:DFT|SCF) energy =[\s]+([\S]+)[\s]*\n', " Total SCF energy = -75.585555997789\n", re.M, ) # GTO parser def parse_gto_chunk(chunk): atoms = None forces = None energy = None dipole = None for theory, pattern in _e_gto.items(): matches = pattern.findall(chunk) if matches: energy = float(matches[-1].replace('D', 'E')) * Hartree break gradblocks = _gto_grad.findall(chunk) if gradblocks: gradblock = gradblocks[-1].strip().split('\n') natoms = len(gradblock) symbols = [] pos = np.zeros((natoms, 3)) forces = np.zeros((natoms, 3)) for i, line in enumerate(gradblock): line = line.strip().split() symbols.append(line[1]) pos[i] = [float(x) for x in line[2:5]] forces[i] = [-float(x) for x in line[5:8]] pos *= Bohr forces *= Hartree / Bohr atoms = Atoms(symbols, positions=pos) dipole, _quadrupole = _get_multipole(chunk) kpts = _get_gto_kpts(chunk) if atoms is None: atoms = _parse_geomblock(chunk) if atoms is None: return None # SinglePointDFTCalculator doesn't support quadrupole moment currently calc = SinglePointDFTCalculator(atoms=atoms, energy=energy, free_energy=energy, # XXX Is this right? forces=forces, dipole=dipole, # quadrupole=quadrupole, ) calc.kpts = kpts atoms.calc = calc return atoms # Extracts dipole and quadrupole moment for a GTO calculation # Note on the regex: Some, but not all, versions of NWChem # insert extra spaces in the blank lines. Do not remove the \s* # in between \n and \n _multipole = _define_pattern( r'^[ \t]+Multipole analysis of the density[ \t\S]*\n' r'^[ \t-]+\n\s*\n^[ \t\S]+\n^[ \t-]+\n' r'((?:(?:(?:[ \t]+[\S]+){7,8}\n)|[ \t]*\n){12})', """\ Multipole analysis of the density --------------------------------- L x y z total alpha beta nuclear - - - - ----- ----- ---- ------- 0 0 0 0 -0.000000 -5.000000 -5.000000 10.000000 1 1 0 0 0.000000 0.000000 0.000000 0.000000 1 0 1 0 -0.000001 -0.000017 -0.000017 0.000034 1 0 0 1 -0.902084 -0.559881 -0.559881 0.217679 2 2 0 0 -5.142958 -2.571479 -2.571479 0.000000 2 1 1 0 -0.000000 -0.000000 -0.000000 0.000000 2 1 0 1 0.000000 0.000000 0.000000 0.000000 2 0 2 0 -3.153324 -3.807308 -3.807308 4.461291 2 0 1 1 0.000001 -0.000009 -0.000009 0.000020 2 0 0 2 -4.384288 -3.296205 -3.296205 2.208122 """, re.M) # Parses the dipole and quadrupole moment from a GTO calculation def _get_multipole(chunk): matches = _multipole.findall(chunk) if not matches: return None, None # This pulls the 5th column out of the multipole moments block; # this column contains the actual moments. moments = [float(x.split()[4]) for x in matches[-1].split('\n') if x and not x.isspace()] dipole = np.array(moments[1:4]) * Bohr quadrupole = np.zeros(9) quadrupole[[0, 1, 2, 4, 5, 8]] = [moments[4:]] quadrupole[[3, 6, 7]] = quadrupole[[1, 2, 5]] return dipole, quadrupole.reshape((3, 3)) * Bohr**2 # MO eigenvalue and occupancy parser for GTO calculations _eval_block = _define_pattern( r'^[ \t]+[\S]+ Final (?:Alpha |Beta )?Molecular Orbital Analysis[ \t]*' r'\n^[ \t-]+\n\n' r'(?:^[ \t]+Vector [ \t\S]+\n(?:^[ \t\S]+\n){3}' r'(?:^(?:(?:[ \t]+[\S]+){5}){1,2}[ \t]*\n)+\n)+', """\ ROHF Final Molecular Orbital Analysis ------------------------------------- Vector 1 Occ=2.000000D+00 E=-2.043101D+01 MO Center= 1.1D-20, 1.5D-18, 1.2D-01, r^2= 1.5D-02 Bfn. Coefficient Atom+Function Bfn. Coefficient Atom+Function ----- ------------ --------------- ----- ------------ --------------- 1 0.983233 1 O s Vector 2 Occ=2.000000D+00 E=-1.324439D+00 MO Center= -2.1D-18, -8.6D-17, -7.1D-02, r^2= 5.1D-01 Bfn. Coefficient Atom+Function Bfn. Coefficient Atom+Function ----- ------------ --------------- ----- ------------ --------------- 6 0.708998 1 O s 1 -0.229426 1 O s 2 0.217752 1 O s """, re.M) # noqa: W291 # Parses the eigenvalues and occupations from a GTO calculation def _get_gto_kpts(chunk): eval_blocks = _eval_block.findall(chunk) if not eval_blocks: return [] kpts = [] kpt = _get_gto_evals(eval_blocks[-1]) if kpt.s == 1: kpts.append(_get_gto_evals(eval_blocks[-2])) kpts.append(kpt) return kpts # Extracts MO eigenvalue and occupancy for a GTO calculation _extract_vector = _define_pattern( r'^[ \t]+Vector[ \t]+([\S])+[ \t]+Occ=([\S]+)[ \t]+E=[ \t]*([\S]+)[ \t]*\n', " Vector 1 Occ=2.000000D+00 E=-2.043101D+01\n", re.M, ) # Extracts the eigenvalues and occupations from a GTO calculation def _get_gto_evals(chunk): spin = 1 if re.match(r'[ \t\S]+Beta', chunk) else 0 data = [] for vector in _extract_vector.finditer(chunk): data.append([float(x.replace('D', 'E')) for x in vector.groups()[1:]]) data = np.array(data) occ = data[:, 0] energies = data[:, 1] * Hartree return SinglePointKPoint(1., spin, 0, energies, occ) # Plane wave specific parsing stuff # Matches the gradient block from a plane wave calculation _nwpw_grad = _define_pattern( r'^[ \t]+[=]+[ \t]+Ion Gradients[ \t]+[=]+[ \t]*\n' r'^[ \t]+Ion Forces:[ \t]*\n' r'((?:^(?:[ \t]+[\S]+){7}\n)+)', """\ ============= Ion Gradients ================= Ion Forces: 1 O ( -0.000012 0.000027 -0.005199 ) 2 H ( 0.000047 -0.013082 0.020790 ) 3 H ( 0.000047 0.012863 0.020786 ) C.O.M. ( -0.000000 -0.000000 -0.000000 ) =============================================== """, re.M) # Matches the gradient block from a PAW calculation _paw_grad = _define_pattern( r'^[ \t]+[=]+[ \t]+Ion Gradients[ \t]+[=]+[ \t]*\n' r'^[ \t]+Ion Positions:[ \t]*\n' r'((?:^(?:[ \t]+[\S]+){7}\n)+)' r'^[ \t]+Ion Forces:[ \t]*\n' r'((?:^(?:[ \t]+[\S]+){7}\n)+)', """\ ============= Ion Gradients ================= Ion Positions: 1 O ( -3.77945 -5.22176 -3.77945 ) 2 H ( -3.77945 -3.77945 3.77945 ) 3 H ( -3.77945 3.77945 3.77945 ) Ion Forces: 1 O ( -0.00001 -0.00000 0.00081 ) 2 H ( 0.00005 -0.00026 -0.00322 ) 3 H ( 0.00005 0.00030 -0.00322 ) C.O.M. ( -0.00000 -0.00000 -0.00000 ) =============================================== """, re.M) # Energy parser for plane wave calculations _nwpw_energy = _define_pattern( r'^[\s]+Total (?:PSPW|BAND|PAW) energy' r'[\s]+:[\s]+([\S]+)[\s]*\n', " Total PSPW energy : -0.1709317826E+02\n", re.M, ) # Parser for the fermi energy in a plane wave calculation _fermi_energy = _define_pattern( r'^[ \t]+Fermi energy =[ \t]+([\S]+) \([ \t]+[\S]+[ \t]*\n', " Fermi energy = -0.5585062E-01 ( -1.520eV)\n", re.M, ) # Plane wave parser def parse_pw_chunk(chunk): atoms = _parse_geomblock(chunk) if atoms is None: return None energy = None efermi = None forces = None stress = None matches = _nwpw_energy.findall(chunk) if matches: energy = float(matches[-1].replace('D', 'E')) * Hartree matches = _fermi_energy.findall(chunk) if matches: efermi = float(matches[-1].replace('D', 'E')) * Hartree gradblocks = _nwpw_grad.findall(chunk) if not gradblocks: gradblocks = _paw_grad.findall(chunk) if gradblocks: gradblock = gradblocks[-1].strip().split('\n') natoms = len(gradblock) symbols = [] forces = np.zeros((natoms, 3)) for i, line in enumerate(gradblock): line = line.strip().split() symbols.append(line[1]) forces[i] = [float(x) for x in line[3:6]] forces *= Hartree / Bohr if atoms.cell: stress = _get_stress(chunk, atoms.cell) ibz_kpts, kpts = _get_pw_kpts(chunk) # NWChem does not calculate an energy extrapolated to the 0K limit, # so right now, energy and free_energy will be the same. calc = SinglePointDFTCalculator(atoms=atoms, energy=energy, efermi=efermi, free_energy=energy, forces=forces, stress=stress, ibzkpts=ibz_kpts) calc.kpts = kpts atoms.calc = calc return atoms # Extracts stress tensor from a plane wave calculation _stress = _define_pattern( r'[ \t]+[=]+[ \t]+(?:total gradient|E all FD)[ \t]+[=]+[ \t]*\n' r'^[ \t]+S =((?:(?:[ \t]+[\S]+){5}\n){3})[ \t=]+\n', """\ ============= total gradient ============== S = ( -0.22668 0.27174 0.19134 ) ( 0.23150 -0.26760 0.23226 ) ( 0.19090 0.27206 -0.22700 ) =================================================== """, re.M) # Extract stress tensor from a plane wave calculation def _get_stress(chunk, cell): stress_blocks = _stress.findall(chunk) if not stress_blocks: return None stress_block = stress_blocks[-1] stress = np.zeros((3, 3)) for i, row in enumerate(stress_block.strip().split('\n')): stress[i] = [float(x) for x in row.split()[1:4]] stress = (stress @ cell) * Hartree / Bohr / cell.volume stress = 0.5 * (stress + stress.T) # convert from 3x3 array to Voigt form return stress.ravel()[[0, 4, 8, 5, 2, 1]] # MO/band eigenvalue and occupancy parser for plane wave calculations _nwpw_eval_block = _define_pattern( r'(?:(?:^[ \t]+Brillouin zone point:[ \t]+[\S]+[ \t]*\n' r'(?:[ \t\S]*\n){3,4})?' r'^[ \t]+(?:virtual )?orbital energies:\n' r'(?:^(?:(?:[ \t]+[\S]+){3,4}){1,2}[ \t]*\n)+\n{,3})+', """\ Brillouin zone point: 1 weight= 0.074074 k =< 0.333 0.333 0.333> . <b1,b2,b3> =< 0.307 0.307 0.307> orbital energies: 0.3919370E+00 ( 10.665eV) occ=1.000 0.3908827E+00 ( 10.637eV) occ=1.000 0.4155535E+00 ( 11.308eV) occ=1.000 0.3607689E+00 ( 9.817eV) occ=1.000 0.3827820E+00 ( 10.416eV) occ=1.000 0.3544000E+00 ( 9.644eV) occ=1.000 0.3782641E+00 ( 10.293eV) occ=1.000 0.3531137E+00 ( 9.609eV) occ=1.000 0.3778819E+00 ( 10.283eV) occ=1.000 0.2596367E+00 ( 7.065eV) occ=1.000 0.2820723E+00 ( 7.676eV) occ=1.000 Brillouin zone point: 2 weight= 0.074074 k =< -0.000 0.333 0.333> . <b1,b2,b3> =< 0.614 0.000 0.000> orbital energies: 0.3967132E+00 ( 10.795eV) occ=1.000 0.3920006E+00 ( 10.667eV) occ=1.000 0.4197952E+00 ( 11.423eV) occ=1.000 0.3912442E+00 ( 10.646eV) occ=1.000 0.4125086E+00 ( 11.225eV) occ=1.000 0.3910472E+00 ( 10.641eV) occ=1.000 0.4124238E+00 ( 11.223eV) occ=1.000 0.3153977E+00 ( 8.582eV) occ=1.000 0.3379797E+00 ( 9.197eV) occ=1.000 0.2801606E+00 ( 7.624eV) occ=1.000 0.3052478E+00 ( 8.306eV) occ=1.000 """, re.M) # noqa: E501, W291 # Parser for kpoint weights for a plane wave calculation _kpt_weight = _define_pattern( r'^[ \t]+Brillouin zone point:[ \t]+([\S]+)[ \t]*\n' r'^[ \t]+weight=[ \t]+([\S]+)[ \t]*\n', """\ Brillouin zone point: 1 weight= 0.074074 """, re.M) # noqa: W291 # Parse eigenvalues and occupancies from a plane wave calculation def _get_pw_kpts(chunk): eval_blocks = [] for block in _nwpw_eval_block.findall(chunk): if 'pathlength' not in block: eval_blocks.append(block) if not eval_blocks: return [] if 'virtual' in eval_blocks[-1]: occ_block = eval_blocks[-2] virt_block = eval_blocks[-1] else: occ_block = eval_blocks[-1] virt_block = '' kpts = NWChemKpts() _extract_pw_kpts(occ_block, kpts, 1.) _extract_pw_kpts(virt_block, kpts, 0.) for match in _kpt_weight.finditer(occ_block): index, weight = match.groups() kpts.set_weight(index, float(weight)) return kpts.to_ibz_kpts(), kpts.to_singlepointkpts() # Helper class for keeping track of kpoints and converting to # SinglePointKPoint objects. class NWChemKpts: def __init__(self): self.data = {} self.ibz_kpts = {} self.weights = {} def add_ibz_kpt(self, index, raw_kpt): kpt = np.array([float(x.strip('>')) for x in raw_kpt.split()[1:4]]) self.ibz_kpts[index] = kpt def add_eval(self, index, spin, energy, occ): if index not in self.data: self.data[index] = {} if spin not in self.data[index]: self.data[index][spin] = [] self.data[index][spin].append((energy, occ)) def set_weight(self, index, weight): self.weights[index] = weight def to_ibz_kpts(self): if not self.ibz_kpts: return np.array([[0., 0., 0.]]) sorted_kpts = sorted(list(self.ibz_kpts.items()), key=lambda x: x[0]) return np.array(list(zip(*sorted_kpts))[1]) def to_singlepointkpts(self): kpts = [] for i, (index, spins) in enumerate(self.data.items()): weight = self.weights[index] for spin, (_, data) in enumerate(spins.items()): energies, occs = np.array(sorted(data, key=lambda x: x[0])).T kpts.append(SinglePointKPoint(weight, spin, i, energies, occs)) return kpts # Extracts MO/band data from a pattern matched by _nwpw_eval_block above _kpt = _define_pattern( r'^[ \t]+Brillouin zone point:[ \t]+([\S]+)[ \t]*\n' r'^[ \t]+weight=[ \t]+([\S])+[ \t]*\n' r'^[ \t]+k[ \t]+([ \t\S]+)\n' r'(?:^[ \t\S]*\n){1,2}' r'^[ \t]+(?:virtual )?orbital energies:\n' r'((?:^(?:(?:[ \t]+[\S]+){3,4}){1,2}[ \t]*\n)+)', """\ Brillouin zone point: 1 weight= 0.074074 k =< 0.333 0.333 0.333> . <b1,b2,b3> =< 0.307 0.307 0.307> orbital energies: 0.3919370E+00 ( 10.665eV) occ=1.000 0.3908827E+00 ( 10.637eV) occ=1.000 0.4155535E+00 ( 11.308eV) occ=1.000 0.3607689E+00 ( 9.817eV) occ=1.000 0.3827820E+00 ( 10.416eV) occ=1.000 0.3544000E+00 ( 9.644eV) occ=1.000 0.3782641E+00 ( 10.293eV) occ=1.000 0.3531137E+00 ( 9.609eV) occ=1.000 0.3778819E+00 ( 10.283eV) occ=1.000 0.2596367E+00 ( 7.065eV) occ=1.000 0.2820723E+00 ( 7.676eV) occ=1.000 """, re.M) # noqa: E501, W291 # Extracts kpoints from a plane wave calculation def _extract_pw_kpts(chunk, kpts, default_occ): for match in _kpt.finditer(chunk): point, weight, raw_kpt, orbitals = match.groups() index = int(point) - 1 for line in orbitals.split('\n'): tokens = line.strip().split() if not tokens: continue ntokens = len(tokens) a_e = float(tokens[0]) * Hartree if ntokens % 3 == 0: a_o = default_occ else: a_o = float(tokens[3].split('=')[1]) kpts.add_eval(index, 0, a_e, a_o) if ntokens <= 4: continue if ntokens == 6: b_e = float(tokens[3]) * Hartree b_o = default_occ elif ntokens == 8: b_e = float(tokens[4]) * Hartree b_o = float(tokens[7].split('=')[1]) kpts.add_eval(index, 1, b_e, b_o) kpts.set_weight(index, float(weight)) kpts.add_ibz_kpt(index, raw_kpt)