Source code for ase.io.castep.castep_reader

import io
import re
import warnings
from collections import defaultdict
from typing import Any, Dict, List, Optional

import numpy as np

from ase import Atoms, units
from ase.calculators.singlepoint import SinglePointCalculator
from ase.constraints import FixAtoms, FixCartesian, FixConstraint
from ase.io import ParseError
from ase.utils import reader, string2index


[docs] @reader def read_castep_castep(fd, index=-1): """Read a .castep file and returns an Atoms object. The calculator information will be stored in the calc attribute. Notes ----- This routine will return an atom ordering as found within the castep file. This means that the species will be ordered by ascending atomic numbers. The atoms witin a species are ordered as given in the original cell file. """ # look for last result, if several CASTEP run are appended line_start, line_end, end_found = _find_last_record(fd) if not end_found: raise ParseError(f'No regular end found in {fd.name} file.') # These variables are finally assigned to `SinglePointCalculator` # for backward compatibility with the `Castep` calculator. cut_off_energy = None kpoints = None total_time = None peak_memory = None # jump back to the beginning to the last record fd.seek(0) for i, line in enumerate(fd): if i == line_start: break # read header parameters_header = _read_header(fd) if 'cut_off_energy' in parameters_header: cut_off_energy = parameters_header['cut_off_energy'] if 'basis_precision' in parameters_header: del parameters_header['cut_off_energy'] # avoid conflict markers_new_iteration = [ 'BFGS: starting iteration', 'BFGS: improving iteration', 'Starting MD iteration', ] images = [] results = {} constraints = [] species_pot = [] castep_warnings = [] for i, line in enumerate(fd): if i > line_end: break if 'Number of kpoints used' in line: kpoints = int(line.split('=')[-1].strip()) elif 'Unit Cell' in line: lattice_real = _read_unit_cell(fd) elif 'Cell Contents' in line: for line in fd: if 'Total number of ions in cell' in line: n_atoms = int(line.split()[7]) if 'Total number of species in cell' in line: int(line.split()[7]) fields = line.split() if len(fields) == 0: break elif 'Fractional coordinates of atoms' in line: species, custom_species, positions_frac = \ _read_fractional_coordinates(fd, n_atoms) elif 'Files used for pseudopotentials' in line: for line in fd: line = fd.readline() if 'Pseudopotential generated on-the-fly' in line: continue fields = line.split() if len(fields) == 2: species_pot.append(fields) else: break elif 'k-Points For BZ Sampling' in line: # TODO: generalize for non-Monkhorst Pack case # (i.e. kpoint lists) - # kpoints_offset cannot be read this way and # is hence always set to None for line in fd: if not line.strip(): break if 'MP grid size for SCF calculation' in line: # kpoints = ' '.join(line.split()[-3:]) # self.kpoints_mp_grid = kpoints # self.kpoints_mp_offset = '0. 0. 0.' # not set here anymore because otherwise # two calculator objects go out of sync # after each calculation triggering unnecessary # recalculation break elif 'Final energy' in line: key = 'energy_without_dispersion_correction' results[key] = float(line.split()[-2]) elif 'Final free energy' in line: key = 'free_energy_without_dispersion_correction' results[key] = float(line.split()[-2]) elif 'NB est. 0K energy' in line: key = 'energy_zero_without_dispersion_correction' results[key] = float(line.split()[-2]) # Add support for dispersion correction # filtering due to SEDC is done in get_potential_energy elif 'Dispersion corrected final energy' in line: key = 'energy_with_dispersion_correlation' results[key] = float(line.split()[-2]) elif 'Dispersion corrected final free energy' in line: key = 'free_energy_with_dispersion_correlation' results[key] = float(line.split()[-2]) elif 'NB dispersion corrected est. 0K energy' in line: key = 'energy_zero_with_dispersion_correlation' results[key] = float(line.split()[-2]) # check if we had a finite basis set correction elif 'Total energy corrected for finite basis set' in line: key = 'energy_with_finite_basis_set_correction' results[key] = float(line.split()[-2]) # ******************** Forces ********************* # ************** Symmetrised Forces *************** # ******************** Constrained Forces ******************** # ******************* Unconstrained Forces ******************* elif re.search(r'\**.* Forces \**', line): forces, constraints = _read_forces(fd, n_atoms) results['forces'] = np.array(forces) # ***************** Stress Tensor ***************** # *********** Symmetrised Stress Tensor *********** elif re.search(r'\**.* Stress Tensor \**', line): results.update(_read_stress(fd)) elif any(_ in line for _ in markers_new_iteration): _add_atoms( images, lattice_real, species, custom_species, positions_frac, constraints, results, ) # reset for the next step lattice_real = None species = None positions_frac = None results = {} constraints = [] # extract info from the Mulliken analysis elif 'Atomic Populations' in line: results.update(_read_mulliken_charges(fd)) # extract detailed Hirshfeld analysis (iprint > 1) elif 'Hirshfeld total electronic charge (e)' in line: results.update(_read_hirshfeld_details(fd, n_atoms)) elif 'Hirshfeld Analysis' in line: results.update(_read_hirshfeld_charges(fd)) # There is actually no good reason to get out of the loop # already at this point... or do I miss something? # elif 'BFGS: Final Configuration:' in line: # break elif 'warn' in line.lower(): castep_warnings.append(line) # fetch some last info elif 'Total time' in line: pattern = r'.*=\s*([\d\.]+) s' total_time = float(re.search(pattern, line).group(1)) elif 'Peak Memory Use' in line: pattern = r'.*=\s*([\d]+) kB' peak_memory = int(re.search(pattern, line).group(1)) # add the last image _add_atoms( images, lattice_real, species, custom_species, positions_frac, constraints, results, ) for atoms in images: # these variables are temporarily assigned to `SinglePointCalculator` # to be assigned to the `Castep` calculator for backward compatibility atoms.calc._cut_off_energy = cut_off_energy atoms.calc._kpoints = kpoints atoms.calc._species_pot = species_pot atoms.calc._total_time = total_time atoms.calc._peak_memory = peak_memory atoms.calc._parameters_header = parameters_header if castep_warnings: warnings.warn('WARNING: .castep file contains warnings') for warning in castep_warnings: warnings.warn(warning) if isinstance(index, str): index = string2index(index) return images[index]
def _find_last_record(fd): """Find the last record of the .castep file. Returns ------- start : int Line number of the first line of the last record. end : int Line number of the last line of the last record. end_found : bool True if the .castep file ends as expected. """ start = -1 for i, line in enumerate(fd): if (('Welcome' in line or 'Materials Studio' in line) and 'CASTEP' in line): start = i if start < 0: warnings.warn( f'Could not find CASTEP label in result file: {fd.name}.' ' Are you sure this is a .castep file?' ) return None # search for regular end of file end_found = False # start to search from record beginning from the back # and see if end = -1 fd.seek(0) for i, line in enumerate(fd): if i < start: continue if 'Finalisation time =' in line: end_found = True end = i break return (start, end, end_found) def _read_header(out: io.TextIOBase): """Read the header blocks from a .castep file. Returns ------- parameters : dict Dictionary storing keys and values of a .param file. """ def _parse_on_off(_: str): return {'on': True, 'off': False}[_] read_title = False parameters: Dict[str, Any] = {} for line in out: if len(line) == 0: # end of file break if re.search(r'^\s*\*+$', line) and read_title: # end of header break if re.search(r'\**.* Title \**', line): read_title = True # General Parameters elif 'output verbosity' in line: parameters['iprint'] = int(line.split()[-1][1]) elif re.match(r'\stype of calculation\s*:', line): parameters['task'] = { 'single point energy': 'SinglePoint', 'geometry optimization': 'GeometryOptimization', 'band structure': 'BandStructure', 'molecular dynamics': 'MolecularDynamics', 'optical properties': 'Optics', 'phonon calculation': 'Phonon', 'E-field calculation': 'Efield', 'Phonon followed by E-field': 'Phonon+Efield', 'transition state search': 'TransitionStateSearch', 'Magnetic Resonance': 'MagRes', 'Core level spectra': 'Elnes', 'Electronic Spectroscopy': 'ElectronicSpectroscopy', }[line.split(':')[-1].strip()] elif 'stress calculation' in line: parameters['calculate_stress'] = _parse_on_off(line.split()[-1]) elif 'calculation limited to maximum' in line: parameters['run_time'] = float(line.split()[-2]) elif re.match(r'\soptimization strategy\s*:', line): parameters['opt_strategy'] = { 'maximize speed(+++)': 'Speed', 'minimize memory(---)': 'Memory', 'balance speed and memory': 'Default', }[line.split(':')[-1].strip()] # Exchange-Correlation Parameters elif re.match(r'\susing functional\s*:', line): parameters['xc_functional'] = { 'Local Density Approximation': 'LDA', 'Perdew Wang (1991)': 'PW91', 'Perdew Burke Ernzerhof': 'PBE', 'revised Perdew Burke Ernzerhof': 'RPBE', 'PBE with Wu-Cohen exchange': 'WC', 'PBE for solids (2008)': 'PBESOL', 'Hartree-Fock': 'HF', 'Hartree-Fock +': 'HF-LDA', 'Screened Hartree-Fock': 'sX', 'Screened Hartree-Fock + ': 'sX-LDA', 'hybrid PBE0': 'PBE0', 'hybrid B3LYP': 'B3LYP', 'hybrid HSE03': 'HSE03', 'hybrid HSE06': 'HSE06', 'RSCAN': 'RSCAN', }[line.split(':')[-1].strip()] elif 'DFT+D: Semi-empirical dispersion correction' in line: parameters['sedc_apply'] = _parse_on_off(line.split()[-1]) elif 'SEDC with' in line: parameters['sedc_scheme'] = { 'OBS correction scheme': 'OBS', 'G06 correction scheme': 'G06', 'D3 correction scheme': 'D3', 'D3(BJ) correction scheme': 'D3-BJ', 'D4 correction scheme': 'D4', 'JCHS correction scheme': 'JCHS', 'TS correction scheme': 'TS', 'TSsurf correction scheme': 'TSSURF', 'TS+SCS correction scheme': 'TSSCS', 'aperiodic TS+SCS correction scheme': 'ATSSCS', 'aperiodic MBD@SCS method': 'AMBD', 'MBD@SCS method': 'MBD', 'aperiodic MBD@rsSCS method': 'AMBD*', 'MBD@rsSCS method': 'MBD*', 'XDM correction scheme': 'XDM', }[line.split(':')[-1].strip()] # Basis Set Parameters elif 'basis set accuracy' in line: parameters['basis_precision'] = line.split()[-1] elif 'plane wave basis set cut-off' in line: parameters['cut_off_energy'] = float(line.split()[-2]) elif re.match(r'\sfinite basis set correction\s*:', line): parameters['finite_basis_corr'] = { 'none': 0, 'manual': 1, 'automatic': 2, }[line.split()[-1]] # Electronic Parameters elif 'treating system as spin-polarized' in line: parameters['spin_polarized'] = True # Electronic Minimization Parameters elif 'Treating system as non-metallic' in line: parameters['fix_occupancy'] = True elif 'total energy / atom convergence tol.' in line: parameters['elec_energy_tol'] = float(line.split()[-2]) elif 'convergence tolerance window' in line: parameters['elec_convergence_win'] = int(line.split()[-2]) elif 'max. number of SCF cycles:' in line: parameters['max_scf_cycles'] = float(line.split()[-1]) elif 'dump wavefunctions every' in line: parameters['num_dump_cycles'] = float(line.split()[-3]) # Density Mixing Parameters elif 'density-mixing scheme' in line: parameters['mixing_scheme'] = line.split()[-1] return parameters def _read_unit_cell(out: io.TextIOBase): """Read a Unit Cell block from a .castep file.""" for line in out: fields = line.split() if len(fields) == 6: break lattice_real = [] for _ in range(3): lattice_real.append([float(f) for f in fields[0:3]]) line = out.readline() fields = line.split() return np.array(lattice_real) def _read_forces(out: io.TextIOBase, n_atoms: int): """Read a block for atomic forces from a .castep file.""" constraints: List[FixConstraint] = [] forces = [] for line in out: fields = line.split() if len(fields) == 7: break for n in range(n_atoms): consd = np.array([0, 0, 0]) fxyz = [0.0, 0.0, 0.0] for i, force_component in enumerate(fields[-4:-1]): if force_component.count("(cons'd)") > 0: consd[i] = 1 # remove constraint labels in force components fxyz[i] = float(force_component.replace("(cons'd)", '')) if consd.all(): constraints.append(FixAtoms(n)) elif consd.any(): constraints.append(FixCartesian(n, consd)) forces.append(fxyz) line = out.readline() fields = line.split() return forces, constraints def _read_fractional_coordinates(out: io.TextIOBase, n_atoms: int): """Read fractional coordinates from a .castep file.""" species: List[str] = [] custom_species: Optional[List[str]] = None # A CASTEP special thing positions_frac: List[List[float]] = [] for line in out: fields = line.split() if len(fields) == 7: break for _ in range(n_atoms): spec_custom = fields[1].split(':', 1) elem = spec_custom[0] if len(spec_custom) > 1 and custom_species is None: # Add it to the custom info! custom_species = list(species) species.append(elem) if custom_species is not None: custom_species.append(fields[1]) positions_frac.append([float(s) for s in fields[3:6]]) line = out.readline() fields = line.split() return species, custom_species, positions_frac def _read_stress(out: io.TextIOBase): """Read a block for the stress tensor from a .castep file.""" for line in out: fields = line.split() if len(fields) == 6: break results = {} stress = [] for _ in range(3): stress.append([float(s) for s in fields[2:5]]) line = out.readline() fields = line.split() # stress in .castep file is given in GPa results['stress'] = np.array(stress) * units.GPa results['stress'] = results['stress'].reshape(9)[[0, 4, 8, 5, 2, 1]] line = out.readline() if "Pressure:" in line: results['pressure'] = float( line.split()[-2]) * units.GPa # type: ignore[assignment] return results def _add_atoms( images, lattice_real, species, custom_species, positions_frac, constraints, results, ): # If all the lattice parameters are fixed, # the `Unit Cell` block in the .castep file is not printed # in every ionic step. # The lattice paramters are therefore taken from the last step. # This happens: # - `GeometryOptimization`: `FIX_ALL_CELL : TRUE` # - `MolecularDynamics`: `MD_ENSEMBLE : NVE or NVT` if lattice_real is None: lattice_real = images[-1].cell.copy() # for highly symmetric systems (where essentially only the # stress is optimized, but the atomic positions) positions # are only printed once. if species is None: species = images[-1].symbols if positions_frac is None: positions_frac = images[-1].get_scaled_positions() _set_energy_and_free_energy(results) atoms = Atoms( species, cell=lattice_real, constraint=constraints, pbc=True, scaled_positions=positions_frac, ) if custom_species is not None: atoms.new_array( 'castep_custom_species', np.array(custom_species), ) atoms.calc = SinglePointCalculator(atoms) atoms.calc.results = results images.append(atoms) def _read_mulliken_charges(out: io.TextIOBase): """Read a block for Mulliken charges from a .castep file.""" for i in range(3): line = out.readline() if i == 1: spin_polarized = 'Spin' in line results = defaultdict(list) for line in out: fields = line.split() if len(fields) == 1: break if spin_polarized: if len(fields) != 7: # due to CASTEP 18 outformat changes results['charges'].append(float(fields[-2])) results['magmoms'].append(float(fields[-1])) else: results['charges'].append(float(fields[-1])) return {k: np.array(v) for k, v in results.items()} def _read_hirshfeld_details(out: io.TextIOBase, n_atoms: int): """Read the Hirshfeld analysis when iprint > 1 from a .castep file.""" results = defaultdict(list) for _ in range(n_atoms): for line in out: if line.strip() == '': break # end for each atom if 'Hirshfeld / free atomic volume :' in line: line = out.readline() fields = line.split() results['hirshfeld_volume_ratios'].append(float(fields[0])) return {k: np.array(v) for k, v in results.items()} def _read_hirshfeld_charges(out: io.TextIOBase): """Read a block for Hirshfeld charges from a .castep file.""" for i in range(3): line = out.readline() if i == 1: spin_polarized = 'Spin' in line results = defaultdict(list) for line in out: fields = line.split() if len(fields) == 1: break if spin_polarized: results['hirshfeld_charges'].append(float(fields[-2])) results['hirshfeld_magmoms'].append(float(fields[-1])) else: results['hirshfeld_charges'].append(float(fields[-1])) return {k: np.array(v) for k, v in results.items()} def _set_energy_and_free_energy(results: Dict[str, Any]): """Set values referred to as `energy` and `free_energy`.""" if 'energy_with_dispersion_correction' in results: suffix = '_with_dispersion_correction' else: suffix = '_without_dispersion_correction' if 'free_energy' + suffix in results: # metallic keye = 'energy_zero' + suffix keyf = 'free_energy' + suffix else: # non-metallic # The finite basis set correction is applied to the energy at finite T # (not the energy at 0 K). We should hence refer to the corrected value # as `energy` only when the free energy is unavailable, i.e., only when # FIX_OCCUPANCY : TRUE and thus no smearing is applied. if 'energy_with_finite_basis_set_correction' in results: keye = 'energy_with_finite_basis_set_correction' else: keye = 'energy' + suffix keyf = 'energy' + suffix results['energy'] = results[keye] results['free_energy'] = results[keyf]