# Source code for ase.ga.bulk_startgenerator

""" Methods for generating new random starting candidates
for bulk crystal structures. If you find this implementation
M. Van den Bossche, Henrik Gronbeck, B. Hammer,
J. Chem. Theory Comput., doi:10.1021/acs.jctc.8b00039
in addition to the papers mentioned in the docstrings."""
from random import shuffle, random, sample
import numpy as np
from ase import Atoms
from ase.data import atomic_numbers
from ase.build import molecule
from ase.ga.utilities import closest_distances_generator, atoms_too_close

def random_pos(cell):
""" Returns a random position within the box
described by the input box. """
pos = np.zeros(3)
for i in range(3):
pos += random() * cell[i, :]
return pos

[docs]class StartGenerator(object): """ Class for generating random starting candidates of bulk structures. The candidates are generated by: * Randomly creating a cell that satisfies the specified volume. * Adding one building block at a time, respecting the minimal distances. Building blocks can be either single atoms or groups of atoms. Parameters: blocks: list List of building units for the structure, each item as a (A, B) tuple, with A either an Atoms object or string (element symbol or molecule name) and B the number of these repeating units. A few examples: >>> sg = StartGenerator([('Ti', 4), ('O', 8)], ...) >>> sg = StartGenerator([('CO2', 10)], ...) >>> h2 = Atoms('H2', positions=[[0, 0, 0], [0.7, 0, 0]]) >>> sg = StartGenerator([(h2, 12)], ...) blmin: dict or int Dictionary with minimal interatomic distances. If an integer is provided instead, the dictionary will be generated with this fraction of covalent bond lengths. volume: int or float Initial guess for the unit cell volume. cellbounds: ase.ga.bulk_utilities.CellBounds instance Describing limits on the cell shape, see :class:~ase.ga.bulk_utilities.CellBounds. cell: 3x3 matrix or length 3 or 6 vector If you want to keep the cell vectors fixed, the desired cell is to be specified here. splits: dict Splitting scheme to be used, based on: * Lyakhov, Oganov, Valle, Comp. Phys. Comm. 181 (2010) 1623-32__ __ https://doi.org/10.1016/j.cpc.2010.06.007 This should be a dict specifying the relative probabilities for each split, written as tuples. For example, >>> splits = {(2,): 3, (1,): 1} This means that, for each structure, either a splitting factor of 2 is applied to one randomly chosen axis, or a splitting factor of 1 is applied (i.e., no splitting). The probability ratio of the two scenararios will be 3:1, i.e. 75% chance for the former and 25% chance for the latter splitting scheme. To e.g. always apply splitting factors of 2 and 3 along two randomly chosen axes: >>> splits={(2, 3): 1} """ def __init__(self, blocks, blmin, volume, cellbounds=None, cell=None, splits={(1,): 1}): self.volume = volume self.cellbounds = cellbounds self.cell = cell # normalize splitting probabilities: tot = sum([v for v in splits.values()]) self.splits = {k: v * 1. / tot for k, v in splits.items()} # pre-process blocks and blmin: self.blocks = [] numbers = [] for block, count in blocks: if isinstance(block, Atoms): pass elif block in atomic_numbers: block = Atoms(block) else: block = molecule(block) self.blocks.append((block, count)) numbers.extend(block.get_atomic_numbers()) numbers = np.unique(numbers) if type(blmin) == dict: self.blmin = blmin else: self.blmin = closest_distances_generator(numbers, blmin) def get_new_candidate(self, maxiter=None): """ Returns a new candidate. maxiter: upper bound on the total number of times the random position generator is called when generating the new candidate. By default (maxiter=None) no such bound is imposed. If the generator takes too long time to create a new candidate, it may be suitable to specify a finite value. When the bound is exceeded, None is returned. """ pbc = [True] * 3 blmin = self.blmin # generating the cell # cell splitting: # choose factors according to the probabilities r = random() cumprob = 0 for split, prob in self.splits.items(): cumprob += prob if cumprob > r: break directions = sample(range(3), len(split)) repeat = [1, 1, 1] for i, d in enumerate(directions): repeat[d] = split[i] repeat = tuple(repeat) nparts = np.product(repeat) target_volume = self.volume / nparts # Randomly create a cell; without loss of generality, # a lower triangular form can be used, with tilt factors # within certain bounds. # For a cell to be valid, the full cell has to satisfy # the cellbounds constraints. Additionally, the length of # each subcell vector has to be greater than the largest # (X,X)-minimal-interatomic-distance in blmin. if self.cell is not None: full_cell = np.copy(self.cell) cell = (full_cell.T / repeat).T valid = True else: valid = False while not valid: blminmax = max([blmin[k] for k in blmin if k[0] == k[1]]) cell = np.zeros((3, 3)) l = target_volume**0.33 cell[0, 0] = random() * l cell[1, 0] = (random() - 0.5) * cell[0, 0] cell[1, 1] = random() * l cell[2, 0] = (random() - 0.5) * cell[0, 0] cell[2, 1] = (random() - 0.5) * cell[1, 1] cell[2, 2] = random() * l volume = abs(np.linalg.det(cell)) cell *= (target_volume / volume)**0.33 full_cell = (repeat * cell.T).T valid = True if self.cellbounds is not None: if not self.cellbounds.is_within_bounds(full_cell): valid = False for i in range(3): if np.linalg.norm(cell[i, :]) < blminmax: valid = False # generating the atomic positions blocks = [] surplus = [] indices = [] for i, (block, count) in enumerate(self.blocks): count_part = int(np.ceil(count * 1. / nparts)) surplus.append(nparts * count_part - count) blocks.extend([block] * count_part) indices.extend([i] * count_part) N_blocks = len(blocks) # The ordering is shuffled so different blocks # are added in random order. order = list(range(N_blocks)) shuffle(order) blocks = [blocks[i] for i in order] indices = np.array(indices)[order] # Runs until we have found a valid candidate. cand = Atoms('', cell=cell, pbc=pbc) niter = 0 for i in range(N_blocks): atoms = blocks[i].copy() atoms.set_tags(i) rotate = len(atoms) > 1 # Make each new position one at a time. while maxiter is None or niter < maxiter: cop = atoms.get_positions().mean(axis=0) pos = random_pos(cell) atoms.translate(pos - cop) if rotate: phi, theta, psi = 360 * np.random.random(3) atoms.euler_rotate(phi=phi, theta=0.5 * theta, psi=psi, center=pos) # add if it fits: attempt = cand + atoms attempt.wrap() too_close = atoms_too_close(attempt, blmin, use_tags=True) if not too_close: cand += atoms break niter += 1 else: # Reached upper bound on iteration count cand = None break if cand is None: return None # rebuild the candidate after repeating, # randomly deleting surplus blocks and # sorting back to the original order tags = cand.get_tags() nrep = int(np.prod(repeat)) cand_full = cand.repeat(repeat) tags_full = cand_full.get_tags() for i in range(nrep): tags_full[len(cand) * i:len(cand) * (i + 1)] += i * N_blocks cand_full.set_tags(tags_full) cand = Atoms('', cell=full_cell, pbc=pbc) indices_full = np.tile(indices, nrep) tag_counter = 0 for i, (block, count) in enumerate(self.blocks): tags = np.where(indices_full == i)[0] bad = np.random.choice(tags, size=surplus[i], replace=False) for tag in tags: if tag not in bad: select = [a.index for a in cand_full if a.tag == tag] atoms = cand_full[select] # is indeed a copy! atoms.set_tags(tag_counter) assert len(atoms) == len(block) cand += atoms tag_counter += 1 return cand