Source code for gpaw.wavefunctions.lcao

import numpy as np
from ase.units import Bohr
from ase.utils.timing import timer

# from gpaw import debug
from gpaw.directmin.etdm_lcao import LCAOETDM
from gpaw.directmin.tools import loewdin_lcao, gramschmidt_lcao
from gpaw.lcao.atomic_correction import (DenseAtomicCorrection,
                                         SparseAtomicCorrection)
# from gpaw.lcao.overlap import NewTwoCenterIntegrals as NewTCI
from gpaw.lcao.tci import TCIExpansions
from gpaw.lfc import BasisFunctions
from gpaw.utilities import unpack_hermitian
from gpaw.utilities.blas import mmm, gemmdot
from gpaw.utilities.tools import tri2full
from gpaw.wavefunctions.base import WaveFunctions
from gpaw.wavefunctions.mode import Mode


class LCAO(Mode):
    name = 'lcao'

    def __init__(self, atomic_correction=None, interpolation=3,
                 force_complex_dtype=False):
        self.atomic_correction = atomic_correction
        self.interpolation = interpolation
        Mode.__init__(self, force_complex_dtype)

    def __call__(self, *args, **kwargs):
        return LCAOWaveFunctions(*args,
                                 atomic_correction=self.atomic_correction,
                                 **kwargs)

    def __repr__(self):
        return f'LCAO({self.todict()})'

    def todict(self):
        dct = Mode.todict(self)
        dct['interpolation'] = self.interpolation
        return dct


def update_phases(C_unM, q_u, ibzk_qc, spos_ac, oldspos_ac, setups, Mstart):
    """Complex-rotate coefficients compensating discontinuous phase shift.

    This changes the coefficients to counteract the phase discontinuity
    of overlaps when atoms move across a cell boundary."""

    # We don't want to apply any phase shift unless we crossed a cell
    # boundary.  So we round the shift to either 0 or 1.
    #
    # Example: spos_ac goes from 0.01 to 0.99 -- this rounds to 1 and
    # we apply the phase.  If someone moves an atom by half a cell
    # without crossing a boundary, then we are out of luck.  But they
    # should have reinitialized from LCAO anyway.
    phase_qa = np.exp(2j * np.pi *
                      np.dot(ibzk_qc, (spos_ac - oldspos_ac).T.round()))

    for q, C_nM in zip(q_u, C_unM):
        if C_nM is None:
            continue
        for a in range(len(spos_ac)):
            M1 = setups.M_a[a] - Mstart
            M2 = M1 + setups[a].nao
            M1 = max(0, M1)
            C_nM[:, M1:M2] *= phase_qa[q, a]  # (may truncate M2)


# replace by class to make data structure perhaps a bit less confusing
def get_r_and_offsets(nl, spos_ac, cell_cv):
    r_and_offset_aao = {}

    def add(a1, a2, R_c, offset):
        if (a1, a2) not in r_and_offset_aao:
            r_and_offset_aao[(a1, a2)] = []
        r_and_offset_aao[(a1, a2)].append((R_c, offset))

    for a1, spos1_c in enumerate(spos_ac):
        a2_a, offsets = nl.get_neighbors(a1)
        for a2, offset in zip(a2_a, offsets):
            spos2_c = spos_ac[a2] + offset

            R_c = np.dot(spos2_c - spos1_c, cell_cv)
            add(a1, a2, R_c, offset)
            if a1 != a2 or offset.any():
                add(a2, a1, -R_c, -offset)

    return r_and_offset_aao


[docs]class LCAOWaveFunctions(WaveFunctions): mode = 'lcao' def __init__(self, ksl, gd, nvalence, setups, bd, dtype, world, kd, kptband_comm, timer, atomic_correction=None, collinear=True): WaveFunctions.__init__(self, gd, nvalence, setups, bd, dtype, collinear, world, kd, kptband_comm, timer) self.ksl = ksl self.S_qMM = None self.T_qMM = None self.P_aqMi = None self.debug_tci = False if atomic_correction is None: atomic_correction = 'sparse' if ksl.using_blacs else 'dense' if atomic_correction == 'sparse': self.atomic_correction_cls = SparseAtomicCorrection else: assert atomic_correction == 'dense' self.atomic_correction_cls = DenseAtomicCorrection # self.tci = NewTCI(gd.cell_cv, gd.pbc_c, setups, kd.ibzk_qc, kd.gamma) with self.timer('TCI: Evaluate splines'): self.tciexpansions = TCIExpansions.new_from_setups(setups) self.basis_functions = BasisFunctions(gd, [setup.basis_functions_J for setup in setups], kd, dtype=dtype, cut=True) self.coefficients_read_from_file = False self.set_orthonormalized(False) def set_orthonormalized(self, flag): self.orthonormalized = flag @timer('Orthonormalize') def orthonormalize(self, kpt=None, type='gramschmidt'): assert type == 'gramschmidt' or type == 'loewdin' if kpt is None: for kpt in self.kpt_u: self.orthonormalize(kpt) self.orthonormalized = True return if type == 'loewdin': kpt.C_nM[:] = loewdin_lcao(kpt.C_nM, kpt.S_MM.conj()) elif type == 'gramschmidt': kpt.C_nM[:] = gramschmidt_lcao(kpt.C_nM, kpt.S_MM.conj()) def empty(self, n=(), global_array=False, realspace=False): if realspace: return self.gd.empty(n, self.dtype, global_array) else: if isinstance(n, int): n = (n,) nao = self.setups.nao return np.empty(n + (nao,), self.dtype) def __str__(self): s = 'Wave functions: LCAO\n' s += ' Diagonalizer: %s\n' % self.ksl.get_description() s += (' Atomic Correction: %s\n' % self.atomic_correction_cls.description) s += ' Data-type: %s\n' % self.dtype.__name__ return s def set_eigensolver(self, eigensolver): WaveFunctions.set_eigensolver(self, eigensolver) if eigensolver: if isinstance(eigensolver, LCAOETDM): eigensolver.initialize(self.gd, self.dtype, self.bd.nbands, self.kd.nibzkpts, self.setups.nao, self.ksl.using_blacs, self.bd.comm.size, self.kpt_u) else: eigensolver.initialize(self.gd, self.dtype, self.setups.nao, self.ksl) def set_positions(self, spos_ac, atom_partition=None, move_wfs=False): oldspos_ac = self.spos_ac with self.timer('Basic WFS set positions'): WaveFunctions.set_positions(self, spos_ac, atom_partition) with self.timer('Basis functions set positions'): self.basis_functions.set_positions(spos_ac) if self.ksl is not None: self.basis_functions.set_matrix_distribution(self.ksl.Mstart, self.ksl.Mstop) Mstop = self.ksl.Mstop Mstart = self.ksl.Mstart # if self.ksl.using_blacs: # XXX # S and T have been distributed to a layout with blacs, so # discard them to force reallocation from scratch. # # TODO: evaluate S and T when they *are* distributed, thus saving # memory and avoiding this problem for kpt in self.kpt_u: kpt.S_MM = None kpt.T_MM = None # Free memory in case of old matrices: self.S_qMM = self.T_qMM = self.P_aqMi = None if self.dtype == complex and oldspos_ac is not None: update_phases([kpt.C_nM for kpt in self.kpt_u], [kpt.q for kpt in self.kpt_u], self.kd.ibzk_qc, spos_ac, oldspos_ac, self.setups, Mstart) self.timer.start('mktci') manytci = self.tciexpansions.get_manytci_calculator( self.setups, self.gd, spos_ac, self.kd.ibzk_qc, self.dtype, self.timer) self.timer.stop('mktci') self.manytci = manytci self.newtci = manytci.tci my_atom_indices = self.basis_functions.my_atom_indices self.timer.start('ST tci') newS_qMM, newT_qMM = manytci.O_qMM_T_qMM(self.gd.comm, Mstart, Mstop, self.ksl.using_blacs) self.timer.stop('ST tci') self.timer.start('P tci') P_qIM = manytci.P_qIM(my_atom_indices) self.timer.stop('P tci') self.P_aqMi = manytci.P_aqMi(my_atom_indices) self.P_qIM = P_qIM # XXX atomic correction self.atomic_correction = self.atomic_correction_cls.new_from_wfs(self) # TODO # OK complex/conj, periodic images # OK scalapack # derivatives/forces # sparse # use symmetry/conj tricks to reduce calculations # enable caching of spherical harmonics self.atomic_correction.add_overlap_correction(newS_qMM) self.allocate_arrays_for_projections(my_atom_indices) newS_qMM = self.ksl.distribute_overlap_matrix(newS_qMM, root=-1) newT_qMM = self.ksl.distribute_overlap_matrix(newT_qMM, root=-1) self.positions_set = True for kpt in self.kpt_u: q = kpt.q kpt.S_MM = newS_qMM[q] kpt.T_MM = newT_qMM[q] self.S_qMM = newS_qMM self.T_qMM = newT_qMM # Elpa wants to reuse the decomposed form of S_qMM. # We need to keep track of the existence of that object here, # since this is where we change S_qMM. Hence, expect this to # become arrays after the first diagonalization: self.decomposed_S_qMM = [None] * len(self.S_qMM) self.set_orthonormalized(False) def initialize(self, density, hamiltonian, spos_ac): # Note: The above line exists also in set_positions. # This is guaranteed to be correct, but we can probably remove one. # Of course no human can understand the initialization process, # so this will be some other day. self.timer.start('LCAO WFS Initialize') if density.nt_sG is None: if self.kpt_u[0].f_n is None or self.kpt_u[0].C_nM is None: density.initialize_from_atomic_densities(self.basis_functions) else: # We have the info we need for a density matrix, so initialize # from that instead of from scratch. This will be the case # after set_positions() during a relaxation density.initialize_from_wavefunctions(self) # Initialize GLLB-potential from basis function orbitals if hamiltonian.xc.type == 'GLLB': hamiltonian.xc.initialize_from_atomic_orbitals( self.basis_functions) else: # After a restart, nt_sg doesn't exist yet, so we'll have to # make sure it does. Of course, this should have been taken care # of already by this time, so we should improve the code elsewhere density.calculate_normalized_charges_and_mix() hamiltonian.update(density) self.timer.stop('LCAO WFS Initialize') return 0, 0
[docs] def initialize_wave_functions_from_lcao(self): """Fill the calc.wfs.kpt_[u].psit_nG arrays with useful data. Normally psit_nG is NOT used in lcao mode, but some extensions (like ase.dft.wannier) want to have it. This code is adapted from fd.py / initialize_from_lcao_coefficients() and fills psit_nG with data constructed from the current lcao coefficients (kpt.C_nM). (This may or may not work in band-parallel case!) """ from gpaw.wavefunctions.arrays import UniformGridWaveFunctions bfs = self.basis_functions for kpt in self.kpt_u: kpt.psit = UniformGridWaveFunctions( self.bd.nbands, self.gd, self.dtype, kpt=kpt.q, dist=None, spin=kpt.s, collinear=True) kpt.psit_nG[:] = 0.0 bfs.lcao_to_grid(kpt.C_nM, kpt.psit_nG[:self.bd.mynbands], kpt.q)
[docs] def initialize_wave_functions_from_restart_file(self): """Dummy function to ensure compatibility to fd mode""" self.initialize_wave_functions_from_lcao()
def add_orbital_density(self, nt_G, kpt, n): rank, q = self.kd.get_rank_and_index(kpt.k) u = q * self.nspins + kpt.s assert rank == self.kd.comm.rank assert self.kpt_u[u] is kpt psit_G = self._get_wave_function_array(u, n, realspace=True) self.add_realspace_orbital_to_density(nt_G, psit_G) def calculate_density_matrix(self, f_n, C_nM, rho_MM=None): self.timer.start('Calculate density matrix') rho_MM = self.ksl.calculate_density_matrix(f_n, C_nM, rho_MM) self.timer.stop('Calculate density matrix') return rho_MM if 1: # XXX Should not conjugate, but call gemm(..., 'c') # Although that requires knowing C_Mn and not C_nM. # that also conforms better to the usual conventions in literature Cf_Mn = C_nM.T.conj() * f_n self.timer.start('gemm') mmm(1.0, Cf_Mn, 'N', C_nM, 'N', 0.0, rho_MM) self.timer.stop('gemm') self.timer.start('band comm sum') self.bd.comm.sum(rho_MM) self.timer.stop('band comm sum') else: # Alternative suggestion. Might be faster. Someone should test this from gpaw.utilities.blas import r2k C_Mn = C_nM.T.copy() r2k(0.5, C_Mn, f_n * C_Mn, 0.0, rho_MM) tri2full(rho_MM)
[docs] def calculate_atomic_density_matrices_with_occupation(self, D_asp, f_un): # ac = self.atomic_correction # if ac.implements_distributed_projections(): # D2_asp = ac.redistribute(self, D_asp, type='asp', op='forth') # WaveFunctions.calculate_atomic_density_matrices_with_occupation( # self, D2_asp, f_un) # D3_asp = ac.redistribute(self, D2_asp, type='asp', op='back') # for a in D_asp: # D_asp[a][:] = D3_asp[a] # else: WaveFunctions.calculate_atomic_density_matrices_with_occupation( self, D_asp, f_un)
def calculate_density_matrix_delta(self, d_nn, C_nM, rho_MM=None): self.timer.start('Calculate density matrix') rho_MM = self.ksl.calculate_density_matrix_delta(d_nn, C_nM, rho_MM) self.timer.stop('Calculate density matrix') return rho_MM
[docs] def add_to_density_from_k_point_with_occupation(self, nt_sG, kpt, f_n): """Add contribution to pseudo electron-density. Do not use the standard occupation numbers, but ones given with argument f_n.""" # Custom occupations are used in calculation of response potential # with GLLB-potential if kpt.rho_MM is None: rho_MM = self.calculate_density_matrix(f_n, kpt.C_nM) if hasattr(kpt, 'c_on'): assert self.bd.comm.size == 1 d_nn = np.zeros((self.bd.mynbands, self.bd.mynbands), dtype=kpt.C_nM.dtype) for ne, c_n in zip(kpt.ne_o, kpt.c_on): assert abs(c_n.imag).max() < 1e-14 d_nn += ne * np.outer(c_n.conj(), c_n).real rho_MM += self.calculate_density_matrix_delta(d_nn, kpt.C_nM) else: rho_MM = kpt.rho_MM self.timer.start('Construct density') self.basis_functions.construct_density(rho_MM, nt_sG[kpt.s], kpt.q) self.timer.stop('Construct density')
def add_to_kinetic_density_from_k_point(self, taut_G, kpt): raise NotImplementedError('Kinetic density calculation for LCAO ' 'wavefunctions is not implemented.') def calculate_forces(self, hamiltonian, F_av): self.timer.start('LCAO forces') Fref_av = np.zeros_like(F_av) self.forcecalc = LCAOforces(self.ksl, self.dtype, self.gd, self.bd, self.kd, self.kpt_u, self.nspins, self.basis_functions, self.newtci, self.P_aqMi, self.setups, self.manytci, hamiltonian, self, self.spos_ac, self.timer, Fref_av) F_av[:, :] = self.forcecalc.get_forces_sum_GS() self.timer.stop('LCAO forces') def _get_wave_function_array(self, u, n, realspace=True, periodic=False): # XXX Taking kpt is better than taking u kpt = self.kpt_u[u] C_M = kpt.C_nM[n] if realspace: psit_G = self.gd.zeros(dtype=self.dtype) self.basis_functions.lcao_to_grid(C_M, psit_G, kpt.q) if periodic and self.dtype == complex: k_c = self.kd.ibzk_kc[kpt.k] return self.gd.plane_wave(-k_c) * psit_G return psit_G else: return C_M def write(self, writer, write_wave_functions=False): WaveFunctions.write(self, writer) if write_wave_functions: self.write_wave_functions(writer) def write_wave_functions(self, writer): writer.add_array( 'coefficients', (self.nspins, self.kd.nibzkpts, self.bd.nbands, self.setups.nao), dtype=self.dtype) for s in range(self.nspins): for k in range(self.kd.nibzkpts): C_nM = self.collect_array('C_nM', k, s) writer.fill(C_nM * Bohr**-1.5) def read(self, reader): WaveFunctions.read(self, reader) r = reader.wave_functions if 'coefficients' in r: self.read_wave_functions(r) def read_wave_functions(self, reader): for kpt in self.kpt_u: C_nM = reader.proxy('coefficients', kpt.s, kpt.k) kpt.C_nM = self.bd.empty(self.setups.nao, dtype=self.dtype) for myn, C_M in enumerate(kpt.C_nM): n = self.bd.global_index(myn) # XXX number of bands could have been rounded up! if n >= len(C_nM): break C_M[:] = C_nM[n] * Bohr**1.5 self.coefficients_read_from_file = True def estimate_memory(self, mem): nq = len(self.kd.ibzk_qc) nao = self.setups.nao ni_total = sum([setup.ni for setup in self.setups]) itemsize = mem.itemsize[self.dtype] mem.subnode('C [qnM]', nq * self.bd.mynbands * nao * itemsize) nM1, nM2 = self.ksl.get_overlap_matrix_shape() mem.subnode('S, T [2 x qmm]', 2 * nq * nM1 * nM2 * itemsize) mem.subnode('P [aqMi]', nq * nao * ni_total // self.gd.comm.size) # self.tci.estimate_memory(mem.subnode('TCI')) self.basis_functions.estimate_memory(mem.subnode('BasisFunctions')) self.eigensolver.estimate_memory(mem.subnode('Eigensolver'), self.dtype)
class LCAOforces: def __init__(self, ksl, dtype, gd, bd, kd, kpt_u, nspins, bfs, newtci, P_aqMi, setups, manytci, hamiltonian, wfs, spos_ac, timer, Fref_av): """ Object which calculates LCAO forces """ self.ksl = ksl self.nao = ksl.nao self.mynao = ksl.mynao self.dtype = dtype self.newtci = newtci self.manytci = manytci self.P_aqMi = P_aqMi self.gd = gd self.bd = bd self.kd = kd self.kpt_u = kpt_u self.nspins = nspins self.bfs = bfs self.spos_ac = spos_ac self.Mstart = ksl.Mstart self.Mstop = ksl.Mstop self.setups = setups self.hamiltonian = hamiltonian self.wfs = wfs self.timer = timer self.Fref_av = Fref_av self.my_atom_indices = bfs.my_atom_indices self.atom_indices = bfs.atom_indices self.dH_asp = hamiltonian.dH_asp from gpaw.kohnsham_layouts import BlacsOrbitalLayouts self.isblacs = isinstance(self.ksl, BlacsOrbitalLayouts) if not self.isblacs: self.timer.start('TCI derivative') self.dThetadR_qvMM, self.dTdR_qvMM = self.manytci.O_qMM_T_qMM( self.gd.comm, self.Mstart, self.Mstop, False, derivative=True) self.dPdR_aqvMi = self.manytci.P_aqMi(self.bfs.my_atom_indices, derivative=True) self.gd.comm.sum(self.dThetadR_qvMM) self.gd.comm.sum(self.dTdR_qvMM) self.timer.stop('TCI derivative') self.rhoT_uMM, self.ET_uMM = self.get_den_mat_and_E() def get_forces_sum_GS(self): """ This function calculates ground state forces in LCAO mode """ if not self.isblacs: F_av = np.zeros_like(self.Fref_av) Fkin_av = self.get_kinetic_term() Fpot_av = self.get_pot_term() Ftheta_av = self.get_den_mat_term() Frho_av = self.get_den_mat_paw_term() Fatom_av = self.get_atomic_density_term() F_av += Fkin_av + Fpot_av + Ftheta_av + Frho_av + Fatom_av else: F_av = np.zeros_like(self.Fref_av) Fpot_av = self.get_pot_term_blacs() Fkin_av, Ftheta_av = self.get_kin_and_den_term_blacs() Fatom_av, Frho_av = self.get_at_den_and_den_paw_blacs() F_av += Fkin_av + Fpot_av + Ftheta_av + Frho_av + Fatom_av self.timer.start('Wait for sum') self.ksl.orbital_comm.sum(F_av) if self.bd.comm.rank == 0: self.kd.comm.sum(F_av, 0) self.timer.stop('Wait for sum') return F_av def _slices(self, indices): for a in indices: M1 = self.bfs.M_a[a] - self.Mstart M2 = M1 + self.setups[a].nao if M2 > 0: yield a, max(0, M1), M2 def slices(self): return self._slices(self.atom_indices) def my_slices(self): return self._slices(self.my_atom_indices) def get_den_mat_and_E(self): # # ----- ----- # \ -1 \ * # E = ) S H rho = ) c eps f c # mu nu / mu x x z z nu / n mu n n n nu # ----- ----- # x z n # # We use the transpose of that matrix. The first form is used # if rho is given, otherwise the coefficients are used. self.timer.start('Initial') if self.kpt_u[0].rho_MM is None: rhoT_uMM = [] ET_uMM = [] self.timer.start('Get density matrix') for kpt in self.kpt_u: rhoT_MM = self.ksl.get_transposed_density_matrix(kpt.f_n, kpt.C_nM) rhoT_uMM.append(rhoT_MM) ET_MM = self.ksl.get_transposed_density_matrix(kpt.f_n * kpt.eps_n, kpt.C_nM) ET_uMM.append(ET_MM) if hasattr(kpt, 'c_on'): # XXX does this work with BLACS/non-BLACS/etc.? assert self.bd.comm.size == 1 d_nn = np.zeros((self.bd.mynbands, self.bd.mynbands), dtype=kpt.C_nM.dtype) for ne, c_n in zip(kpt.ne_o, kpt.c_on): d_nn += ne * np.outer(c_n.conj(), c_n) rhoT_MM += self.ksl.get_transposed_density_matrix_delta( d_nn, kpt.C_nM) ET_MM += self.ksl.get_transposed_density_matrix_delta( d_nn * kpt.eps_n, kpt.C_nM) self.timer.stop('Get density matrix') else: rhoT_uMM = [] ET_uMM = [] for kpt in self.kpt_u: H_MM = self.wfs.eigensolver.calculate_hamiltonian_matrix( self.hamiltonian, self.wfs, kpt) tri2full(H_MM) S_MM = kpt.S_MM.copy() tri2full(S_MM) ET_MM = np.linalg.solve(S_MM, gemmdot(H_MM, kpt.rho_MM)).T.copy() del S_MM, H_MM rhoT_MM = kpt.rho_MM.T.copy() rhoT_uMM.append(rhoT_MM) ET_uMM.append(ET_MM) self.timer.stop('Initial') return rhoT_uMM, ET_uMM def get_kinetic_term(self): """Calculate Kinetic energy term in LCAO""" Fkin_av = np.zeros_like(self.Fref_av) self.timer.start('TCI derivative') # Kinetic energy contribution # # ----- d T # a \ mu nu # F += 2 Re ) -------- rho # / d R nu mu # ----- mu nu # mu in a; nu # Fkin_av = np.zeros_like(Fkin_av) for u, kpt in enumerate(self.kpt_u): dEdTrhoT_vMM = (self.dTdR_qvMM[kpt.q] * self.rhoT_uMM[u][np.newaxis]).real # XXX load distribution! for a, M1, M2 in self.my_slices(): Fkin_av[a, :] += \ 2.0 * dEdTrhoT_vMM[:, M1:M2].sum(-1).sum(-1) self.timer.stop('TCI derivative') return Fkin_av def get_den_mat_term(self): """Calculate density matrix term in LCAO""" Ftheta_av = np.zeros_like(self.Fref_av) # Density matrix contribution due to basis overlap # # ----- d Theta # a \ mu nu # F += -2 Re ) ------------ E # / d R nu mu # ----- mu nu # mu in a; nu # Ftheta_av = np.zeros_like(Ftheta_av) for u, kpt in enumerate(self.kpt_u): dThetadRE_vMM = (self.dThetadR_qvMM[kpt.q] * self.ET_uMM[u][np.newaxis]).real for a, M1, M2 in self.my_slices(): Ftheta_av[a, :] += \ -2.0 * dThetadRE_vMM[:, M1:M2].sum(-1).sum(-1) return Ftheta_av def get_pot_term(self): """Calculate potential term""" Fpot_av = np.zeros_like(self.Fref_av) # Potential contribution # # ----- / d Phi (r) # a \ | mu ~ # F += -2 Re ) | ---------- v (r) Phi (r) dr rho # / | d R nu nu mu # ----- / a # mu in a; nu # self.timer.start('Potential') vt_sG = self.hamiltonian.vt_sG Fpot_av = np.zeros_like(Fpot_av) for u, kpt in enumerate(self.kpt_u): vt_G = vt_sG[kpt.s] Fpot_av += self.bfs.calculate_force_contribution(vt_G, self.rhoT_uMM[u], kpt.q) self.timer.stop('Potential') return Fpot_av def get_den_mat_paw_term(self): """Calcualte PAW correction""" # TO DO: split this function into # _get_den_mat_paw_term (which calculate Frho_av) and # get_paw_correction (which calculate ZE_MM) # Density matrix contribution from PAW correction # # ----- ----- # a \ a \ b # F += 2 Re ) Z E - 2 Re ) Z E # / mu nu nu mu / mu nu nu mu # ----- ----- # mu nu b; mu in a; nu # # with # b* # ----- dP # b \ i mu b b # Z = ) -------- dS P # mu nu / dR ij j nu # ----- b mu # ij # self.timer.start('Paw correction') Frho_av = np.zeros_like(self.Fref_av) for u, kpt in enumerate(self.kpt_u): work_MM = np.zeros((self.mynao, self.nao), self.dtype) ZE_MM = None for b in self.my_atom_indices: setup = self.setups[b] dO_ii = np.asarray(setup.dO_ii, self.dtype) dOP_iM = np.zeros((setup.ni, self.nao), self.dtype) mmm(1.0, dO_ii, 'N', self.P_aqMi[b][kpt.q], 'C', 0.0, dOP_iM) for v in range(3): mmm(1.0, self.dPdR_aqvMi[b][kpt.q][v][self.Mstart:self.Mstop], 'N', dOP_iM, 'N', 0.0, work_MM) ZE_MM = (work_MM * self.ET_uMM[u]).real for a, M1, M2 in self.slices(): dE = 2 * ZE_MM[M1:M2].sum() Frho_av[a, v] -= dE # the "b; mu in a; nu" term Frho_av[b, v] += dE # the "mu nu" term self.timer.stop('Paw correction') return Frho_av def _get_den_mat_paw_term(self): # THIS doesn't work in parallel # Density matrix contribution from PAW correction # # ----- ----- # a \ a \ b # F += 2 Re ) Z E - 2 Re ) Z E # / mu nu nu mu / mu nu nu mu # ----- ----- # mu nu b; mu in a; nu # # with # b* # ----- dP # b \ i mu b b # Z = ) -------- dS P # mu nu / dR ij j nu # ----- b mu # ij # Frho_av = np.zeros_like(self.Fref_av) self.timer.start('add paw correction') ZE_MM = self.get_paw_correction() for u, kpt in enumerate(self.kpt_u): for b in self.my_atom_indices: for v in range(3): for a, M1, M2 in self.slices(): dE = 2 * ZE_MM[u, b, v, M1:M2].sum() Frho_av[a, v] -= dE.real # the "b; mu in a; nu" term Frho_av[b, v] += dE.real # the "mu nu" term self.timer.stop('add paw correction') return Frho_av def get_paw_correction(self): # THIS doesn't work in parallel # <Phi_nu|pt_i>O_ii<dPt_i/dR|Phi_mu> self.timer.start('get paw correction') ZE_MM = np.zeros((len(self.kpt_u), len(self.my_atom_indices), 3, self.mynao, self.nao), self.dtype) for u, kpt in enumerate(self.kpt_u): work_MM = np.zeros((self.mynao, self.nao), self.dtype) for b in self.my_atom_indices: setup = self.setups[b] dO_ii = np.asarray(setup.dO_ii, self.dtype) dOP_iM = np.zeros((setup.ni, self.nao), self.dtype) mmm(1.0, dO_ii, 'N', self.P_aqMi[b][kpt.q], 'C', 0.0, dOP_iM) for v in range(3): mmm(1.0, self.dPdR_aqvMi[b][kpt.q][v][self.Mstart:self.Mstop], 'N', dOP_iM, 'N', 0.0, work_MM) ZE_MM[u, b, v, :, :] = (work_MM * self.ET_uMM[u]).real self.timer.stop('get paw correction') return ZE_MM def get_atomic_density_term(self): Fatom_av = np.zeros_like(self.Fref_av) # Atomic density contribution # ----- ----- # a \ a \ b # F += -2 Re ) A rho + 2 Re ) A rho # / mu nu nu mu / mu nu nu mu # ----- ----- # mu nu b; mu in a; nu # # b* # ----- d P # b \ i mu b b # A = ) ------- dH P # mu nu / d R ij j nu # ----- b mu # ij # self.timer.start('Atomic Hamiltonian force') Fatom_av = np.zeros_like(Fatom_av) for u, kpt in enumerate(self.kpt_u): for b in self.my_atom_indices: H_ii = np.asarray(unpack_hermitian(self.dH_asp[b][kpt.s]), self.dtype) if len(H_ii) == 0: # gemmdot does not like empty matrices! # (has been fixed in the new code) continue HP_iM = gemmdot(H_ii, np.ascontiguousarray( self.P_aqMi[b][kpt.q].T.conj())) for v in range(3): dPdR_Mi = \ self.dPdR_aqvMi[b][kpt.q][v][self.Mstart:self.Mstop] ArhoT_MM = \ (gemmdot(dPdR_Mi, HP_iM) * self.rhoT_uMM[u]).real for a, M1, M2 in self.slices(): dE = 2 * ArhoT_MM[M1:M2].sum() Fatom_av[a, v] += dE # the "b; mu in a; nu" term Fatom_av[b, v] -= dE # the "mu nu" term self.timer.stop('Atomic Hamiltonian force') return Fatom_av def get_den_mat_block_blacs(self, f_n, C_nM, redistributor): rho1_mm = self.ksl.calculate_blocked_density_matrix(f_n, C_nM).conj() rho_mm = redistributor.redistribute(rho1_mm) return rho_mm def get_pot_term_blacs(self): Fpot_av = np.zeros_like(self.Fref_av) from gpaw.blacs import BlacsGrid, Redistributor self.grid = BlacsGrid(self.ksl.block_comm, self.gd.comm.size, self.bd.comm.size) self.blocksize1 = -(-self.nao // self.grid.nprow) self.blocksize2 = -(-self.nao // self.grid.npcol) desc = self.grid.new_descriptor(self.nao, self.nao, self.blocksize1, self.blocksize2) vt_sG = self.hamiltonian.vt_sG self.rhoT_umm = [] self.ET_umm = [] self.redistributor = Redistributor(self.grid.comm, self.ksl.mmdescriptor, desc) Fpot_av = np.zeros_like(self.Fref_av) for u, kpt in enumerate(self.kpt_u): self.timer.start('Get density matrix') rhoT_mm = self.get_den_mat_block_blacs(kpt.f_n, kpt.C_nM, self.redistributor) self.rhoT_umm.append(rhoT_mm) self.timer.stop('Get density matrix') self.timer.start('Potential') rhoT_mM = self.ksl.distribute_to_columns(rhoT_mm, desc) vt_G = vt_sG[kpt.s] Fpot_av += self.bfs.calculate_force_contribution(vt_G, rhoT_mM, kpt.q) del rhoT_mM self.timer.stop('Potential') return Fpot_av def get_kin_and_den_term_blacs(self): Fkin_av_sum = np.zeros_like(self.Fref_av) Ftheta_av_sum = np.zeros_like(self.Fref_av) # pcutoff_a = [max([pt.get_cutoff() for pt in setup.pt_j]) # for setup in self.setups] # phicutoff_a = [max([phit.get_cutoff() for phit in setup.phit_j]) # for setup in self.setups] # XXX should probably use bdsize x gdsize instead # That would be consistent with some existing grids # I'm not sure if this is correct # XXX what are rows and columns actually? dH_asp = self.hamiltonian.dH_asp self.timer.start('Get density matrix') for kpt in self.kpt_u: ET_mm = self.get_den_mat_block_blacs(kpt.f_n * kpt.eps_n, kpt.C_nM, self.redistributor) self.ET_umm.append(ET_mm) self.timer.stop('Get density matrix') self.M1start = self.blocksize1 * self.grid.myrow self.M2start = self.blocksize2 * self.grid.mycol self.M1stop = min(self.M1start + self.blocksize1, self.nao) self.M2stop = min(self.M2start + self.blocksize2, self.nao) self.m1max = self.M1stop - self.M1start self.m2max = self.M2stop - self.M2start # from gpaw.lcao.overlap import TwoCenterIntegralCalculator self.timer.start('Prepare TCI loop') self.M_a = self.bfs.M_a Fkin2_av = np.zeros_like(self.Fref_av) Ftheta2_av = np.zeros_like(self.Fref_av) self.atompairs = self.newtci.a1a2.get_atompairs() self.timer.start('broadcast dH') self.alldH_asp = {} for a in range(len(self.setups)): gdrank = self.bfs.sphere_a[a].rank if gdrank == self.gd.rank: dH_sp = dH_asp[a] else: ni = self.setups[a].ni dH_sp = np.empty((self.nspins, ni * (ni + 1) // 2)) self.gd.comm.broadcast(dH_sp, gdrank) # okay, now everyone gets copies of dH_sp self.alldH_asp[a] = dH_sp self.timer.stop('broadcast dH') # This will get sort of hairy. We need to account for some # three-center overlaps, such as: # # a1 # Phi ~a3 a3 ~a3 a2 a2,a1 # < ---- |p > dH <p |Phi > rho # dR # # To this end we will loop over all pairs of atoms (a1, a3), # and then a sub-loop over (a3, a2). self.timer.stop('Prepare TCI loop') self.timer.start('Not so complicated loop') for (a1, a2) in self.atompairs: if a1 >= a2: # Actually this leads to bad load balance. # We should take a1 > a2 or a1 < a2 equally many times. # Maybe decide which of these choices # depending on whether a2 % 1 == 0 continue m1start = self.M_a[a1] - self.M1start m2start = self.M_a[a2] - self.M2start if m1start >= self.blocksize1 or m2start >= self.blocksize2: continue # (we have only one block per CPU) nm1 = self.setups[a1].nao nm2 = self.setups[a2].nao m1stop = min(m1start + nm1, self.m1max) m2stop = min(m2start + nm2, self.m2max) if m1stop <= 0 or m2stop <= 0: continue m1start = max(m1start, 0) m2start = max(m2start, 0) J1start = max(0, self.M1start - self.M_a[a1]) J2start = max(0, self.M2start - self.M_a[a2]) M1stop = J1start + m1stop - m1start J2stop = J2start + m2stop - m2start dThetadR_qvmm, dTdR_qvmm = self.newtci.dOdR_dTdR(a1, a2) for u, kpt in enumerate(self.kpt_u): rhoT_mm = self.rhoT_umm[u][m1start:m1stop, m2start:m2stop] ET_mm = self.ET_umm[u][m1start:m1stop, m2start:m2stop] Fkin_v = 2.0 * (dTdR_qvmm[kpt.q][:, J1start:M1stop, J2start:J2stop] * rhoT_mm[np.newaxis]).real.sum(-1).sum(-1) Ftheta_v = 2.0 * (dThetadR_qvmm[kpt.q][:, J1start:M1stop, J2start:J2stop] * ET_mm[np.newaxis]).real.sum(-1).sum(-1) Fkin2_av[a1] += Fkin_v Fkin2_av[a2] -= Fkin_v Ftheta2_av[a1] -= Ftheta_v Ftheta2_av[a2] += Ftheta_v Fkin_av = Fkin2_av Ftheta_av = Ftheta2_av self.timer.stop('Not so complicated loop') Fkin_av_sum += Fkin_av Ftheta_av_sum += Ftheta_av return Fkin_av_sum, Ftheta_av_sum def get_at_den_and_den_paw_blacs(self): Fatom_av = np.zeros_like(self.Fref_av) Frho_av = np.zeros_like(self.Fref_av) Fatom_av_sum = np.zeros_like(self.Fref_av) Frho_av_sum = np.zeros_like(self.Fref_av) self.dHP_and_dSP_aauim = {} self.a2values = {} for (a2, a3) in self.atompairs: if a3 not in self.a2values: self.a2values[a3] = [] self.a2values[a3].append(a2) self.timer.start('Complicated loop') for a1, a3 in self.atompairs: if a1 == a3: # Functions reside on same atom, so their overlap # does not change when atom is displaced continue m1start = self.M_a[a1] - self.M1start if m1start >= self.blocksize1: continue nm1 = self.setups[a1].nao m1stop = min(m1start + nm1, self.m1max) if m1stop <= 0: continue dPdR_qvim = self.newtci.dPdR(a3, a1) if dPdR_qvim is None: continue dPdR_qvmi = -dPdR_qvim.transpose(0, 1, 3, 2).conj() m1start = max(m1start, 0) J1start = max(0, self.M1start - self.M_a[a1]) J1stop = J1start + m1stop - m1start dPdR_qvmi = dPdR_qvmi[:, :, J1start:J1stop, :].copy() for a2 in self.a2values[a3]: m2start = self.M_a[a2] - self.M2start if m2start >= self.blocksize2: continue nm2 = self.setups[a2].nao m2stop = min(m2start + nm2, self.m2max) if m2stop <= 0: continue m2start = max(m2start, 0) J2start = max(0, self.M2start - self.M_a[a2]) J2stop = J2start + m2stop - m2start if (a2, a3) in self.dHP_and_dSP_aauim: dHP_uim, dSP_uim = self.dHP_and_dSP_aauim[(a2, a3)] else: P_qim = self.newtci.P(a3, a2) if P_qim is None: continue P_qmi = P_qim.transpose(0, 2, 1).conj() P_qmi = P_qmi[:, J2start:J2stop].copy() dH_sp = self.alldH_asp[a3] dS_ii = self.setups[a3].dO_ii dHP_uim = [] dSP_uim = [] for u, kpt in enumerate(self.kpt_u): dH_ii = unpack_hermitian(dH_sp[kpt.s]) dHP_im = np.dot(P_qmi[kpt.q], dH_ii).T.conj() # XXX only need nq of these, # but the looping is over all u dSP_im = np.dot(P_qmi[kpt.q], dS_ii).T.conj() dHP_uim.append(dHP_im) dSP_uim.append(dSP_im) self.dHP_and_dSP_aauim[(a2, a3)] = dHP_uim, dSP_uim for u, kpt in enumerate(self.kpt_u): rhoT_mm = self.rhoT_umm[u][m1start:m1stop, m2start:m2stop] ET_mm = self.ET_umm[u][m1start:m1stop, m2start:m2stop] dPdRdHP_vmm = np.dot(dPdR_qvmi[kpt.q], dHP_uim[u]) dPdRdSP_vmm = np.dot(dPdR_qvmi[kpt.q], dSP_uim[u]) Fatom_c = 2.0 * (dPdRdHP_vmm * rhoT_mm).real.sum(-1).sum(-1) Frho_c = 2.0 * (dPdRdSP_vmm * ET_mm).real.sum(-1).sum(-1) Fatom_av[a1] += Fatom_c Fatom_av[a3] -= Fatom_c Frho_av[a1] -= Frho_c Frho_av[a3] += Frho_c self.timer.stop('Complicated loop') Fatom_av_sum += Fatom_av Frho_av_sum += Frho_av return Fatom_av_sum, Frho_av_sum