Source code for pyscf.pbc.df.rsdf

#!/usr/bin/env python
# Copyright 2014-2020 The PySCF Developers. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author: Hong-Zhou Ye <hzyechem@gmail.com>
#

r'''
Range-separated Gaussian Density Fitting (RSGDF)

rsdf_builder.py uses a different algorithm to build RS-GDF tensors. Note both
modules CANNOT handle the long-range operator erf(omega*r12)/r12. It has to be
computed with the gdf_builder module.

ref.:
[1] For the RSGDF method:
    Hong-Zhou Ye and Timothy C. Berkelbach, J. Chem. Phys. 154, 131104 (2021).
[2] For the SR lattice sum integral screening:
    Hong-Zhou Ye and Timothy C. Berkelbach, arXiv:2107.09704.

In RSGDF, the two-center and three-center Coulomb integrals are calculated in two pars:
    j2c = j2c_SR(omega) + j2c_LR(omega)
    j3c = j3c_SR(omega) + j3c_LR(omega)
where the SR and LR integrals correpond to using the following potentials
    g_SR(r_12;omega) = erfc(omega * r_12) / r_12
    g_LR(r_12;omega) = erf(omega * r_12) / r_12
The SR integrals are evaluated in real space using a lattice summation, while
the LR integrals are evaluated in reciprocal space with a plane wave basis.
'''

import os
import h5py
import scipy.linalg
import tempfile
import numpy as np

from pyscf import lib
from pyscf.lib import logger, zdotCN
from pyscf.lib import parameters as param
from pyscf.pbc.df.df import GDF
from pyscf.pbc.df import aft, aft_jk
from pyscf.pbc.df import ft_ao
from pyscf.pbc.df import rsdf_helper
from pyscf.pbc.df import rsdf_builder
from pyscf.pbc.df import gdf_builder
from pyscf.pbc.df.incore import Int3cBuilder
from pyscf.df.outcore import _guess_shell_ranges
from pyscf.pbc.tools import k2gamma
from pyscf.pbc.lib.kpts_helper import (is_zero, member, unique,
                                       members_with_wrap_around)
from pyscf.df.addons import make_auxmol


[docs] def get_aux_chg(auxcell): r""" Compute charge of the auxiliary basis, \int_Omega dr chi_P(r) Returns: The function returns a 1d numpy array of size auxcell.nao_nr(). """ G0 = np.zeros((1, 3)) return ft_ao.ft_ao(auxcell, G0)[0].real
[docs] class RSGDF(GDF): '''Range Separated Gaussian Density Fitting ''' _keys = { 'use_bvk', 'precision_R', 'precision_G', 'npw_max', '_omega_min', 'omega', 'ke_cutoff', 'mesh_compact', 'omega_j2c', 'mesh_j2c', 'precision_j2c', 'j2c_eig_always', 'kpts', }
[docs] def weighted_coulG(self, kpt=np.zeros(3), exx=False, mesh=None, omega=None): return aft.weighted_coulG(self, kpt, exx, mesh, omega)
def __init__(self, cell, kpts=np.zeros((1,3))): if cell.dimension < 3: raise NotImplementedError(""" RSGDF for low-dimensional systems are not available yet. We recommend using cell.dimension=3 with large vacuum.""") # if True and kpts are gamma-inclusive, RSDF will use the bvk cell # trick for computing both j3c_SR and j3c_LR. If kpts are not # gamma-inclusive, this attribute will be ignored. self.use_bvk = True # precision for real-space lattice sum (R) and reciprocal-space # Fourier transform (G). self.precision_R = cell.precision * 1e-2 self.precision_G = cell.precision # omega and PW mesh size for j3c. # 1. If 'omega' is given, the code can search an appropriate PW mesh of # size 'mesh_compact' that computes the LR-AFT of j3c to 'precision_G'. # 2. If 'omega' is not given, the code will search for the maximum # omega such that the size of 'mesh_compact' does not exceed 'npw_max'. # The default for 'npw_max' is 350 (i.e., 7x7x7 for a 3D cubic # lattice). If thus determined 'omega' is smaller than '_omega_min' # (default: 0.3), 'omega' will be set to '_omega_min' and 'mesh_compact' # is determined from the new 'omega' (ignoring 'npw_max'). # Note 1: In both cases, the user can manually overwrite the # auto-determined 'mesh_compact'. # Note 2: 'ke_cutoff' is not an input option. Use 'mesh_compact' directly. self.npw_max = 350 self._omega_min = 0.3 self.omega = None self.ke_cutoff = None self.mesh_compact = None # omega and PW mesh size for j2c. # Like for j3c, if 'omega_j2c' is given, the code can determine an # appropriate PW mesh of size 'mesh_j2c' that computes the LR-AFT of j2c # to 'precision_j2c'. # The default ('omega_j2c' = 0.4 and 'precision_j2c' = 1e-14) is recommended. # Like for j3c, 'mesh_j2c' can be overwritten manually. self.omega_j2c = 0.4 self.mesh_j2c = None self.precision_j2c = 1e-14 # set True to force calculating j2c^(-1/2) using eigenvalue # decomposition (ED); otherwise, Cholesky decomposition (CD) is used # first, and ED is called only if CD fails. self.j2c_eig_always = False GDF.__init__(self, cell, kpts=kpts) self.kpts = np.reshape(self.kpts, (-1,3))
[docs] def dump_flags(self, verbose=None): cell = self.cell log = logger.new_logger(self, verbose) log.info('\n') log.info('******** %s ********', self.__class__) log.info('cell num shells = %d, num cGTOs = %d, num pGTOs = %d', cell.nbas, cell.nao_nr(), cell.npgto_nr()) log.info('use_bvk = %s', self.use_bvk) log.info('precision_R = %s', self.precision_R) log.info('precision_G = %s', self.precision_G) log.info('j2c_eig_always = %s', self.j2c_eig_always) log.info('omega = %s', self.omega) log.info('ke_cutoff = %s', self.ke_cutoff) if self.mesh is not None: log.info('mesh = %s (%d PWs)', self.mesh, np.prod(self.mesh)) log.info('mesh_compact = %s (%d PWs)', self.mesh_compact, np.prod(self.mesh_compact)) if self.auxcell is None: log.info('auxbasis = %s', self.auxbasis) else: log.info('auxbasis = %s', self.auxcell.basis) log.info('auxcell precision= %s', self.auxcell.precision) log.info('auxcell rcut = %s', self.auxcell.rcut) log.info('omega_j2c = %s', self.omega_j2c) log.info('mesh_j2c = %s (%d PWs)', self.mesh_j2c, np.prod(self.mesh_j2c)) auxcell = self.auxcell log.info('auxcell num shells = %d, num cGTOs = %d, num pGTOs = %d', auxcell.nbas, auxcell.nao_nr(), auxcell.npgto_nr()) log.info('exp_to_discard = %s', self.exp_to_discard) if isinstance(self._cderi, str): log.info('_cderi = %s where DF integrals are loaded (readonly).', self._cderi) elif isinstance(self._cderi_to_save, str): log.info('_cderi_to_save = %s', self._cderi_to_save) else: log.info('_cderi_to_save = %s', self._cderi_to_save.name) log.info('len(kpts) = %d', len(self.kpts)) log.debug1(' kpts = %s', self.kpts) if self.kpts_band is not None: log.info('len(kpts_band) = %d', len(self.kpts_band)) log.debug1(' kpts_band = %s', self.kpts_band) return self
def _rs_build(self): log = logger.Logger(self.stdout, self.verbose) # find kmax kpts = self.kpts if self.kpts_band is None else np.vstack( [self.kpts, self.kpts_band]) b = self.cell.reciprocal_vectors() scaled_kpts = np.linalg.solve(b.T, kpts.T).T scaled_kpts[scaled_kpts > 0.49999999] -= 1 kpts = np.dot(scaled_kpts, b) kmax = np.linalg.norm(kpts, axis=-1).max() scaled_kpts = kpts = None if kmax < 1.e-3: kmax = (0.75/np.pi/self.cell.vol)**0.33333333*2*np.pi # If omega is not given, estimate it from npw_max r2o = True if self.omega is None: self.omega, self.ke_cutoff, mesh_compact = \ rsdf_helper.estimate_omega_for_npw( self.cell, self.npw_max, self.precision_G, kmax=kmax, round2odd=r2o) # if omega from npw_max is too small, use omega_min if self.omega < self._omega_min: self.omega = self._omega_min self.ke_cutoff, mesh_compact = \ rsdf_helper.estimate_mesh_for_omega( self.cell, self.omega, self.precision_G, kmax=kmax, round2odd=r2o) # Use the thus determined mesh_compact only if not p[rovided if self.mesh_compact is None: self.mesh_compact = mesh_compact # If omega is provded but mesh_compact is not elif self.mesh_compact is None: self.ke_cutoff, self.mesh_compact = \ rsdf_helper.estimate_mesh_for_omega( self.cell, self.omega, self.precision_G, kmax=kmax, round2odd=r2o) self.mesh_compact = self.cell.symmetrize_mesh(self.mesh_compact) # build auxcell auxcell = make_auxmol(self.cell, self.auxbasis) # drop exponents drop_eta = self.exp_to_discard if drop_eta is not None and drop_eta > 0: log.info("Drop primitive fitting functions with exponent < %s", drop_eta) auxbasis = rsdf_helper.remove_exp_basis(auxcell._basis, amin=drop_eta) auxcellnew = make_auxmol(self.cell, auxbasis) auxcell = auxcellnew # determine mesh for computing j2c auxcell.precision = self.precision_j2c auxcell.rcut = max([auxcell.bas_rcut(ib, auxcell.precision) for ib in range(auxcell.nbas)]) if self.mesh_j2c is None: self.mesh_j2c = rsdf_helper.estimate_mesh_for_omega( auxcell, self.omega_j2c, round2odd=True)[1] self.mesh_j2c = self.cell.symmetrize_mesh(self.mesh_j2c) self.auxcell = auxcell def _kpts_build(self, kpts_band=None): if self.kpts_band is not None: self.kpts_band = np.reshape(self.kpts_band, (-1,3)) if kpts_band is not None: kpts_band = np.reshape(kpts_band, (-1,3)) if self.kpts_band is None: self.kpts_band = kpts_band else: self.kpts_band = unique(np.vstack((self.kpts_band,kpts_band)))[0] def _gdf_build(self, j_only=None, with_j3c=True): if j_only is not None: self._j_only = j_only if with_j3c: if isinstance(self._cderi_to_save, str): cderi = self._cderi_to_save else: cderi = self._cderi_to_save.name if isinstance(self._cderi, str): if self._cderi == cderi and os.path.isfile(cderi): logger.warn(self, 'File %s (specified by ._cderi) is ' 'overwritten by GDF initialization.', cderi) else: logger.warn(self, 'Value of ._cderi is ignored. ' 'DF integrals will be saved in file %s .', cderi) self._cderi = cderi t1 = (logger.process_clock(), logger.perf_counter()) self._make_j3c(self.cell, self.auxcell, None, cderi) t1 = logger.timer_debug1(self, 'j3c', *t1) def _make_j3c(self, cell=None, auxcell=None, kptij_lst=None, cderi_file=None): if cell is None: cell = self.cell if auxcell is None: auxcell = self.auxcell if cderi_file is None: cderi_file = self._cderi_to_save if self.kpts_band is None: kpts_union = self.kpts else: kpts_union = unique(np.vstack([self.kpts, self.kpts_band]))[0] dfbuilder = _RSGDFBuilder(cell, auxcell, kpts_union) dfbuilder.__dict__.update(self.__dict__) j_only = self._j_only or len(kpts_union) == 1 dfbuilder.make_j3c(cderi_file, j_only=j_only, dataname=self._dataname, kptij_lst=kptij_lst)
[docs] def build(self, j_only=None, with_j3c=True, kpts_band=None): # formatting k-points self._kpts_build(kpts_band=kpts_band) # build for range-separation hybrid self._rs_build() # dump flags before the final build self.check_sanity() self.dump_flags() # do normal gdf build with the modified _make_j3c self._gdf_build(j_only=j_only, with_j3c=with_j3c) return self
RSDF = RSGDF class _RSGDFBuilder(rsdf_builder._RSGDFBuilder): _keys = { 'use_bvk', 'precision_R', 'precision_G', 'npw_max', '_omega_min', 'omega', 'ke_cutoff', 'mesh_compact', 'omega_j2c', 'mesh_j2c', 'precision_j2c', 'j2c_eig_always', 'kpts', } def __init__(self, cell, auxcell, kpts=np.zeros((1,3))): self.eta = None self.mesh = None if cell.omega != 0: # Initialize omega to cell.omega for HF exchange of short range # int2e in RSH functionals self.omega = abs(cell.omega) else: self.omega = None self.ke_cutoff = None self.bvk_kmesh = None self.supmol_ft = None Int3cBuilder.__init__(self, cell, auxcell, kpts) # set True to force calculating j2c^(-1/2) using eigenvalue # decomposition (ED); otherwise, Cholesky decomposition (CD) is used # first, and ED is called only if CD fails. self.j2c_eig_always = False self.linear_dep_threshold = rsdf_builder.LINEAR_DEP_THR def build(self, omega=None): log = logger.new_logger(self) cell = self.cell kpts = self.kpts self.bvk_kmesh = kmesh = k2gamma.kpts_to_kmesh(cell, kpts) log.debug('kmesh for bvk-cell = %s', kmesh) self.rs_cell = rs_cell = ft_ao._RangeSeparatedCell.from_cell( cell, self.ke_cutoff, rsdf_builder.RCUT_THRESHOLD, verbose=log) rcut = rsdf_builder.estimate_ft_rcut(rs_cell, cell.precision, exclude_dd_block=False) supmol_ft = rsdf_builder._ExtendedMoleFT.from_cell(rs_cell, kmesh, rcut.max(), log) supmol_ft.exclude_dd_block = False self.supmol_ft = supmol_ft.strip_basis(rcut) log.debug('sup-mol-ft nbas = %d cGTO = %d pGTO = %d', supmol_ft.nbas, supmol_ft.nao, supmol_ft.npgto_nr()) return self def get_2c2e(self, uniq_kpts): cell = self.cell auxcell = self.auxcell # compute j2c first as it informs the integral screening in computing j3c # short-range part of j2c ~ (-kpt_ji | kpt_ji) omega_j2c = abs(self.omega_j2c) j2c = rsdf_helper.intor_j2c(auxcell, omega_j2c, kpts=uniq_kpts) # get charge of auxbasis if cell.dimension == 3: qaux = get_aux_chg(auxcell) else: qaux = np.zeros(auxcell.nao_nr()) # Add (1) short-range G=0 (i.e., charge) part and (2) long-range part qaux2 = None g0_j2c = np.pi/omega_j2c**2./cell.vol mesh_j2c = self.mesh_j2c Gv, Gvbase, kws = cell.get_Gv_weights(mesh_j2c) b = cell.reciprocal_vectors() gxyz = lib.cartesian_prod([np.arange(len(x)) for x in Gvbase]) ngrids = gxyz.shape[0] max_memory = max(2000, self.max_memory - lib.current_memory()[0]) blksize = max(2048, int(max_memory*.5e6/16/auxcell.nao_nr())) logger.debug2(self, 'max_memory %s (MB) blocksize %s', max_memory, blksize) for k, kpt in enumerate(uniq_kpts): # short-range charge part if is_zero(kpt) and cell.dimension == 3: if qaux2 is None: qaux2 = np.outer(qaux,qaux) j2c[k] -= qaux2 * g0_j2c # long-range part via aft coulG_lr = self.weighted_coulG(kpt, mesh=mesh_j2c, omega=omega_j2c) for p0, p1 in lib.prange(0, ngrids, blksize): auxG = ft_ao.ft_ao(auxcell, Gv[p0:p1], None, b, gxyz[p0:p1], Gvbase, kpt).T auxGR = np.asarray(auxG.real, order='C') auxGI = np.asarray(auxG.imag, order='C') auxG = None if is_zero(kpt): # kpti == kptj j2c[k] += lib.ddot(auxGR*coulG_lr[p0:p1], auxGR.T) j2c[k] += lib.ddot(auxGI*coulG_lr[p0:p1], auxGI.T) else: j2cR, j2cI = zdotCN(auxGR*coulG_lr[p0:p1], auxGI*coulG_lr[p0:p1], auxGR.T, auxGI.T) j2c[k] += j2cR + j2cI * 1j auxGR = auxGI = j2cR = j2cI = None return j2c def outcore_auxe2(self, cderi_file, intor='int3c2e', aosym='s2', comp=None, kptij_lst=None, j_only=False, dataname='j3c-junk', shls_slice=None): # Deadlock on NFS if you open an already-opened tmpfile in H5PY # swapfile = tempfile.NamedTemporaryFile(dir=os.path.dirname(cderi_file)) fswap = lib.H5TmpFile(dir=os.path.dirname(cderi_file), prefix='.outcore_auxe2_swap') # avoid trash files os.unlink(fswap.filename) cell = self.cell if self.use_bvk and self.kpts_band is None: bvk_kmesh = self.bvk_kmesh else: bvk_kmesh = None max_memory = max(2000, self.max_memory-lib.current_memory()[0]) rsdf_helper._aux_e2_nospltbas(cell, self.auxcell, self.omega, fswap, intor, aosym=aosym, kptij_lst=kptij_lst, dataname=dataname, max_memory=max_memory, bvk_kmesh=bvk_kmesh, precision=self.precision_R) return fswap def weighted_ft_ao(self, kpt): cell = self.cell auxcell = self.auxcell mesh = self.mesh_compact Gv, Gvbase, kws = cell.get_Gv_weights(mesh) b = cell.reciprocal_vectors() gxyz = lib.cartesian_prod([np.arange(len(x)) for x in Gvbase]) shls_slice = (0, auxcell.nbas) auxG = ft_ao.ft_ao(auxcell, Gv, shls_slice, b, gxyz, Gvbase, kpt).T wcoulG_lr = self.weighted_coulG(kpt, mesh=mesh, omega=self.omega) auxG *= wcoulG_lr Gaux = lib.transpose(auxG) GauxR = np.asarray(Gaux.real, order='C') GauxI = np.asarray(Gaux.imag, order='C') return GauxR, GauxI def gen_j3c_loader(self, h5group, kpt, kpt_ij_idx, ijlst_mapping, aosym): cell = self.cell kpts = self.kpts nkpts = len(self.kpts) vbar = None if is_zero(kpt) and cell.dimension == 3: qaux = get_aux_chg(self.auxcell) vbar = np.pi / self.omega**2 / cell.vol * qaux vbar_idx = np.where(vbar != 0)[0] ovlp = cell.pbc_intor('int1e_ovlp', hermi=1, kpts=kpts) if aosym == 's2': ovlp = [lib.pack_tril(s) for s in ovlp] else: ovlp = [s.ravel() for s in ovlp] # TODO: Store rs_density_fit cderi tensor in v1 format for the moment. # It should be changed to 'v2' format in the future. if ijlst_mapping is None: data_version = 'v2' else: data_version = 'v1' if data_version == 'v1': nsegs = len(h5group[f'j3c-junk/{ijlst_mapping[kpt_ij_idx[0]]}']) else: nsegs = len(h5group[f'j3c-junk/{kpt_ij_idx[0]}']) def load_j3c(col0, col1): j3cR = [] j3cI = [] for kk in kpt_ij_idx: if data_version == 'v1': v = np.hstack([h5group[f'j3c-junk/{ijlst_mapping[kk]}/{i}'][0,col0:col1] for i in range(nsegs)]) else: v = np.hstack([h5group[f'j3c-junk/{kk}/{i}'][0,col0:col1] for i in range(nsegs)]) vR = np.asarray(v.real, order='C') kj = kk % nkpts if is_zero(kpt) and is_zero(kpts[kj]): vI = None else: vI = np.asarray(v.imag, order='C') # vbar is the interaction between the background charge # and the auxiliary basis. 0D, 1D, 2D do not have vbar. if vbar is not None: vmod = ovlp[kj][col0:col1,None] * vbar[vbar_idx] vR[:,vbar_idx] -= vmod.real if vI is not None: vI[:,vbar_idx] -= vmod.imag j3cR.append(vR) j3cI.append(vI) return j3cR, j3cI return load_j3c def make_j3c(self, cderi_file, intor='int3c2e', aosym='s2', comp=None, j_only=False, dataname='j3c', shls_slice=None, kptij_lst=None): if self.cell.omega != 0: raise RuntimeError('RSGDF cannot be used to evaluate the long-range ' 'HF exchange in RSH functionals.') cpu1 = (logger.process_clock(), logger.perf_counter()) log = logger.Logger(self.stdout, self.verbose) if self.rs_cell is None: self.build() cell = self.cell kpts = self.kpts nkpts = len(kpts) nao = cell.nao naux = self.auxcell.nao if shls_slice is not None: raise NotImplementedError ish0, ish1 = 0, cell.nbas # ijlst_mapping maps the [nkpts x nkpts] kpts-pair to kpts-pair in # kptij_lst. Value -1 in ijlst_mapping means the kpts-pair does not # exist in kptij_lst ijlst_mapping = np.empty(nkpts * nkpts, dtype=int) ijlst_mapping[:] = -1 if kptij_lst is None: if j_only: kpti_idx = np.arange(nkpts) ijlst_mapping[kpti_idx * nkpts + kpti_idx] = kpti_idx kptij_lst = np.concatenate([kpts[:,None,:], kpts[:,None,:]], axis=1) kk_idx = kpti_idx * nkpts + kpti_idx else: kpti_idx, kptj_idx = np.tril_indices(nkpts) nkpts_pair = kpti_idx.size ijlst_mapping[kpti_idx * nkpts + kptj_idx] = np.arange(nkpts_pair) kptij_lst = np.concatenate([kpts[kpti_idx,None,:], kpts[kptj_idx,None,:]], axis=1) kk_idx = kpti_idx * nkpts + kptj_idx else: kpti_idx = members_with_wrap_around(cell, kptij_lst[:,0], kpts) kptj_idx = members_with_wrap_around(cell, kptij_lst[:,1], kpts) ijlst_mapping[kpti_idx * nkpts + kptj_idx] = np.arange(len(kptij_lst)) kk_idx = kpti_idx * nkpts + kptj_idx fswap = self.outcore_auxe2(cderi_file, intor, aosym, comp, kptij_lst, j_only, 'j3c-junk', shls_slice) cpu1 = log.timer_debug1('3c2e', *cpu1) ft_kern = self.supmol_ft.gen_ft_kernel(aosym, return_complex=False, verbose=log) # recompute g0 and Gvectors for j3c mesh = self.mesh_compact Gv, Gvbase, kws = cell.get_Gv_weights(mesh) gxyz = lib.cartesian_prod([np.arange(len(x)) for x in Gvbase]) ngrids = gxyz.shape[0] # Add (1) short-range G=0 (i.e., charge) part and (2) long-range part tspans = np.zeros((3,2)) # lr, j2c_inv, j2c_cntr tspannames = ["ftaop+pw", "j2c_inv", "j2c_cntr"] feri = lib.H5FileWrap(cderi_file, 'w') # TODO: Store rs_density_fit cderi tensor in v1 format for the moment. # It should be changed to 'v2' format in the future. data_version = 'v1' if data_version == 'v1': feri['j3c-kptij'] = kptij_lst else: feri['kpts'] = kpts ijlst_mapping = None def make_cderi(kpt, kpt_ij_idx, j2c): log.debug1('make_cderi for %s', kpt) kptjs = kpts[kpt_ij_idx % nkpts] nkptj = len(kptjs) if data_version == 'v1': input_kptij_idx = ijlst_mapping[kpt_ij_idx] # filter kpt_ij_idx, keeps only the kpts-pair in kptij_lst kpt_ij_idx = kpt_ij_idx[input_kptij_idx >= 0] # input_kptij_idx saves the indices of remaining kpts-pair in kptij_lst input_kptij_idx = input_kptij_idx[input_kptij_idx >= 0] log.debug1('kpt_ij_idx = %s', kpt_ij_idx) log.debug1('input_kptij_idx = %s', input_kptij_idx) else: input_kptij_idx = kpt_ij_idx if kpt_ij_idx.size == 0: return Gaux = self.weighted_ft_ao(kpt) if is_zero(kpt): # kpti == kptj aosym = 's2' nao_pair = nao*(nao+1)//2 else: aosym = 's1' nao_pair = nao**2 load = self.gen_j3c_loader(fswap, kpt, kpt_ij_idx, ijlst_mapping, aosym) mem_now = lib.current_memory()[0] log.debug2('memory = %s', mem_now) max_memory = max(1000, self.max_memory - mem_now) # nkptj for 3c-coulomb arrays plus 1 Lpq array buflen = min(max(int(max_memory*.3e6/16/naux/(nkptj+1)), 1), nao_pair) sh_ranges = _guess_shell_ranges(cell, buflen, aosym, start=ish0, stop=ish1) buflen = max([x[2] for x in sh_ranges]) # * 2 for the buffer used in preload max_memory -= buflen * naux * (nkptj+1) * 16e-6 * 2 # +1 for a pqkbuf Gblksize = max(16, int(max_memory*1e6/16/buflen/(nkptj+1))) Gblksize = min(Gblksize, ngrids, 200000) cols = [sh_range[2] for sh_range in sh_ranges] locs = np.append(0, np.cumsum(cols)) # buf for ft_aopair buf = np.empty(nkptj*buflen*Gblksize, dtype=np.complex128) for istep, j3c in enumerate(lib.map_with_prefetch(load, locs[:-1], locs[1:])): bstart, bend, ncol = sh_ranges[istep] log.debug1('int3c2e [%d/%d], AO [%d:%d], ncol = %d', istep+1, len(sh_ranges), bstart, bend, ncol) if aosym == 's2': shls_slice = (bstart, bend, 0, bend) else: shls_slice = (bstart, bend, 0, cell.nbas) if self.has_long_range(): tick_ = np.asarray((logger.process_clock(), logger.perf_counter())) for p0, p1 in lib.prange(0, ngrids, Gblksize): # shape of Gpq (nkpts, nGv, ni, nj) Gpq = ft_kern(Gv[p0:p1], gxyz[p0:p1], Gvbase, kpt, kptjs, shls_slice, aosym, out=buf) self.add_ft_j3c(j3c, Gpq, Gaux, p0, p1) Gpq = None tock_ = np.asarray((logger.process_clock(), logger.perf_counter())) tspans[0] += tock_ - tick_ j3cR, j3cI = j3c for k, idx in enumerate(input_kptij_idx): cderi, cderi_negative = self.solve_cderi(j2c, j3cR[k], j3cI[k]) feri[f'{dataname}/{idx}/{istep}'] = cderi if cderi_negative is not None: # for low-dimension systems feri[f'{dataname}-/{idx}/{istep}'] = cderi_negative j3cR = j3cI = j3c = cderi = None tick_ = np.asarray((logger.process_clock(), logger.perf_counter())) tspans[2] += tick_ - tock_ for kpt, kpt_ij_idx, cd_j2c \ in self.gen_uniq_kpts_groups(j_only, fswap, kk_idx=kk_idx): make_cderi(kpt, kpt_ij_idx, cd_j2c) feri.close() # report time for aft part for tspan, tspanname in zip(tspans, tspannames): log.debug1(" CPU time for %s %9.2f sec, wall time %9.2f sec", "%10s"%tspanname, *tspan) return self