# Author: Daniel S. King
'''
APC Ranked-Orbital Active Space Selection
If you find this module useful for your work, please consider citing the following:
A Ranked-Orbital Approach to Select Active Spaces for High-Throughput Multireference Computation
https://doi.org/10.1021/acs.jctc.1c00037
Large-Scale Benchmarking of Multireference Vertical-Excitation Calculations via Automated Active-Space Selection
https://doi.org/10.1021/acs.jctc.2c00630
'''
from pyscf.lib import logger
from pyscf import scf, lib
import numpy as np
[docs]
class Chooser():
"""
Chooser Class
Implements the ranked-orbital selection scheme outlined in https://doi.org/10.1021/acs.jctc.1c00037
Given a set of entropies, will select all orbitals for the active space and then drop the lowest-entropy orbitals
until the size constraint max_size is met.
Args:
orbs: 2D Numpy Array
Orbitals to choose from, spanning the entire basis (must be square matrix of coefficients)
occ: 1D Numpy Array
Orbital occupations for orbs (2,1,0); nactel will be set to the number of electrons in the selected orbitals
entropies: 1D Numpy Array
Importance measurment used to rank the orbitals
max_size: Int or Tuple
Active space size constraint.
If tuple, interpreted as (nelecas,ncas)
If int, interpreted as max # of orbitals
Returns:
active-space-size, #-active-electrons, orbital-initial-guess, chosen-active-orbital-indeces
Example:
#Randomly ranked orbitals
>>> import numpy as np
>>> from pyscf import gto, scf, mcscf
>>> from pyscf.mcscf import apc
>>> mol = gto.M(atom='H 0 0 0; H 0 0 1', basis='ccpvtz')
>>> mf = scf.RHF(mol).run()
>>> entropies = np.random.choice(np.arange(len(mf.mo_occ)),len(mf.mo_occ),replace=False)
>>> chooser = apc.Chooser(mf.mo_coeff,mf.mo_occ,entropies,max_size=(2,2))
>>> ncas, nelecas, casorbs, active_idx = chooser.kernel()
>>> mc = mcscf.CASSCF(mf, ncas, nelecas).run(casorbs)
"""
def __init__(self,orbs,occ,entropies,max_size=(8,8),verbose=4):
#Check that we have a full set of orbitals:
assert(orbs.shape[0] == orbs.shape[1])
assert(len(occ) == len(entropies))
assert(len(entropies) == orbs.shape[1])
self.log = logger.new_logger(lib.StreamObject,verbose)
self.orbs = orbs
self.occ = np.array(occ)
self.entropies = np.asarray(entropies)
self.max_size = max_size
self.verbose = verbose
def _ncsf(self,nactel,norbs):
"""
Returns number of CSFs in a (nactel,nactorbs) active space
Assumes minimum number Sz = alpha - beta (0 for even nactel, 1 for odd nactel)
"""
from scipy.special import comb
alpha = int(nactel//2 + nactel%2)
beta = int(nactel//2)
term1 = comb(norbs,alpha)*comb(norbs,beta)
term2 = comb(norbs,alpha+1)*comb(norbs,beta-1)
ncsf = term1-term2
return ncsf
def _calc_ncsf(self,active_idx):
"""
Returns the number of CSFs given the active index using the info in self.occ
Passes this info to self._ncsf to calculate the size of the active space
"""
occ = self.occ
nactel = np.sum(np.array(occ)[active_idx])
norbs = len(active_idx)
return self._ncsf(nactel,norbs)
def _as_is_reasonable(self,active_idx):
#Checks active space reasonability
occ = self.occ
nactel = np.sum(np.array(occ)[active_idx])
num_os = len(np.where(occ == 1)[0])
nactorbs = len(active_idx)
condition1 = (nactel > 0)
condition2 = (nactel < 2*len(active_idx))
condition3 = (nactorbs >= num_os)
if (condition1 and condition2 and condition3):
return True
else:
self.log.debug("Active space is not reasonable!")
self.log.debug(f"Nactel: {nactel}, Nactorbs: {nactorbs}, Num OS: {num_os}")
if not condition1:
self.log.debug("Condition 1 not met")
elif not condition2:
self.log.debug("Condition 2 not met")
elif not condition3:
self.log.debug("Condition 3 not met")
return False
[docs]
def kernel(self):
log = self.log
entropies = self.entropies.copy()
occ = self.occ.copy()
#Change singly occupied orbitals to have larger entropies so they are selected:
os_idx = np.where(occ == 1)[0]
entropies[os_idx] = np.max(entropies) + 0.01
if len(os_idx) > 0:
log.info("Singly occupied orbitals found, setting them to have entropy of max + 0.01...")
#Start with all orbitals in active space:
active_idx = list(range(len(entropies)))
inactive_idx = []
secondary_idx = []
#Size constraint:
if isinstance(self.max_size, (tuple, list, np.ndarray)):
nactel,norbs = self.max_size
max_size = self._ncsf(nactel,norbs)
as_size = self._calc_ncsf(active_idx)
else:
max_size = self.max_size
as_size = len(active_idx)
nactel = int(np.sum(np.array(occ)[active_idx]))
nactorbs = len(active_idx)
log.debug(f"Initial active space of ({nactel},{nactorbs}) has size {as_size}")
log.debug(f"Maximum active space size set to {max_size}")
#Drop orbitals until size constraint is satisfied:
while as_size > max_size:
nactel = int(np.sum(np.array(occ)[active_idx]))
nactorbs = len(active_idx)
log.debug(f"Active space of ({nactel},{nactorbs}) has size {as_size} larger than {max_size}")
log.debug("Dropping lowest entropy orbital...")
#Get active orbital entropies
active_entropies = entropies[active_idx]
ranked_active_idx = [active_idx[i] for i in np.argsort(active_entropies)]
#Drop lowest orbital in succession, checking for reasonability:
active_space_is_reasonable = False
tries = 0
while not active_space_is_reasonable:
try:
dropped_idx = ranked_active_idx[tries] #Move to next possibility
dropped_idx_entropy = np.round(entropies[dropped_idx],4)
dropped_idx_occ = int(occ[dropped_idx])
except IndexError:
log.error("Not enough orbitals to choose a reasonable active space!")
raise RuntimeError("Not enough orbitals to choose a reasonable active space!")
new_inactive_idx = inactive_idx.copy()
new_active_idx = active_idx.copy()
new_secondary_idx = secondary_idx.copy()
if dropped_idx_occ > 0:
new_active_idx.remove(dropped_idx)
new_inactive_idx += [dropped_idx]
else:
new_active_idx.remove(dropped_idx)
new_secondary_idx += [dropped_idx]
log.debug(f"Attempting to drop orbital {dropped_idx} \
(occ={dropped_idx_occ}, S={dropped_idx_entropy})...")
active_space_is_reasonable = self._as_is_reasonable(new_active_idx)
if active_space_is_reasonable:
log.debug("Orbital has been dropped")
else:
log.debug("Active space becomes unreasonable if this orbital is dropped, trying next option...")
tries += 1
inactive_idx = new_inactive_idx
active_idx = new_active_idx
secondary_idx = new_secondary_idx
#Calculate new NCSFs:
if isinstance(self.max_size,tuple):
nactel,norbs = self.max_size
as_size = self._calc_ncsf(active_idx)
else:
as_size = len(active_idx)
#Final checks:
assert(len(active_idx) <= len(entropies))
assert(as_size <= max_size)
orbs = self.orbs.copy()
inactive_orbs = orbs[:,inactive_idx]
active_orbs = orbs[:,active_idx]
secondary_orbs = orbs[:,secondary_idx]
casorbs = np.hstack([inactive_orbs,active_orbs,secondary_orbs])
nactorbs = active_orbs.shape[1]
active_occ = np.array(occ)[active_idx]
nboth = int(np.sum(active_occ[np.where(active_occ == 2)])/2)
nalpha = int(np.sum(active_occ[np.where(active_occ == 1)]))
alpha = nboth + nalpha
beta = nboth
nactel = (alpha,beta)
log.info(f"Final selected active space: ({nactel},{nactorbs})")
return nactorbs, nactel, casorbs, active_idx
[docs]
class APC():
"""
APC Class
Implements APC orbital entropy estimation from https://doi.org/10.1021/acs.jctc.1c00037
APC-N implemented from https://doi.org/10.1021/acs.jctc.2c00630
.kernel() combines this with the ranked-orbital scheme implemented in Chooser() to select
an active space of size max_size from the orbitals in mf.mo_coeff with occupancy mf.mo_occ
Args:
mf: an :class:`SCF` object
Must expose mf.mo_coeff, mf.mo_occ, mf.get_fock(), and mf.get_k()
max_size: Int or Tuple
Active space size constraint.
If tuple, interpreted as (nelecas,ncas)
If int interpreted as max # of orbitals
n: Int
Number of times to remove highest-entropy virtual orbitals in entropy calculation.
A higher value will tend to select active spaces with less doubly occupied orbitals.
Kwargs:
eps: Float
Small offset added to singly occupied and removed virtual orbital entropies (can generally be ignored)
Returns:
active-space-size, #-active-electrons, orbital-initial-guess (following AVAS convention)
Example:
>>> import numpy as np
>>> from pyscf import gto, scf, mcscf
>>> from pyscf.mcscf import apc
>>> mol = gto.M(atom='H 0 0 0; H 0 0 1', basis='ccpvtz')
>>> mf = scf.RHF(mol).run()
>>> myapc = apc.APC(mf,max_size=2)
>>> ncas,nelecas,casorbs = myapc.kernel()
>>> mc = mcscf.CASSCF(mf, ncas, nelecas).run(casorbs)
"""
def __init__(self,mf,max_size=(8,8),n=2,eps=1e-3,verbose=4):
self.log = logger.new_logger(lib.StreamObject,verbose)
self.mf = mf
self.n = n
self.eps = eps
assert(eps > 0) #Check that eps > 0
self.max_size = max_size
self.verbose = verbose
def _apc(self,orbs,occ,f_mo,k_mo):
"""
Calculates APC entropies for given orbitals, occupations, and F and K matrix elements
Singly occupied orbitals are set to max value of other orbitals + self.eps
Args:
orbs: 2D Numpy Array
A nbasis x nmo array of candidate AS orbitals
occ: 1D Numpy Array
Orbital occupations for orbs (2,1,0)
f_mo: 2D Numpy Array
Fock operator in the basis of the orbs (nmo x nmo)
k_mo: 2D Numpy Array
Exchange operator in the basis of the orbs (nmo x nmo)
"""
eps = self.eps
docc_idx = np.where(occ == 2)[0]
os_idx = np.where(occ == 1)[0]
virt_idx = np.where(occ == 0)[0]
#Calculate APCs
apcs = np.zeros([len(docc_idx),len(virt_idx)])
for i,d in enumerate(docc_idx):
for j,v in enumerate(virt_idx):
k12 = 0.5*k_mo[v,v]
delta = f_mo[v,v] - f_mo[d,d]
c = -k12/(delta + np.sqrt(k12**2 + delta**2))
apcs[i,j] = c
#Calculate entropies
apc_entropies = np.zeros(orbs.shape[1])
for o in range(orbs.shape[1]):
#Collect APCs for this orbital:
if o in os_idx:
continue #Fill in later with max value
elif o in docc_idx:
idx = np.where(docc_idx == o)[0][0]
apcs_o = apcs[idx,:]
elif o in virt_idx:
idx = np.where(virt_idx == o)[0][0]
apcs_o = apcs[:,idx]
#Normalize APCs:
cis = apcs_o
cis2 = cis**2
sumci2 = np.sum(cis2)
norm = np.sqrt((sumci2 + 1))
cisnorm = cis/norm
#Square Normalized APCs to calculate entropies:
cisnorm2 = cisnorm**2
assert((cisnorm2 < 1).any().all())
sumcisnorm2 = np.sum(cisnorm2)
assert(np.allclose((sumcisnorm2 + (1/norm)**2),1,atol=1e-6))
exent = -sumcisnorm2 * np.log(sumcisnorm2)
gsent = -(1/norm)**2 * np.log((1/norm)**2)
ent = exent + gsent
apc_entropies[o] = ent
#Assign max value to singly occupied orbitals plus some small value:
apc_entropies[os_idx] = np.max(apc_entropies) + eps
return apc_entropies
def _calc_apc_entropies(self,mf):
"""
Implements the "APC-N" approach in which high-entropy virtual orbitals are repeatedly set to singly occupied
Then sets the singly occupied orbitals and previously removed orbitals to high values
Reads the value of n from self.n
Args:
mf: an :class:`SCF` object
Must expose mf.mo_coeff, mf.mo_occ, mf.get_fock(), and mf.get_k()
"""
log = self.log
n = self.n
eps = self.eps
log.info(f"Calculating APC entropies (N={n})...")
f_ao = mf.get_fock()
k_ao = mf.get_k()
if isinstance(mf, scf.uhf.UHF):
log.note('UHF object found. APC uses averaged F, summed K, summed occupation, and alpha orbitals.')
orbs = mf.mo_coeff[0]
occ = mf.mo_occ.sum(axis=0) #summed occupation
f_ao = np.sum(f_ao,axis=0)/2 #averaged fock
k_ao = np.sum(k_ao,axis=0) #summed exchange
elif isinstance(mf, scf.rohf.ROHF):
log.note('ROHF object found. APC uses summed K')
orbs = mf.mo_coeff
occ = mf.mo_occ.copy()
k_ao = np.sum(k_ao,axis=0) #summed exchange
else:
orbs = mf.mo_coeff
occ = mf.mo_occ.copy()
#Calculate f and k in mo basis
log.info("Transforming F and K to MO basis...")
f_mo = np.linalg.multi_dot([orbs.T,f_ao,orbs])
k_mo = np.linalg.multi_dot([orbs.T,k_ao,orbs])
#Calculate entropies
removed_idx = []
original_os = np.where(occ == 1)[0]
log.info("Calculating initial APC entropies...")
apc_entropies = self._apc(orbs,occ,f_mo,k_mo)
for loop_n in range(n):
if loop_n > 0:
log.info(f"Calculating APC entropies (Round {loop_n})...")
apc_entropies = self._apc(orbs,occ,f_mo,k_mo)
maxS = np.round(np.max(apc_entropies),5)
log.info(f"Maximum entropy: {maxS}")
#Remove highest virtual and set occ to 1
virt_idx = np.where(occ == 0)[0]
to_remove = virt_idx[np.argmax(apc_entropies[virt_idx])]
removed_idx += [to_remove]
log.info(f"Setting maximum virtual orbitals {removed_idx} to occupation 1...")
occ[removed_idx] = 1
log.info("Calculating final APC entropies...")
apc_entropies = self._apc(orbs,occ,f_mo,k_mo)
maxS = np.round(np.max(apc_entropies),5)
log.info(f"Maximum entropy: {maxS}")
#Iterate over os and removed virtuals and set to max in order:
maxs = np.max(apc_entropies)
for i,o in enumerate(original_os):
apc_entropies[o] = maxs + 2*eps - i*eps*1e-2
for i,o in enumerate(removed_idx):
apc_entropies[o] = maxs + eps - i*eps*1e-2
return apc_entropies
[docs]
def kernel(self):
log = self.log
log.info('\n** APC Active Space Selection **')
entropies = self._calc_apc_entropies(self.mf)
self.entropies = entropies
if isinstance(self.mf, scf.uhf.UHF):
orbs = self.mf.mo_coeff[0] #alpha orbitals
occ = self.mf.mo_occ.sum(axis=0) #summed occupation
else:
orbs = self.mf.mo_coeff
occ = self.mf.mo_occ
max_size = self.max_size
log.info(f"Choosing active space with ranked orbital approach (max_size = {max_size})...")
chooser = Chooser(orbs,occ,entropies,max_size,verbose=self.verbose)
nactorbs, nactel, casorbs, active_idx = chooser.kernel()
self.active_idx = active_idx
return nactorbs, nactel, casorbs