from __future__ import division
import numpy as np
from timeit import default_timer as timer
from scipy.sparse import csr_matrix, coo_matrix
from scipy.linalg import blas
from pynao.m_sparsetools import csr_matvec, csc_matvec, csc_matvecs
import math
[docs]def calc_sab(mat1, mat2, vec, GPU=False):
"""
Perform two matrix-vector multiplication in an row, i.e
out = M2.dot(M1.dot(in))
"""
if GPU:
vdp = mat1.dot(vec)
return mat2.dot(vdp)
else:
vdp = csr_matvec(mat1, vec)
return csr_matvec(mat2, vdp)
[docs]def calc_nm2v(mat1, mat2, matin):
"""
calculate nm2v via two matrix-matrix multiplication in an row
"""
nb2v = mat1.dot(matin)
return nb2v.dot(mat2)
[docs]def calc_ab2v(mat1, mat2, matin):
"""
calculate ab2v via two matrix-matrix multiplication in an row
"""
nb2v = matin.dot(mat1)
return mat2.dot(nb2v)
[docs]def div_eigenenergy(ksn2e, ksn2f, vstart, nfermi, comega, spin, use_numba,
nm2v_re, nm2v_im, div_numba=None, GPU=False,
blockspergrid=None, threadsperblock=None):
"""
Divide by the energy terms
omega - (E_m - E_n) + i*varepsilon
See equation 2.81 in Ref 1.
Reference:
[1]: Plasmon in Nanoparticles: Atomistic Ab initio theory for large
Systems, M. Barbry, PhD thesis, 2018
"""
if use_numba and div_numba is not None:
if GPU:
div_numba[blockspergrid, threadsperblock](ksn2e[0, spin], ksn2f[0, spin],
nfermi[spin], vstart[spin],
comega.real, comega.imag,
nm2v_re, nm2v_im)
else:
div_numba(ksn2e[0, spin], ksn2f[0, spin], nfermi[spin], vstart[spin],
comega, nm2v_re, nm2v_im)
else:
for n, (en, fn) in enumerate(zip(ksn2e[0, spin, :nfermi[spin]],
ksn2f[0, spin, :nfermi[spin]])):
for m, (em, fm) in enumerate(zip(ksn2e[0, spin, vstart[spin]:],
ksn2f[0, spin, vstart[spin]:])):
nm2v = nm2v_re[n, m] + 1.0j*nm2v_im[n, m]
nm2v = nm2v * (fn - fm) * \
( 1.0 / (comega - (em - en)) - 1.0 / (comega + (em - en)) )
nm2v_re[n, m] = nm2v.real
nm2v_im[n, m] = nm2v.imag
# padding m<n i.e. negative occupations' difference
for n in range(vstart[spin] + 1, nf):
for m in range(n - vstart[spin]):
nm2v_re[n, m] = 0.0
nm2v_im[n, m] = 0.0
[docs]def chi0_mv(self, dvin, comega, dnout=None):
"""
Apply the non-interacting response function Chi_0 to the induced effetive
potential delta V_eff. See equation 2.81 and 2.83in Ref 1.
Input Parameters:
self : tddft_iter or tddft_tem class
sp2v : vector describing the effective perturbation [spin*product] --> value
comega: complex frequency
Reference:
[1]: Plasmon in Nanoparticles: Atomistic Ab initio theory for large
Systems, M. Barbry, PhD thesis, 2018
"""
if dnout is None:
dnout = np.zeros_like(dvin, dtype=self.dtypeComplex)
sp2v = dvin.reshape((self.nspin,self.nprod))
sp2dn = dnout.reshape((self.nspin,self.nprod))
for spin in range(self.nspin):
# real part
t1 = timer()
temp = calc_sab(self.cc_da, self.v_dab_trans, sp2v[spin].real)
t2 = timer()
self.chi0_timing[0] += t2-t1
t1 = timer()
nm2v_re = calc_nm2v(self.xocc[spin], self.xvrt[spin].T,
temp.reshape(self.norbs, self.norbs))
t2 = timer()
self.chi0_timing[1] += t2-t1
# imag
t1 = timer()
temp = calc_sab(self.cc_da, self.v_dab_trans, sp2v[spin].imag)
t2 = timer()
self.chi0_timing[2] += t2-t1
t1 = timer()
nm2v_im = calc_nm2v(self.xocc[spin], self.xvrt[spin].T,
temp.reshape(self.norbs, self.norbs))
t2 = timer()
self.chi0_timing[3] += t2-t1
t1 = timer()
div_eigenenergy(self.ksn2e, self.ksn2f, self.vstart, self.nfermi, comega,
spin, self.use_numba, nm2v_re, nm2v_im,
div_numba=self.div_numba)
t2 = timer()
self.chi0_timing[4] += t2-t1
# real part
t1 = timer()
temp = calc_ab2v(self.xvrt[spin], self.xocc[spin].T, nm2v_re)
t2 = timer()
self.chi0_timing[5] += t2-t1
t1 = timer()
sp2dn[spin] = calc_sab(self.v_dab, self.cc_da_trans,
temp.reshape(self.norbs*self.norbs))
t2 = timer()
self.chi0_timing[6] += t2-t1
# imag part
t1 = timer()
temp = calc_ab2v(self.xvrt[spin], self.xocc[spin].T, nm2v_im)
t2 = timer()
self.chi0_timing[7] += t2-t1
t1 = timer()
sp2dn[spin] += 1.0j*calc_sab(self.v_dab, self.cc_da_trans,
temp.reshape(self.norbs*self.norbs))
t2 = timer()
self.chi0_timing[8] += t2-t1
return dnout
[docs]def chi0_mv_gpu(self, dvin, comega, dnout=None):
"""
Apply the non-interacting response function Chi_0 to the induced effetive
potential delta V_eff. See equation 2.81 and 2.83in Ref 1.
Performs the matrix-matrix operation on GPU
Input Parameters:
self : tddft_iter or tddft_tem class
sp2v : vector describing the effective perturbation [spin*product] --> value
comega: complex frequency
Reference:
[1]: Plasmon in Nanoparticles: Atomistic Ab initio theory for large
Systems, M. Barbry, PhD thesis, 2018
"""
import cupy as cp
if dnout is None:
dnout = np.zeros_like(dvin, dtype=self.dtypeComplex)
sp2v = dvin.reshape((self.nspin,self.nprod))
sp2dn = dnout.reshape((self.nspin,self.nprod))
for spin in range(self.nspin):
# real part
t1 = timer()
temp = calc_sab(self.cc_da, self.v_dab_trans, sp2v[spin].real)
t2 = timer()
self.chi0_timing[0] += t2-t1
temp_gpu = cp.asarray(temp)
t1 = timer()
nm2v_re = calc_nm2v(self.xocc_gpu[spin], self.xvrt_gpu[spin].T,
temp_gpu.reshape(self.norbs, self.norbs))
t2 = timer()
self.chi0_timing[1] += t2-t1
# imag
t1 = timer()
temp = calc_sab(self.cc_da, self.v_dab_trans, sp2v[spin].imag)
t2 = timer()
self.chi0_timing[2] += t2-t1
temp_gpu = cp.asarray(temp)
t1 = timer()
nm2v_im = calc_nm2v(self.xocc_gpu[spin], self.xvrt_gpu[spin].T,
temp_gpu.reshape(self.norbs, self.norbs))
t2 = timer()
self.chi0_timing[3] += t2-t1
t1 = timer()
div_eigenenergy(self.ksn2e, self.ksn2f, self.vstart, self.nfermi, comega,
spin, self.use_numba, nm2v_re, nm2v_im,
div_numba=self.div_numba, GPU=True,
blockspergrid=self.block_size[spin],
threadsperblock=self.grid_size[spin])
t2 = timer()
self.chi0_timing[4] += t2-t1
# real part
t1 = timer()
temp_gpu = calc_ab2v(self.xvrt_gpu[spin], self.xocc_gpu[spin].T, nm2v_re)
t2 = timer()
self.chi0_timing[5] += t2-t1
temp = cp.asnumpy(temp_gpu)
t1 = timer()
sp2dn[spin] = calc_sab(self.v_dab, self.cc_da_trans,
temp.reshape(self.norbs*self.norbs))
t2 = timer()
self.chi0_timing[6] += t2-t1
# imag part
t1 = timer()
temp_gpu = calc_ab2v(self.xvrt_gpu[spin], self.xocc_gpu[spin].T, nm2v_im)
t2 = timer()
self.chi0_timing[7] += t2-t1
temp = cp.asnumpy(temp_gpu)
t1 = timer()
sp2dn[spin] += 1.0j*calc_sab(self.v_dab, self.cc_da_trans,
temp.reshape(self.norbs*self.norbs))
t2 = timer()
self.chi0_timing[8] += t2-t1
return dnout