from __future__ import division
import sys
from timeit import default_timer as timer
import numpy as np
from pynao.m_chi0_noxv import calc_sab, div_eigenenergy, calc_ab2v, calc_nm2v
def gw_chi0_mv(self, dvin, comega):
    """Run the chi0 contraction chain on ``dvin`` for the value ``comega``.

    The real and imaginary parts of ``dvin`` are pushed separately through
    ``calc_sab`` -> ``calc_nm2v`` -> ``div_eigenenergy`` -> ``calc_ab2v``
    (accumulated over all spin channels) -> ``calc_sab`` and recombined
    into one complex result at the end.  Judging by the name this is
    presumably the matrix-vector product with the non-interacting response
    function chi0 -- confirm against the caller.

    Parameters
    ----------
    dvin : array exposing ``.real`` and ``.imag``
        Input vector; only its real and imaginary parts are read.
    comega : scalar
        Forwarded unchanged to ``div_eigenenergy``; presumably a complex
        frequency -- TODO confirm.

    Returns
    -------
    The value ``chi0_re + 1.0j*chi0_im`` built from the two final
    ``calc_sab`` results.

    Notes
    -----
    Elapsed wall-clock time is accumulated into ``self.chi0_timing``:
    slots 0/1 -> first ``calc_sab`` (real/imag), 2/3 -> ``calc_nm2v``
    (real/imag), 4 -> ``div_eigenenergy``, 5/6 -> ``calc_ab2v``
    (real/imag), 7/8 -> final ``calc_sab`` (real/imag).
    """
    norbs = self.norbs

    # First contraction, real part of the input.
    t1 = timer()
    sab_re = calc_sab(self.cc_da, self.v_dab_trans, dvin.real)
    t2 = timer()
    self.chi0_timing[0] += t2 - t1

    # First contraction, imaginary part of the input.
    t1 = timer()
    sab_im = calc_sab(self.cc_da, self.v_dab_trans, dvin.imag)
    t2 = timer()
    self.chi0_timing[1] += t2 - t1

    # The (norbs, norbs) view does not depend on spin, so build it once.
    sab_re_2d = sab_re.reshape(norbs, norbs)
    sab_im_2d = sab_im.reshape(norbs, norbs)

    # Accumulators summed over every spin channel.
    ab2v_re = np.zeros((norbs, norbs), dtype=self.dtype)
    ab2v_im = np.zeros((norbs, norbs), dtype=self.dtype)

    for spin in range(self.nspin):
        xocc = self.xocc[spin]
        xvrt = self.xvrt[spin]

        t1 = timer()
        nm2v_re = calc_nm2v(xocc, xvrt.T, sab_re_2d)
        t2 = timer()
        self.chi0_timing[2] += t2 - t1

        t1 = timer()
        nm2v_im = calc_nm2v(xocc, xvrt.T, sab_im_2d)
        t2 = timer()
        self.chi0_timing[3] += t2 - t1

        # The return value is discarded, so nm2v_re / nm2v_im are presumably
        # updated in place by div_eigenenergy -- verify in m_chi0_noxv.
        t1 = timer()
        div_eigenenergy(self.ksn2e, self.ksn2f, self.vstart, self.nfermi,
                        comega, spin, self.use_numba, nm2v_re, nm2v_im,
                        div_numba=self.div_numba)
        t2 = timer()
        self.chi0_timing[4] += t2 - t1

        t1 = timer()
        ab2v_re += calc_ab2v(xvrt, xocc.T, nm2v_re)
        t2 = timer()
        self.chi0_timing[5] += t2 - t1

        t1 = timer()
        ab2v_im += calc_ab2v(xvrt, xocc.T, nm2v_im)
        t2 = timer()
        self.chi0_timing[6] += t2 - t1

    # Final contraction, real part (operates on the flattened accumulator).
    t1 = timer()
    chi0_re = calc_sab(self.v_dab, self.cc_da_trans,
                       ab2v_re.reshape(norbs*norbs))
    t2 = timer()
    self.chi0_timing[7] += t2 - t1

    # Final contraction, imaginary part.
    t1 = timer()
    chi0_im = calc_sab(self.v_dab, self.cc_da_trans,
                       ab2v_im.reshape(norbs*norbs))
    t2 = timer()
    self.chi0_timing[8] += t2 - t1

    return chi0_re + 1.0j*chi0_im
def gw_chi0_mv_gpu(self, dvin, comega):
    """GPU variant of the chi0 contraction chain for ``dvin`` and ``comega``.

    The first and last ``calc_sab`` contractions receive host data
    (``dvin.real`` / ``dvin.imag`` and the ``cp.asnumpy`` copies of the
    accumulators).  The per-spin steps ``calc_nm2v``, ``div_eigenenergy``
    and ``calc_ab2v`` are fed CuPy arrays (``self.xocc_gpu``,
    ``self.xvrt_gpu`` and the ``cp.asarray`` copies of the first
    contraction).  Judging by the name this is presumably the
    matrix-vector product with the non-interacting response function
    chi0 -- confirm against the caller.

    Parameters
    ----------
    dvin : array exposing ``.real`` and ``.imag``
        Input vector; only its real and imaginary parts are read.
    comega : scalar
        Forwarded unchanged to ``div_eigenenergy``; presumably a complex
        frequency -- TODO confirm.

    Returns
    -------
    The value ``chi0_re + 1.0j*chi0_im`` built from the two final
    ``calc_sab`` results.

    Notes
    -----
    Elapsed wall-clock time is accumulated into ``self.chi0_timing``:
    slots 0/1 -> first ``calc_sab`` (real/imag), 2/3 -> ``calc_nm2v``
    (real/imag), 4 -> ``div_eigenenergy``, 5/6 -> ``calc_ab2v``
    (real/imag), 7/8 -> final ``calc_sab`` (real/imag).  The
    ``cp.asarray`` / ``cp.asnumpy`` transfers sit outside every timed
    region.
    """
    # Imported lazily so the module can be loaded without CuPy installed.
    import cupy as cp

    norbs = self.norbs

    # First contraction, real part of the input, then upload to the device.
    t1 = timer()
    sab_re = calc_sab(self.cc_da, self.v_dab_trans, dvin.real)
    t2 = timer()
    self.chi0_timing[0] += t2 - t1
    sab_re_gpu = cp.asarray(sab_re)

    # First contraction, imaginary part of the input, then upload.
    t1 = timer()
    sab_im = calc_sab(self.cc_da, self.v_dab_trans, dvin.imag)
    t2 = timer()
    self.chi0_timing[1] += t2 - t1
    sab_im_gpu = cp.asarray(sab_im)

    # The (norbs, norbs) view does not depend on spin, so build it once.
    sab_re_2d = sab_re_gpu.reshape(norbs, norbs)
    sab_im_2d = sab_im_gpu.reshape(norbs, norbs)

    # Device-side accumulators summed over every spin channel.
    ab2v_re_gpu = cp.zeros((norbs, norbs), dtype=self.dtype)
    ab2v_im_gpu = cp.zeros((norbs, norbs), dtype=self.dtype)

    for spin in range(self.nspin):
        xocc = self.xocc_gpu[spin]
        xvrt = self.xvrt_gpu[spin]

        t1 = timer()
        nm2v_re = calc_nm2v(xocc, xvrt.T, sab_re_2d)
        t2 = timer()
        self.chi0_timing[2] += t2 - t1

        t1 = timer()
        nm2v_im = calc_nm2v(xocc, xvrt.T, sab_im_2d)
        t2 = timer()
        self.chi0_timing[3] += t2 - t1

        # The return value is discarded, so nm2v_re / nm2v_im are presumably
        # updated in place by div_eigenenergy -- verify in m_chi0_noxv.
        # NOTE(review): blockspergrid is fed from self.block_size and
        # threadsperblock from self.grid_size; the names look crossed.
        # Kept exactly as in the original -- confirm against the
        # definitions of block_size / grid_size and div_eigenenergy.
        t1 = timer()
        div_eigenenergy(self.ksn2e, self.ksn2f, self.vstart, self.nfermi,
                        comega, spin, self.use_numba, nm2v_re, nm2v_im,
                        div_numba=self.div_numba, GPU=True,
                        blockspergrid=self.block_size[spin],
                        threadsperblock=self.grid_size[spin])
        t2 = timer()
        self.chi0_timing[4] += t2 - t1

        t1 = timer()
        ab2v_re_gpu += calc_ab2v(xvrt, xocc.T, nm2v_re)
        t2 = timer()
        self.chi0_timing[5] += t2 - t1

        t1 = timer()
        ab2v_im_gpu += calc_ab2v(xvrt, xocc.T, nm2v_im)
        t2 = timer()
        self.chi0_timing[6] += t2 - t1

    # Download the real accumulator and run the final contraction on it.
    ab2v_re = cp.asnumpy(ab2v_re_gpu)
    t1 = timer()
    chi0_re = calc_sab(self.v_dab, self.cc_da_trans,
                       ab2v_re.reshape(norbs*norbs))
    t2 = timer()
    self.chi0_timing[7] += t2 - t1

    # Same for the imaginary accumulator.
    ab2v_im = cp.asnumpy(ab2v_im_gpu)
    t1 = timer()
    chi0_im = calc_sab(self.v_dab, self.cc_da_trans,
                       ab2v_im.reshape(norbs*norbs))
    t2 = timer()
    self.chi0_timing[8] += t2 - t1

    return chi0_re + 1.0j*chi0_im