# Source code for gpaw.fftw

```python
"""
Python wrapper for FFTW3 library
================================

.. autoclass:: FFTPlans

"""
from __future__ import annotations

import weakref
from types import ModuleType

import numpy as np
from scipy.fft import fftn, ifftn, irfftn, rfftn
import warnings

import gpaw.cgpaw as cgpaw
from gpaw.typing import Array1D, Array3D, DTypeLike, IntVector

# Planner flags; create_plans() forwards the chosen value to
# ``cgpaw.FFTWPlan``.
# NOTE(review): the numbers look like FFTW's FFTW_ESTIMATE (1 << 6),
# FFTW_MEASURE (0), FFTW_PATIENT (1 << 5) and FFTW_EXHAUSTIVE (1 << 3)
# -- confirm against fftw3.h.
ESTIMATE = 64
MEASURE = 0
PATIENT = 32
EXHAUSTIVE = 8

# Cache used by create_plans(): maps the key
# (grid size, dtype, flags, array module) to a weak reference to the plan
# object, so the cache itself never keeps a plan alive.
_plan_cache: dict[tuple, weakref.ReferenceType] = {}

def have_fftw() -> bool:
    """Tell whether GPAW's C extension was compiled with FFTW.

    The check is simply whether ``cgpaw`` exposes an ``FFTWPlan``
    attribute.
    """
    fftw_available = hasattr(cgpaw, 'FFTWPlan')
    return fftw_available

def check_fft_size(n: int, factors=(2, 3, 5, 7)) -> bool:
    """Check if n is an efficient fft size.

    Efficient means that n can be factored into small primes (2, 3, 5, 7
    unless other *factors* are given).  Non-positive n is never efficient.

    >>> check_fft_size(17)
    False
    >>> check_fft_size(18)
    True
    """
    # 0 is divisible by everything and would never be reduced to 1;
    # negative numbers can never reach 1 either.
    if n < 1:
        return False
    for x in factors:
        if x < 2:
            # Dividing by 1 (or less) makes no progress - skip it.
            continue
        # Strip every power of x.  Once x no longer divides n it cannot
        # divide any later quotient, so one pass over factors is enough.
        while n % x == 0:
            n //= x
    return bool(n == 1)

def get_efficient_fft_size(N: int, n=1, factors=(2, 3, 5, 7)) -> int:
    """Return smallest efficient fft size.

    Must be greater than or equal to N and divisible by n.  Multiples of
    n are tried in increasing order until check_fft_size() accepts one,
    so the search does not terminate if no multiple of n is efficient
    (for example n=11 with the default factors).

    >>> get_efficient_fft_size(17)
    18
    """
    # Round N up to a multiple of n (ceiling division).  Never start below
    # n itself: for N <= 0 the rounded value would be <= 0, which can
    # never be an efficient size.
    N = max(-(-N // n) * n, n)
    while not check_fft_size(N, factors):
        N += n
    return N

[docs]def empty(shape, dtype=float):
"""numpy.empty() equivalent with 16 byte alignment."""
assert dtype == complex
N = np.prod(shape)
a = np.empty(2 * N + 1)
offset = (a.ctypes.data % 16) // 8
a = a[offset:2 * N + offset].view(complex)
a.shape = shape
return a

def create_plans(size_c: IntVector,
                 dtype: DTypeLike,
                 flags: int = MEASURE,
                 xp: ModuleType = np) -> FFTPlans:
    """Return plan-objects for FFT and inverse FFT, reusing live ones.

    Plans are remembered through weak references keyed on
    ``(size, dtype, flags, xp)``.  A new plan is built when no entry
    exists or when the remembered plan has been garbage collected:
    CuPy plans if ``xp`` is not numpy, otherwise FFTW plans when
    available and the scipy-based fallback when not.
    """
    key = (tuple(size_c), dtype, flags, xp)

    # A stored weakref yields None once its plan has died.
    ref = _plan_cache.get(key)
    cached = ref() if ref is not None else None
    if cached is not None:
        return cached

    plan: FFTPlans
    if xp is not np:
        plan = CuPyFFTPlans(size_c, dtype)
    elif have_fftw():
        plan = FFTWPlans(size_c, dtype, flags)
    else:
        plan = NumpyFFTPlans(size_c, dtype)
    _plan_cache[key] = weakref.ref(plan)
    return plan

class FFTPlans:
    """Work arrays and common driver routines for 3-d FFT plans.

    Two arrays sharing the same memory are created:

    * ``tmp_Q``: complex reciprocal-space array of shape ``self.shape``.
    * ``tmp_R``: real-space array.  For ``dtype == float`` it is a float
      view of ``tmp_Q`` cut to ``size_c[2]`` points along the last axis;
      otherwise it is ``tmp_Q`` itself.

    Subclasses implement :meth:`fft` and :meth:`ifft`.
    """
    def __init__(self,
                 size_c: IntVector,
                 dtype: DTypeLike,
                 empty=empty):
        self.shape: tuple[int, ...]
        if dtype == float:
            # Real transform: only half of the last axis (plus one point)
            # is stored in reciprocal space.
            nz = size_c[2]
            self.shape = (size_c[0], size_c[1], nz // 2 + 1)
            self.tmp_Q = empty(self.shape, complex)
            self.tmp_R = self.tmp_Q.view(float)[:, :, :nz]
        else:
            self.shape = tuple(size_c)
            self.tmp_Q = empty(size_c, complex)
            self.tmp_R = self.tmp_Q

    def fft(self) -> None:
        """Do FFT from ``tmp_R`` to ``tmp_Q``.

        >>> plans = create_plans([4, 1, 1], float)
        >>> plans.tmp_R[:, 0, 0] = [1, 0, 1, 0]
        >>> plans.fft()
        >>> plans.tmp_Q[:, 0, 0]
        array([2.+0.j, 0.+0.j, 2.+0.j, 0.+0.j])
        """
        raise NotImplementedError

    def ifft(self) -> None:
        """Do inverse FFT from ``tmp_Q`` to ``tmp_R``.

        >>> plans = create_plans([4, 1, 1], complex)
        >>> plans.tmp_Q[:, 0, 0] = [0, 1j, 0, 0]
        >>> plans.ifft()
        >>> plans.tmp_R[:, 0, 0]
        array([ 0.+1.j, -1.+0.j,  0.-1.j,  1.+0.j])
        """
        raise NotImplementedError

    def ifft_sphere(self, coef_G, pw, out_R):
        """Inverse-transform plane-wave coefficients into ``out_R``.

        ``coef_G`` is pasted into ``tmp_Q`` via ``pw.paste()``, the
        inverse FFT is run and the result is handed to
        ``out_R.scatter_from()``.  When ``coef_G`` is None only
        ``out_R.scatter_from(None)`` is called.
        """
        if coef_G is None:
            out_R.scatter_from(None)
            return

        pw.paste(coef_G, self.tmp_Q)
        if pw.dtype == float:
            # Fill the remaining entries of the [:, :, 0] plane with
            # complex conjugates of already pasted ones (presumably the
            # Hermitian symmetry the real inverse transform relies on).
            plane = self.tmp_Q[:, :, 0]
            size_c = out_R.desc.size_c
            n = size_c[0] // 2 - 1
            m = size_c[1] // 2 - 1
            plane[0, -m:] = plane[0, m:0:-1].conj()
            plane[n:0:-1, -m:] = plane[-n:, m:0:-1].conj()
            plane[-n:, -m:] = plane[n:0:-1, m:0:-1].conj()
            plane[-n:, 0] = plane[n:0:-1, 0].conj()
        self.ifft()
        out_R.scatter_from(self.tmp_R)

    def fft_sphere(self, in_R, pw):
        """Transform ``in_R.data`` and return the coefficients cut by ``pw``.

        The coefficients are scaled by one over the number of real-space
        grid points.
        """
        self.tmp_R[:] = in_R.data
        self.fft()
        scale = 1 / self.tmp_R.size
        return pw.cut(self.tmp_Q) * scale

class FFTWPlans(FFTPlans):
    """FFTW3 3d transforms."""
    def __init__(self, size_c, dtype, flags=MEASURE):
        if not have_fftw():
            raise ImportError('Not compiled with FFTW.')
        super().__init__(size_c, dtype)
        # Forward plan (sign -1): tmp_R -> tmp_Q; backward (sign 1): reverse.
        forward, backward = -1, 1
        self._fftplan = cgpaw.FFTWPlan(self.tmp_R, self.tmp_Q,
                                       forward, flags)
        self._ifftplan = cgpaw.FFTWPlan(self.tmp_Q, self.tmp_R,
                                        backward, flags)

    def fft(self):
        """Execute the forward plan (``tmp_R`` to ``tmp_Q``)."""
        cgpaw.FFTWExecute(self._fftplan)

    def ifft(self):
        """Execute the backward plan (``tmp_Q`` to ``tmp_R``)."""
        cgpaw.FFTWExecute(self._ifftplan)

    def __del__(self):
        # Attributes will not exist if execution stops during FFTW planning
        for name in ('_fftplan', '_ifftplan'):
            if hasattr(self, name):
                cgpaw.FFTWDestroy(getattr(self, name))

class NumpyFFTPlans(FFTPlans):
    """Fallback implementation using the ``scipy.fft`` routines."""
    def fft(self):
        """Transform ``tmp_R`` into ``tmp_Q`` (real or complex input)."""
        transform = rfftn if self.tmp_R.dtype == float else fftn
        self.tmp_Q[:] = transform(self.tmp_R, overwrite_x=True)

    def ifft(self):
        """Transform ``tmp_Q`` into ``tmp_R`` using ``norm='forward'``."""
        inverse = irfftn if self.tmp_R.dtype == float else ifftn
        self.tmp_R[:] = inverse(self.tmp_Q, self.tmp_R.shape,
                                norm='forward', overwrite_x=True)

def rfftn_patch(tmp_R):
    """Emulate ``rfftn`` with a full ``fftn`` and warn about it.

    Used as a fallback when ``cupyx.scipy.fft.rfftn`` raises a
    ``CuFFTError``.  The full transform of the 3-d array is computed and
    only the first ``n // 2 + 1`` points along the last axis are kept,
    where ``n`` is the length of that axis.
    """
    from gpaw.gpu import cupyx
    # stacklevel=2 attributes the warning to the calling FFT routine.
    warnings.warn(
        f'CuFFTError for cupyx.scipy.fft.rfftn {tmp_R.shape}. '
        'Reverting to using just fftn. This is a bug in ROCM cupy.',
        stacklevel=2)
    return cupyx.scipy.fft.fftn(tmp_R)[:, :, :tmp_R.shape[-1] // 2 + 1]

class CuPyFFTPlans(FFTPlans):
    """FFT plans working on CuPy arrays through ``cupyx.scipy.fft``.

    The work arrays are allocated with ``cupy.empty``.  Index arrays
    obtained from plane-wave descriptors are cached in ``Q_G_cache``.
    """
    def __init__(self,
                 size_c: IntVector,
                 dtype: DTypeLike):
        from gpaw.core import PWDesc
        from gpaw.gpu import cupy as cp
        self.dtype = dtype
        super().__init__(size_c, dtype, empty=cp.empty)
        self.Q_G_cache: dict[PWDesc, Array1D] = {}

    def fft(self):
        """Transform ``tmp_R`` into ``tmp_Q``.

        For real data ``rfftn`` is tried first; on ``CuFFTError`` the
        ``rfftn_patch()`` fallback is used instead.
        """
        from gpaw.gpu import cupyx
        from gpaw.gpu import cupy as cp
        if self.tmp_R.dtype != float:
            self.tmp_Q[:] = cupyx.scipy.fft.fftn(self.tmp_R)
            return
        try:
            self.tmp_Q[:] = cupyx.scipy.fft.rfftn(self.tmp_R)
        except cp.cuda.cufft.CuFFTError:
            self.tmp_Q[:] = rfftn_patch(self.tmp_R)

    def ifft(self):
        """Transform ``tmp_Q`` into ``tmp_R`` using ``norm='forward'``."""
        from gpaw.gpu import cupyx
        if self.tmp_R.dtype == float:
            inverse = cupyx.scipy.fft.irfftn
        else:
            inverse = cupyx.scipy.fft.ifftn
        self.tmp_R[:] = inverse(self.tmp_Q, self.tmp_R.shape,
                                norm='forward',
                                overwrite_x=True)

    def indices(self, pw):
        """Return ``pw.indices(self.shape)`` as a CuPy array (cached)."""
        from gpaw.gpu import cupy as cp
        if self.Q_G_cache.get(pw) is None:
            self.Q_G_cache[pw] = cp.asarray(pw.indices(self.shape))
        return self.Q_G_cache[pw]

    def ifft_sphere(self, coef_G, pw, out_R):
        """Inverse-transform plane-wave coefficients into ``out_R``.

        With a single-rank communicator the result is written straight
        into ``out_R.data``; otherwise it goes to ``tmp_R`` and is passed
        to ``out_R.scatter_from()``.  When ``coef_G`` is None only
        ``out_R.scatter_from(None)`` is called.
        """
        from gpaw.gpu import cupyx

        if coef_G is None:
            out_R.scatter_from(None)
            return

        array_R = out_R.data if out_R.desc.comm.size == 1 else self.tmp_R
        array_Q = self.tmp_Q

        # Zero everything, then drop the coefficients at their flat indices.
        array_Q[:] = 0.0
        Q_G = self.indices(pw)
        array_Q.ravel()[Q_G] = coef_G

        if self.dtype == complex:
            array_R[:] = cupyx.scipy.fft.ifftn(
                array_Q, array_Q.shape,
                norm='forward', overwrite_x=True)
        else:
            # We need a GPU kernel for this stuff:
            plane = array_Q[:, :, 0]
            size_c = out_R.desc.size_c
            n = size_c[0] // 2 - 1
            m = size_c[1] // 2 - 1
            plane[0, -m:] = plane[0, m:0:-1].conj()
            plane[n:0:-1, -m:] = plane[-n:, m:0:-1].conj()
            plane[-n:, -m:] = plane[n:0:-1, m:0:-1].conj()
            plane[-n:, 0] = plane[n:0:-1, 0].conj()
            array_R[:] = cupyx.scipy.fft.irfftn(
                array_Q, out_R.desc.global_shape(),
                norm='forward', overwrite_x=True)

        if out_R.desc.comm.size > 1:
            out_R.scatter_from(array_R)

    def fft_sphere(self, in_R, pw):
        """Transform ``in_R`` and return the coefficients selected by ``pw``.

        The coefficients are scaled by ``1 / in_R.size``.
        """
        from gpaw.gpu import cupyx
        from gpaw.gpu import cupy as cp
        if self.dtype == complex:
            out_Q = cupyx.scipy.fft.fftn(in_R)
        else:
            try:
                out_Q = cupyx.scipy.fft.rfftn(in_R)
            except cp.cuda.cufft.CuFFTError:
                out_Q = rfftn_patch(in_R)
        Q_G = self.indices(pw)
        scale = 1 / in_R.size
        return out_Q.ravel()[Q_G] * scale

# The rest of this file will be removed in the future ...

def check_fftw_inputs(in_R, out_R):
    """Assert that two arrays form a valid 3-d FFT input/output pair.

    Both arrays must be 3-dimensional with dtype float or complex.  Two
    complex arrays must have equal shapes.  Otherwise one array must be
    real and the other complex, with equal leading two dimensions and a
    complex last dimension of ``1 + n // 2`` where ``n`` is the real one.

    Raises AssertionError when a requirement is violated.
    """
    for arr in (in_R, out_R):
        # Note: Arrays not necessarily contiguous due to 16-byte alignment
        assert arr.ndim == 3  # We can perhaps relax this requirement
        assert arr.dtype in (float, complex)

    if in_R.dtype == complex and out_R.dtype == complex:
        assert in_R.shape == out_R.shape
        return

    # One real and one complex:
    if in_R.dtype == float:
        real, cplx = in_R, out_R
    else:
        real, cplx = out_R, in_R
    assert cplx.dtype == complex
    assert real.shape[:2] == cplx.shape[:2]
    assert cplx.shape[2] == 1 + real.shape[2] // 2

class FFTPlan:
    """FFT 3d transform.

    Base class that validates the input/output arrays with
    check_fftw_inputs() and stores them together with the transform
    sign and the planner flags.  Subclasses implement :meth:`execute`.
    """
    def __init__(self,
                 in_R: Array3D,
                 out_R: Array3D,
                 sign: int,
                 flags: int = MEASURE):
        check_fftw_inputs(in_R, out_R)
        self.in_R, self.out_R = in_R, out_R
        self.sign, self.flags = sign, flags

    def execute(self) -> None:
        """Run the transform; must be provided by subclasses."""
        raise NotImplementedError

class FFTWPlan(FFTPlan):
    """FFTW3 3d transform.

    Raises ImportError when GPAW was not compiled with FFTW.
    """
    def __init__(self, in_R, out_R, sign, flags=MEASURE):
        if not have_fftw():
            raise ImportError('Not compiled with FFTW.')
        # Validate (and store) the arrays *before* they are handed to the
        # C planner, so mismatched arrays are rejected in Python first.
        FFTPlan.__init__(self, in_R, out_R, sign, flags)
        self._ptr = cgpaw.FFTWPlan(in_R, out_R, sign, flags)

    def execute(self):
        """Execute the plan created in ``__init__``."""
        cgpaw.FFTWExecute(self._ptr)

    def __del__(self):
        # _ptr is missing if __init__ failed before the plan was created;
        # it is reset to None so the plan is destroyed only once.
        if getattr(self, '_ptr', None):
            cgpaw.FFTWDestroy(self._ptr)
            self._ptr = None

class NumpyFFTPlan(FFTPlan):
    """Numpy fallback."""
    def execute(self):
        """Transform ``in_R`` into ``out_R`` with ``numpy.fft``.

        Real input uses ``rfftn``; real output uses ``irfftn``; complex
        to complex uses ``ifftn`` for ``sign == 1`` and ``fftn``
        otherwise.  Inverse results are multiplied by the number of
        output points.
        """
        src = self.in_R
        if src.dtype == float:
            self.out_R[:] = np.fft.rfftn(src)
            return

        if self.out_R.dtype == float:
            inverse = np.fft.irfftn
        elif self.sign == 1:
            inverse = np.fft.ifftn
        else:
            self.out_R[:] = np.fft.fftn(src)
            return

        self.out_R[:] = inverse(src, self.out_R.shape)
        self.out_R *= self.out_R.size

def create_plan(in_R: Array3D,
                out_R: Array3D,
                sign: int,
                flags: int = MEASURE) -> FFTPlan:
    """Create a single-transform plan, preferring FFTW when available.

    Returns an FFTWPlan if have_fftw() is true and a NumpyFFTPlan
    otherwise.
    """
    plan_class = FFTWPlan if have_fftw() else NumpyFFTPlan
    return plan_class(in_R, out_R, sign, flags)
```