Source code for maicos.lib.math

#!/usr/bin/env python -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8
# -*-
#
# Copyright (c) 2023 Authors and contributors (see the AUTHORS.rst file for the full
# list of names)
#
# Released under the GNU Public Licence, v3 or any higher version
# SPDX-License-Identifier: GPL-3.0-or-later
"""Helper functions for mathematical and physical operations."""
from typing import Optional, Tuple, Union

import MDAnalysis as mda
import numpy as np
from scipy.fftpack import dst

from . import tables
from ._cmath import compute_structure_factor  # noqa: F401


# Max variation from the mean dt or dk that is allowed (~1e-10 suggested)
dt_dk_tolerance = 1e-8



[docs]
def FT(
    t: np.ndarray, x: np.ndarray, indvar: bool = True
) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """
    Discrete Fourier transformation using fast Fourier transformation (FFT).

    Parameters
    ----------
    t : numpy.ndarray
        Time values of the time series.
    x : numpy.ndarray
        Function values corresponding to the time series.
    indvar : bool
        If :obj:`True`, returns the FFT and frequency values. If :obj:`False`, returns
        only the FFT.

    Returns
    -------
    tuple(numpy.ndarray, numpy.ndarray) or numpy.ndarray
        If ``indvar`` is :obj:`True`, returns a tuple ``(k, xf2)`` where:
            - ``k`` (numpy.ndarray): Frequency values corresponding to the FFT.
            - ``xf2`` (numpy.ndarray): FFT of the input function, scaled by the time
              range and phase shifted.

        If indvar is :obj:`False`, returns the FFT (``xf2``) directly as a
        :class:`numpy.ndarray`.

    Raises
    ------
    RuntimeError
        If the time series is not equally spaced.

    Example
    -------
    >>> t = np.linspace(0, np.pi, 4)
    >>> x = np.sin(t)
    >>> k, xf2 = FT(t, x)
    >>> k
    array([-3. , -1.5,  0. ,  1.5])
    >>> np.round(xf2, 2)
    array([ 0.  +0.j  , -0.68+0.68j,  1.36+0.j  , -0.68-0.68j])

    See Also
    --------
    :func:`iFT` : For the inverse fourier transform.
    """
    a, b = np.min(t), np.max(t)
    dt = (t[-1] - t[0]) / float(len(t) - 1)  # timestep
    if (abs((t[1:] - t[:-1] - dt)) > dt_dk_tolerance).any():
        raise RuntimeError("Time series not equally spaced!")
    N = len(t)

    # calculate frequency values for FT
    k = np.fft.fftshift(np.fft.fftfreq(N, d=dt) * 2 * np.pi)

    # calculate FT of data
    xf = np.fft.fftshift(np.fft.fft(x))
    xf2 = xf * (b - a) / N * np.exp(-1j * k * a)

    if indvar:
        return k, xf2
    else:
        return xf2




[docs]
def iFT(
    k: np.ndarray, xf: np.ndarray, indvar: bool = True
) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """Inverse Fourier transformation using fast Fourier transformation (FFT).

    Takes the frequency series and the function as arguments. By default, returns the
    iFT and the time series. Setting indvar=False means the function returns only the
    iFT.

    Parameters
    ----------
    k : numpy.ndarray
        The frequency series.
    xf : numpy.ndarray
        The function series in the frequency domain.
    indvar : bool
        If :obj:`True`, return both the iFT and the time series. If :obj:`False`, return
        only the iFT.

    Returns
    -------
    tuple(numpy.ndarray, numpy.ndarray) or numpy.ndarray
        If indvar is :obj:`True`, returns a tuple containing the time series and the
        iFT. If indvar is :obj:`False`, returns only the iFT.

    Raises
    ------
    RuntimeError
        If the time series is not equally spaced.

    See Also
    --------
    :func:`FT` : For the Fourier transform.
    """
    dk = (k[-1] - k[0]) / float(len(k) - 1)  # timestep
    if (abs((k[1:] - k[:-1] - dk)) > dt_dk_tolerance).any():
        raise RuntimeError("Time series not equally spaced!")
    N = len(k)
    x = np.fft.ifftshift(np.fft.ifft(xf))
    t = np.fft.ifftshift(np.fft.fftfreq(N, d=dk)) * 2 * np.pi
    if N % 2 == 0:
        x2 = x * np.exp(-1j * t * N * dk / 2.0) * N * dk / (2 * np.pi)
    else:
        x2 = x * np.exp(-1j * t * (N - 1) * dk / 2.0) * N * dk / (2 * np.pi)
    if indvar:
        return t, x2
    else:
        return x2




[docs]
def correlation(
    a: np.ndarray, b: Optional[np.ndarray] = None, subtract_mean: bool = False
) -> np.ndarray:
    """Calculate correlation or autocorrelation.

    Uses fast fourier transforms to give the correlation function of two arrays, or, if
    only one array is given, the autocorrelation. Setting ``subtract_mean=True`` causes
    the mean to be subtracted from the input data.

    Parameters
    ----------
    a : numpy.ndarray
        The first input array to calculate the correlation
    b : numpy.ndarray
        The second input array. If :obj:`None`, autocorrelation of ``a`` is calculated.
    subtract_mean : bool
        If :obj:`True`, subtract the mean from the input data.

    Returns
    -------
    numpy.ndarray
        The correlation or autocorrelation function.
    """
    meana = int(subtract_mean) * np.mean(
        a
    )  # essentially an if statement for subtracting mean
    a2 = np.append(
        a - meana, np.zeros(2 ** int(np.ceil((np.log(len(a)) / np.log(2)))) - len(a))
    )  # round up to a power of 2
    data_a = np.append(a2, np.zeros(len(a2)))  # pad with an equal number of zeros
    fra = np.fft.fft(data_a)  # FT the data
    if b is None:
        sf = (
            np.conj(fra) * fra
        )  # take the conj and multiply pointwise if autocorrelation
    else:
        meanb = int(subtract_mean) * np.mean(b)
        b2 = np.append(
            b - meanb,
            np.zeros(2 ** int(np.ceil((np.log(len(b)) / np.log(2)))) - len(b)),
        )
        data_b = np.append(b2, np.zeros(len(b2)))
        frb = np.fft.fft(data_b)
        sf = np.conj(fra) * frb
    cor = np.real(np.fft.ifft(sf)[: len(a)]) / np.array(
        range(len(a), 0, -1)
    )  # inverse FFT and normalization
    return cor




[docs]
def scalar_prod_corr(
    a: np.ndarray, b: Optional[np.ndarray] = None, subtract_mean: bool = False
) -> np.ndarray:
    """Give the corr. function of the scalar product of two vector timeseries.

    Arguments should be given in the form a[t, i], where t is the time variable along
    which the correlation is calculated, and i indexes the vector components.

    Parameters
    ----------
    a : numpy.ndarray
        The first vector timeseries of shape (t, i).
    b : numpy.ndarray
        The second vector timeseries of shape (t, i). If :obj:`None`, correlation with
        itself is calculated.
    subtract_mean : bool
        If :obj:`True`, subtract the mean from the timeseries before calculating the
        correlation.

    Returns
    -------
    numpy.ndarray
        The correlation function of the scalar product of the vector timeseries.

    """
    corr = np.zeros(len(a[:, 0]))

    if b is None:
        for i in range(0, len(a[0, :])):
            corr[:] += correlation(a[:, i], None, subtract_mean)

    else:
        for i in range(0, len(a[0, :])):
            corr[:] += correlation(a[:, i], b[:, i], subtract_mean)

    return corr




[docs]
def correlation_time(
    timeseries: np.ndarray,
    method: str = "sokal",
    mintime: int = 3,
    sokal_factor: float = 8,
) -> float:
    r"""Compute the integrated correlation time of a time series.

    The integrated correlation time (in units of the sampling interval) is given by

    .. math::
        \tau = \sum\limits_{t=1}^{N_\mathrm{cut}} C(t) \left(1 - \frac{t}{N}\right)

    where :math:`N_\mathrm{cut} < N` is a subset of the time series of length :math:`N`
    and :math:`C(t)` is the discrete-time autocorrelation function. To obtain the upper
    limit of the sum :math:`N_\mathrm{cut}` two different methods are provided:

    1. For "chodera" :footcite:p:`choderaWeightedHistogramAnalysis2007`
       :math:`N_\mathrm{cut}` is given by the time when :math:`C(t)`
       crosses zero the first time.

    2. For "sokal" :footcite:p:`sokalLecture` :math:`N_\mathrm{cut}` is determined
       iteratively by stepwise increasing until

       .. math::
            N_\mathrm{cut} \geq c \cdot \tau

       where :math:`c` is the constant ``sokal_factor``. If the condition is never
       fulfilled, ``-1`` is returned, indicating that the time series does not provide
       sufficient statistics to estimate a
       correlation time.

    While both methods give the same correlation time for a smooth time series that
    decays to 0, "sokal" will results in a more reasonable result for actual time series
    that are noisy and cross zero several times.

    Parameters
    ----------
    timeseries : numpy.ndarray
        The time series used to calculate the correlation time from.
    method : {"sokal", "chodera"}
        Method to choose summation cutoff :math:`N_\mathrm{cut}`.
    mintime: int
        Minimum possible value for :math:`N_\mathrm{cut}`.
    sokal_factor : float
        Cut-off factor :math:`c` for the Sokal method.

    Returns
    -------
    tau : float
        Integrated correlation time :math:`\tau`. If ``-1`` (only for
        ``method="sokal"``) the provided time series does not provide sufficient
        statistics to estimate a correlation time.

    Raises
    ------
    ValueError
        If mintime is larger than the length of the timeseries.
    ValueError
        If method is not one of "sokal" or "chodera".

    References
    ----------
    .. footbibliography::
    """
    if mintime > len(timeseries):
        raise ValueError(
            f"mintime ({mintime}) has to be smaller then the length of `timeseries` "
            f"({len(timeseries)})."
        )

    corr = correlation(timeseries, subtract_mean=True)

    if method == "sokal":
        for cutoff in range(mintime, len(timeseries)):
            tau = np.sum(
                (1 - np.arange(1, cutoff) / len(timeseries)) * corr[1:cutoff] / corr[0]
            )
            if cutoff >= sokal_factor * tau:
                break

            if cutoff > len(timeseries) / 3:
                return -1

    elif method == "chodera":
        cutoff = np.max([mintime, np.min(np.argwhere(corr < 0))])
        tau = np.sum(
            (1 - np.arange(1, cutoff) / len(timeseries)) * corr[1:cutoff] / corr[0]
        )
    else:
        raise ValueError(
            f"Unknown method: {method}. Chose either 'sokal' or 'chodera'."
        )
    return tau




[docs]
def new_mean(old_mean: float, data: float, length: int) -> float:
    r"""Compute the arithmetic mean of a series iteratively.

    Compute the arithmetic mean of n samples based on an existing mean of n-1 and the
    n-th value.

    Given the mean of a data series

    .. math::

        \bar x_N = \frac{1}{N} \sum_{n=1}^N x_n

    we seperate the last value

    .. math::

        \bar x_N = \frac{1}{N} \sum_{n=1}^{N-1} x_n + \frac{x_N}{N}

    and multiply 1 = (N - 1)/(N - 1)

    .. math::

        \bar x_N = \frac{N-1}{N} \frac{1}{N-1} \\ \sum_{n=1}^{N-1} x_n + \frac{x_N}{N}

    The first term can be identified as the mean of the first N - 1 values and we arrive
    at

    .. math::

        \bar x_N = \frac{N-1}{N} \bar x_{N-1} + \frac{x_N}{N}


    Parameters
    ----------
    old_mean : float
        arithmetic mean of the first n - 1 samples.
    data : float
        n-th value of the series.
    length : int
        Length of the updated series, here called n.

    Returns
    -------
    new_mean : float
        Updated mean of the series of n values.

    Examples
    --------
    The mean of a data set can easily be calculated from the data points. However this
    requires one to keep all data points on hand until the end of the calculation.

    >>> np.mean([1, 3, 5, 7])
    4.0

    Alternatively, one can update an existing mean, this requires only knowledge of the
    total number of samples.

    >>> new_mean(np.mean([1, 3, 5]), data=7, length=4)
    4.0
    """
    return ((length - 1) * old_mean + data) / length




[docs]
def new_variance(
    old_variance: Union[float, np.ndarray],
    old_mean: Union[float, np.ndarray],
    new_mean: Union[float, np.ndarray],
    data: Union[float, np.ndarray],
    length: int,
) -> Union[float, np.ndarray]:
    r"""Calculate the variance of a timeseries iteratively.

    The variance of a timeseries :math:`x_n` can be calculated iteratively by using the
    following formula:

    .. math::

        S_n = S_n-1 + (n-1) * (x_n - \bar{x}_n-1)^2 / (n-1)

    Here, :math:`\bar{x}_n` is the mean of the timeseries up to the :math:`n`-th value.

    Floating point imprecision can lead to slight negative variances leading non defined
    standard deviations. Therefore a negetaive variance is set to 0.

    Parameters
    ----------
    old_variance : float, numpy.ndarray
        The variance of the first n-1 samples.
    old_mean : float
        The mean of the first n-1 samples.
    new_mean : float, numpy.ndarray
        The mean of the full n samples.
    data : float, numpy.ndarray
        The n-th value of the series.
    length : int
        Length of the updated series, here called n.

    Returns
    -------
    new_variance : float
        Updated variance of the series of n values.

    Examples
    --------
    The data set ``[1, 5, 5, 1]`` has a variance of ``4.0``

    >>> np.var([1, 5, 5, 1])
    4.0

    Knowing the total number of data points, this operation can be performed
    iteratively.

    >>> new_variance(
    ...     old_variance=np.var([1, 5, 5]),
    ...     old_mean=np.mean([1, 5, 5]),
    ...     new_mean=np.mean([1, 5, 5, 1]),
    ...     data=1,
    ...     length=4,
    ... )
    4.0
    """
    S_old = old_variance * (length - 1)
    S_new = S_old + (data - old_mean) * (data - new_mean)

    if type(S_new) is np.ndarray:
        S_new[S_new < 0] = 0
    else:
        if S_new < 0:
            S_new = 0

    return S_new / length




[docs]
def center_cluster(ag: mda.AtomGroup, weights: np.ndarray) -> np.ndarray:
    """Calculate the center of the atomgroup with respect to some weights.

    Parameters
    ----------
    ag : MDAnalysis.core.groups.AtomGroup
        Group of atoms to calculate the center for.

    weights : numpy.ndarray
        Weights in the shape of ag.

    Returns
    -------
    com : numpy.ndarray
        The center with respect to the weights.


    Without proper treatment of periodic boundrary conditions (PBC) most algorithms will
    result in wrong center calculations. As shown below without treating PBC the center
    of mass is located in the center of the box ::

       +-----------+
       |           |
       | 1   x   2 |
       |           |
       +-----------+

    However, the distance accross the box boundary is shorter and therefore the center
    with PBC should be located somwhere else. The correct way to calculate the center is
    described in :footcite:t:`bai_calculating_2008` where coordinates of the particles
    are projected on a circle and weighted by their mass in this two dimensional space.
    The center of mass is obtained by transforming this point back to the corresponding
    point in the real system. This is done seperately for each dimension.

    Reasons for doing this include the analysis of clusters in periodic boundrary
    conditions and consistent center of mass calculation across box boundraries. This
    procedure results in the right center of mass as seen below ::

       +-----------+
       |           |
       x 1       2 |
       |           |
       +-----------+
    """
    theta = (ag.positions / ag.universe.dimensions[:3]) * 2 * np.pi
    xi = (np.cos(theta) * weights[:, None]).sum(axis=0) / weights.sum()
    zeta = (np.sin(theta) * weights[:, None]).sum(axis=0) / weights.sum()
    theta_com = np.arctan2(-zeta, -xi) + np.pi
    return theta_com / (2 * np.pi) * ag.universe.dimensions[:3]




[docs]
def symmetrize(
    m: np.ndarray, axis: Union[None, int, Tuple[int]] = None, inplace: bool = False
) -> np.ndarray:
    """Symmeterize an array.

    The shape of the array is preserved, but the elements are symmetrized with respect
    to the given axis.

    Parameters
    ----------
    m : array_like
        Input array to symmetrize
    axis : int, tuple(int)
         Axis or axes along which to symmetrize over. The default, ``axis=None``, will
         symmetrize over all of the axes of the input array. If axis is negative it
         counts from the last to the first axis. If axis is a :obj:`tuple` of ints,
         symmetrizing is performed on all of the axes specified in the :obj:`tuple`.
    inplace : bool
        Do symmetrizations inplace. If :obj:`False` a new array is returned.

    Returns
    -------
    out : array_like
        the symmetrized array

    Notes
    -----
    symmetrize uses :meth:`np.flip` for flipping the indices.

    Examples
    --------
    >>> A = np.arange(10).astype(float)
    >>> A
    array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
    >>> symmetrize(A)
    array([4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5])
    >>> symmetrize(A, inplace=True)
    array([4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5])
    >>> A
    array([4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5])

    It also works for arrays with more than 1 dimensions in a general dimension.

    >>> A = np.arange(20).astype(float).reshape(2, 10).T
    >>> A
    array([[ 0., 10.],
           [ 1., 11.],
           [ 2., 12.],
           [ 3., 13.],
           [ 4., 14.],
           [ 5., 15.],
           [ 6., 16.],
           [ 7., 17.],
           [ 8., 18.],
           [ 9., 19.]])
    >>> symmetrize(A)
    array([[9.5, 9.5],
           [9.5, 9.5],
           [9.5, 9.5],
           [9.5, 9.5],
           [9.5, 9.5],
           [9.5, 9.5],
           [9.5, 9.5],
           [9.5, 9.5],
           [9.5, 9.5],
           [9.5, 9.5]])
    >>> symmetrize(A, axis=0)
    array([[ 4.5, 14.5],
           [ 4.5, 14.5],
           [ 4.5, 14.5],
           [ 4.5, 14.5],
           [ 4.5, 14.5],
           [ 4.5, 14.5],
           [ 4.5, 14.5],
           [ 4.5, 14.5],
           [ 4.5, 14.5],
           [ 4.5, 14.5]])
    """
    if inplace:
        out = m
    else:
        out = np.copy(m)

    out += np.flip(m, axis=axis)
    out /= 2

    return out




[docs]
def compute_form_factor(q: float, atom_type: str) -> float:
    r"""Calculate the form factor :math:`f(q)`.

    :math:`f(q)` is expressed in terms of the scattering vector as

    .. math::
        f(q) = \sum_{i=1}^4 a_i e^{-b_i q^2/(4\pi)^2} + c \,.

    The coefficients :math:`a_{1,\dots,4}`, :math:`b_{1,\dots,4}` and :math:`c` are also
    known as Cromer-Mann X-ray scattering factors and are documented in
    :footcite:t:`princeInternationalTablesCrystallography2004`.

    For determining the elements :obj:`maicos.lib.tables.atomtypes` is used and the
    Cromer-Mann X-ray scattering factors are stored in
    :obj:`maicos.lib.tables.CM_parameters`.

    Parameters
    ----------
    q : float
        The magnitude of the scattering vector in reciprocal angstroms (1/Å).
    atom_type : str
        The type of the atom for which the form factor is calculated.

    Returns
    -------
    float
        The calculated form factor for the specified atom type and q.
    """
    element = tables.atomtypes[atom_type]

    if element == "CH1":
        form_factor = compute_form_factor(q, "C") + compute_form_factor(q, "H")
    elif element == "CH2":
        form_factor = compute_form_factor(q, "C") + 2 * compute_form_factor(q, "H")
    elif element == "CH3":
        form_factor = compute_form_factor(q, "C") + 3 * compute_form_factor(q, "H")
    elif element == "CH4":
        form_factor = compute_form_factor(q, "C") + 4 * compute_form_factor(q, "H")
    elif element == "NH1":
        form_factor = compute_form_factor(q, "N") + compute_form_factor(q, "H")
    elif element == "NH2":
        form_factor = compute_form_factor(q, "N") + 2 * compute_form_factor(q, "H")
    elif element == "NH3":
        form_factor = compute_form_factor(q, "N") + 3 * compute_form_factor(q, "H")
    else:
        form_factor = tables.CM_parameters[element].c
        q2 = (q / (4 * np.pi)) ** 2
        for i in range(4):
            form_factor += tables.CM_parameters[element].a[i] * np.exp(
                -tables.CM_parameters[element].b[i] * q2
            )

    return form_factor




[docs]
def transform_cylinder(
    positions: np.ndarray, origin: np.ndarray, dim: int
) -> np.ndarray:
    """Transform positions into cylinder coordinates.

    The origin of th coordinate system is at `origin`, the direction of the cylinder is
    defined by `dim`.

    Parameters
    ----------
    positions : numpy.ndarray
        Cartesian coordinates (x,y,z)
    origin : numpy.ndarray
        Origin of the new cylindrical coordinate system (x,y,z).
    dim : int
        Direction of the cylinder axis (0=x, 1=y, 2=z).

    Returns
    -------
    numpy.ndarray
        Positions in cylinder coordinates (r, phi, z)
    """
    trans_positions = np.zeros(positions.shape)

    odims = np.roll(np.arange(3), -dim)[1:]

    # shift origin to box center
    pos_xyz_center = positions - origin

    # r component
    trans_positions[:, 0] = np.linalg.norm(pos_xyz_center[:, odims], axis=1)

    # phi component
    np.arctan2(*pos_xyz_center[:, odims].T, out=trans_positions[:, 1])

    # z component
    trans_positions[:, 2] = np.copy(positions[:, dim])

    return trans_positions




[docs]
def transform_sphere(positions: np.ndarray, origin: np.ndarray) -> np.ndarray:
    """Transform positions into spherical coordinates.

    The origin of the new coordinate system is at `origin`.

    Parameters
    ----------
    positions : numpy.ndarray
        Cartesian coordinates (x,y,z)
    origin : numpy.ndarray
        Origin of the new spherical coordinate system (x,y,z).

    Returns
    -------
    numpy.ndarray
        Positions in spherical coordinates (:math:`r`, phi, theta)
    """
    trans_positions = np.zeros(positions.shape)

    # shift origin to box center
    # positions -= origin
    pos_xyz_center = positions - origin

    # r component
    trans_positions[:, 0] = np.linalg.norm(pos_xyz_center, axis=1)
    # phi component
    np.arctan2(pos_xyz_center[:, 1], pos_xyz_center[:, 0], out=trans_positions[:, 1])
    # theta component
    np.arccos(pos_xyz_center[:, 2] / trans_positions[:, 0], out=trans_positions[:, 2])

    return trans_positions




[docs]
def compute_rdf_structure_factor(
    rdf: np.ndarray, r: np.ndarray, density: float
) -> Tuple[np.ndarray, np.ndarray]:
    r"""Computes the structure factor based on the radial distribution function (RDF).

    The structure factor :math:`S(q)` based on the RDF :math:`g(r)` is given by

    .. math::
        S(q) = 1 + 4 \pi \rho \int_0^\infty \mathrm{d}r r
                         \frac{\sin(qr)}{q} (g(r) - 1)\,

    where :math:`q` is the magnitude of the scattering vector. The calculation is
    performed via a discrete sine transform as implemented in :func:`scipy.fftpack.dst`.

    For an `example` take a look at :ref:`howto-saxs`.

    Parameters
    ----------
    rdf : numpy.ndarray
        radial distribution function
    r : numpy.ndarray
        equally spaced distance array on which rdf is defined
    density : float
        number density of particles

    Returns
    -------
    q : numpy.ndarray
        array of q points
    struct_factor : numpy.ndarray
        structure factor
    """
    dr = r[1] - r[0]
    q = np.pi / r[-1] * np.arange(1, len(r) + 1)
    struct_factor = 1 + 4 * np.pi * density * 0.5 * dst((rdf - 1) * r) / q * dr

    return q, struct_factor