Source code for tardis.montecarlo.montecarlo_numba.formal_integral_cuda

import sys
import numpy as np
from astropy import units as u
from numba import float64, int64, cuda
import math

from tardis.montecarlo.montecarlo_numba.numba_config import SIGMA_THOMSON

C_INV = 3.33564e-11
M_PI = np.arccos(-1)
KB_CGS = 1.3806488e-16
H_CGS = 6.62606957e-27


[docs]@cuda.jit
def cuda_formal_integral(
    r_inner,
    r_outer,
    time_explosion,
    line_list_nu,
    iT,
    inu,
    inu_size,
    att_S_ul,
    Jred_lu,
    Jblue_lu,
    tau_sobolev,
    electron_density,
    N,
    L,
    pp,
    exp_tau,
    I_nu,
    z,
    shell_id,
):
    """
    The CUDA version of numba_formal_integral that can run
    on a NVIDIA GPU.

    Parameters
    ----------
    r_inner : array(float64, 1d, C)
        self.geometry.r_inner
    r_outer : array(float64, 1d, C)
        self.geometry.r_outer
    time_explosion: float64
        self.geometry.time_explosion
    line_list_nu : array(float64, 1d, A)
        self.plasma.line_list_nu
    iT : np.float64
    inu : np.float64
    inu_size : int64
    att_S_ul : array(float64, 1d, C)
    Jred_lu : array(float64, 1d, C)
    Jblue_lu : array(float64, 1d, C)
    tau_sobolev : array(float64, 2d, C)
    electron_density : array(float64, 1d, C)
    N : int64
    L : array(float64, 1d, C)
        This is where the results will be stored
    pp : array(float64, 1d, C)
    exp_tau : array(float64, 1d, C)
    I_nu array(floatt64, 2d, C)
    z : array(float64, 2d, C)
    shell_id : array(int64, 2d, C)
    """

    # todo: add all the original todos

    # global read-only values
    size_line, size_shell = tau_sobolev.shape
    size_tau = size_line * size_shell
    R_ph = r_inner[0]  # make sure these are cgs
    R_max = r_outer[size_shell - 1]

    nu_idx = cuda.grid(1)
    # Check to see if CUDA is out of bounds
    if nu_idx >= inu_size:
        return

    # These all have their own version for each thread to avoid race conditions
    I_nu_thread = I_nu[nu_idx]
    z_thread = z[nu_idx]
    shell_id_thread = shell_id[nu_idx]

    offset = 0
    size_z = 0
    idx_nu_start = 0
    direction = 0
    first = 0
    i = 0
    p = 0.0
    nu_start = 0.0
    nu_end = 0.0
    nu = 0.0
    zstart = 0.0
    zend = 0.0
    escat_contrib = 0.0
    escat_op = 0.0
    Jkkp = 0.0
    pexp_tau = 0
    patt_S_ul = 0
    pJred_lu = 0
    pJblue_lu = 0
    pline = 0

    nu = inu[nu_idx]

    # now loop over discrete values along line
    for p_idx in range(1, N):
        escat_contrib = 0.0
        p = pp[p_idx]

        # initialize z intersections for p values
        size_z = populate_z_cuda(
            r_inner, r_outer, time_explosion, p, z_thread, shell_id_thread
        )
        if p <= R_ph:
            I_nu_thread[p_idx] = intensity_black_body_cuda(nu * z_thread[0], iT)
        else:
            I_nu_thread[p_idx] = 0
        nu_start = nu * z_thread[0]
        nu_end = nu * z_thread[1]

        idx_nu_start = line_search_cuda(line_list_nu, nu_start, size_line)
        offset = shell_id_thread[0] * size_line
        # start tracking accumulated e-scattering optical depth
        zstart = time_explosion / C_INV * (1.0 - z_thread[0])
        # Initialize "pointers"
        pline = int(idx_nu_start)
        pexp_tau = int(offset + idx_nu_start)
        patt_S_ul = int(offset + idx_nu_start)
        pJred_lu = int(offset + idx_nu_start)
        pJblue_lu = int(offset + idx_nu_start)

        # flag for first contribution to integration on current p-ray
        first = 1

        # loop over all interactions
        for i in range(size_z - 1):
            escat_op = electron_density[int(shell_id_thread[i])] * SIGMA_THOMSON
            nu_end = (
                nu * z_thread[i + 1]
            )  # +1 is the offset as the original is from z[1:]

            nu_end_idx = line_search_cuda(
                line_list_nu, nu_end, len(line_list_nu)
            )

            for _ in range(max(nu_end_idx - pline, 0)):

                # calculate e-scattering optical depth to next resonance point
                zend = time_explosion / C_INV * (1.0 - line_list_nu[pline] / nu)
                if first == 1:
                    # first contribution to integration
                    # NOTE: this treatment of I_nu_b (given
                    #   by boundary conditions) is not in Lucy 1999;
                    #   should be re-examined carefully
                    escat_contrib += (
                        (zend - zstart)
                        * escat_op
                        * (Jblue_lu[pJblue_lu] - I_nu_thread[p_idx])
                    )
                    first = 0
                else:
                    # Account for e-scattering, c.f. Eqs 27, 28 in Lucy 1999
                    Jkkp = 0.5 * (Jred_lu[pJred_lu] + Jblue_lu[pJblue_lu])
                    escat_contrib += (
                        (zend - zstart) * escat_op * (Jkkp - I_nu_thread[p_idx])
                    )
                    # this introduces the necessary offset of one element between
                    # pJblue_lu and pJred_lu
                    pJred_lu += 1
                I_nu_thread[p_idx] += escat_contrib
                # // Lucy 1999, Eq 26
                I_nu_thread[p_idx] *= exp_tau[pexp_tau]
                I_nu_thread[p_idx] += att_S_ul[patt_S_ul]

                # // reset e-scattering opacity
                escat_contrib = 0.0
                zstart = zend

                pline += 1
                pexp_tau += 1
                patt_S_ul += 1
                pJblue_lu += 1

            # calculate e-scattering optical depth to grid cell boundary

            Jkkp = 0.5 * (Jred_lu[pJred_lu] + Jblue_lu[pJblue_lu])
            zend = time_explosion / C_INV * (1.0 - nu_end / nu)
            escat_contrib += (
                (zend - zstart) * escat_op * (Jkkp - I_nu_thread[p_idx])
            )
            zstart = zend

            # advance pointers
            direction = int(
                (shell_id_thread[i + 1] - shell_id_thread[i]) * size_line
            )
            pexp_tau += direction
            patt_S_ul += direction
            pJred_lu += direction
            pJblue_lu += direction

        I_nu_thread[p_idx] *= p
    L[nu_idx] = (
        8 * M_PI * M_PI * trapezoid_integration_cuda(I_nu_thread, R_max / N)
    )


[docs]class CudaFormalIntegrator(object):
    """
    Helper class for performing the formal integral
    with CUDA.
    """

    def __init__(self, geometry, model, plasma, points=1000):
        self.geometry = geometry
        self.model = model
        self.plasma = plasma
        self.points = points

[docs]    def formal_integral(
        self,
        iT,
        inu,
        inu_size,
        att_S_ul,
        Jred_lu,
        Jblue_lu,
        tau_sobolev,
        electron_density,
        N,
    ):
        """
        Simple wrapper for the CUDA implementation of the formal integral
        """
        L = np.zeros(inu_size, dtype=np.float64)  # array(float64, 1d, C)
        # global read-only values
        size_line, size_shell = tau_sobolev.shape  # int64, int64
        size_tau = size_line * size_shell  # int64

        pp = np.zeros(N, dtype=np.float64)  # array(float64, 1d, C)
        exp_tau = np.zeros(size_tau, dtype=np.float64)  # array(float64, 1d, C)
        exp_tau = np.exp(-tau_sobolev.T.ravel())  # array(float64, 1d, C)
        pp[::] = calculate_p_values(
            self.geometry.r_outer[size_shell - 1], N
        )  # array(float64, 1d, C)

        I_nu = np.zeros(
            (inu_size, N), dtype=np.float64
        )  # array(float64, 1d, C)
        z = np.zeros(
            (inu_size, 2 * size_shell), dtype=np.float64
        )  # array(float64, 1d, C)
        shell_id = np.zeros(
            (inu_size, 2 * size_shell), dtype=np.int64
        )  # array(int64, 1d, C)
        THREADS_PER_BLOCK = 32
        blocks_per_grid = (inu_size // THREADS_PER_BLOCK) + 1

        cuda_formal_integral[blocks_per_grid, THREADS_PER_BLOCK](
            self.geometry.r_inner,
            self.geometry.r_outer,
            self.model.time_explosion,
            self.plasma.line_list_nu,
            iT.value,
            inu.value,
            inu_size,
            att_S_ul,
            Jred_lu,
            Jblue_lu,
            tau_sobolev,
            electron_density,
            N,
            L,
            pp,
            exp_tau,
            I_nu,
            z,
            shell_id,
        )

        return L, I_nu


[docs]@cuda.jit(device=True)
def populate_z_cuda(r_inner, r_outer, time_explosion, p, oz, oshell_id):
    """
    Calculate p line intersections

    This function calculates the intersection points of the p-line with
    each shell

    Parameters
    ----------
    r_inner : array(float64, 1d, C)
    r_outer : array(float64, 1d, C)
    p : float64
        distance of the integration line to the center
    oz : array(float64, 1d, C)
        will be set with z values. the array is truncated
        by the value `1`.
    oshell_id : array(int64, 1d, C)
        will be set with the corresponding shell_ids

    Returns
    -------
    int64
    """
    N = len(r_inner)
    inv_t = 1 / time_explosion
    z = 0
    offset = N

    if p <= r_inner[0]:
        # intersect the photosphere
        for i in range(N):
            oz[i] = 1 - calculate_z_cuda(r_outer[i], p, inv_t)
            oshell_id[i] = i
        return N
    else:
        # no intersection with photosphere
        # that means we intersect each shell twice
        for i in range(N):
            z = calculate_z_cuda(r_outer[i], p, inv_t)
            if z == 0:
                continue
            if offset == N:
                offset = i
            # calculate the index in the resulting array
            i_low = N - i - 1  # the far intersection with the shell
            i_up = N + i - 2 * offset  # the nearer intersection with the shell

            # setting the arrays
            oz[i_low] = 1 + z
            oshell_id[i_low] = i
            oz[i_up] = 1 - z
            oshell_id[i_up] = i
        return 2 * (N - offset)


[docs]@cuda.jit(device=True)
def calculate_z_cuda(r, p, inv_t):
    """
    Calculate distance to p line

    Calculate half of the length of the p-line inside a shell
    of radius r in terms of unit length (c * t_exp).
    If shell and p-line do not intersect, return 0.

    Parameters
    ----------
    r : float64
        radius of the shell
    p : float64
        distance of the p-line to the center of the supernova
    inv_t : float64
        inverse time_explosio is needed to norm to unit-length

    Returns
    -------
    float64
    """
    if r > p:
        return math.sqrt(r * r - p * p) * C_INV * inv_t
    else:
        return 0.0


[docs]class BoundsError(IndexError):
    """
    Used to check bounds in reverse
    binary search
    """

    pass


[docs]@cuda.jit(device=True)
def line_search_cuda(nu, nu_insert, number_of_lines):
    """
    Insert a value in to an array of line frequencies

    Parameters
    ----------
    nu : (array) line frequencies
    nu_insert : (int) value of nu key
    number_of_lines : (int) number of lines in the line list

    Returns
    -------
    int
        index of the next line to the red.
        If the key value is redder than
        the reddest line returns number_of_lines.
    """
    imin = 0
    imax = number_of_lines - 1
    if nu_insert > nu[imin]:
        result = imin
    elif nu_insert < nu[imax]:
        result = imax + 1
    else:
        result = reverse_binary_search_cuda(nu, nu_insert, imin, imax)
        result = result + 1
    return result


# Credit for this computation is https://github.com/numba/numba/blob/3fd158f79a12ac5276bc5a72c2404464487c91f0/numba/np/arraymath.py#L3542
[docs]@cuda.jit(device=True)
def reverse_binary_search_cuda(x, x_insert, imin, imax):
    """
    Find indicies where elements should be inserted
    to maintain order in an inversely sorted float
    array.

    Find the indices into a sorted array a such that,
    if the corresponding elements in v were inserted
    before the indices on the right, the order of a
    would be preserved.

    Parameters
    ----------
    x : np.ndarray(np.float64, 1d, C)
    x_insert : float64
    imin : int
        Lower bound
    imax : int
        Upper bound

    Returns
    -------
    np.int64
        Location of insertion
    """
    if (x_insert > x[imin]) or (x_insert < x[imax]):
        raise BoundsError  # check
    arr = x[::-1]
    n = len(arr)
    lo = 0
    hi = n
    while hi > lo:
        mid = (lo + hi) >> 1
        if arr[mid] <= x_insert:
            # mid is too low of an index, go higher
            lo = mid + 1
        else:
            # mid is too high of an index, go down some
            hi = mid

    return len(x) - 1 - lo


[docs]@cuda.jit(device=True)
def trapezoid_integration_cuda(arr, dx):
    """
    Computes the approximation of the
    trapezoidal integration of the array.

    Parameters
    ----------
    arr : (array(float64, 1d, C)
    dx : np.float64
    """

    result = arr[0] + arr[-1]

    for x in range(1, len(arr) - 1):
        result += arr[x] * 2.0

    return result * (dx / 2.0)


[docs]@cuda.jit(device=True)
def intensity_black_body_cuda(nu, temperature):
    """
    Calculate the blackbody intensity.

    Parameters
    ----------
    nu : float64
        frequency
    temperature : float64
        Temperature

    Returns
    -------
    float64
    """
    if nu == 0:
        return np.nan  # to avoid ZeroDivisionError
    beta_rad = 1 / (KB_CGS * temperature)
    coefficient = 2 * H_CGS * C_INV * C_INV
    return coefficient * nu * nu * nu / (math.exp(H_CGS * nu * beta_rad) - 1)


[docs]def calculate_p_values(R_max, N):
    """
    Calculates the p values of N

    Parameters
    ----------
    R_max : float64
    N : int64

    Returns
    -------
    float64
    """

    return np.arange(N).astype(np.float64) * R_max / (N - 1)