Source code for volumential.tools

from __future__ import absolute_import, division, print_function
import six

__copyright__ = "Copyright (C) 2018 Xiaoyu Wei"

__license__ = """
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""

import numpy as np
import loopy as lp
import pyopencl as cl
import pymbolic as pmbl
from pytools import memoize_method
from pymbolic.primitives import Variable as VariableType
from pymbolic.primitives import Expression as ExpressionType

import logging

logger = logging.getLogger(__name__)

# {{{ clean files


[docs]def clean_file(filename, new_name=None):
    """Remove/rename file if exists.
    Fails silently when the file does not exist.
    Useful for, for example, writing output files that
    are meant to overwrite existing ones.
    """
    import os

    if new_name is None:
        try:
            os.remove(filename)
        except OSError:
            pass
    else:
        try:
            os.rename(filename, new_name)
        except OSError:
            pass

# }}} End clean files

# {{{ loopy kernel cache wrapper


class KernelCacheWrapper(object):
    # FIXME: largely code duplication with sumpy.

    def __init__(self):
        self.name = "KernelCacheWrapper"
        raise RuntimeError(
                "KernelCacheWrapper objects should not be constructed")

    def get_cache_key(self):
        raise NotImplementedError("Unimplemented cache key")

    def get_kernel(self):
        raise NotImplementedError()

    def get_optimized_kernel(self):
        raise NotImplementedError()

    @memoize_method
    def get_cached_optimized_kernel(self, **kwargs):
        from sumpy import code_cache, CACHING_ENABLED, OPT_ENABLED

        if CACHING_ENABLED:
            import loopy.version
            from sumpy.version import KERNEL_VERSION as SUMPY_KERNEL_VERSION
            from volumential.version import KERNEL_VERSION
            cache_key = (
                    self.get_cache_key()
                    + tuple(sorted(six.iteritems(kwargs)))
                    + (loopy.version.DATA_MODEL_VERSION,)
                    + (SUMPY_KERNEL_VERSION,)
                    + (KERNEL_VERSION,)
                    + (OPT_ENABLED,))

            try:
                result = code_cache[cache_key]
                logger.debug("%s: kernel cache hit [key=%s]" % (
                    self.name, cache_key))
                return result
            except KeyError:
                pass

        logger.info("%s: kernel cache miss" % self.name)
        if CACHING_ENABLED:
            logger.info("%s: kernel cache miss [key=%s]" % (
                self.name, cache_key))

        from pytools import MinRecursionLimit
        with MinRecursionLimit(3000):
            if OPT_ENABLED:
                knl = self.get_optimized_kernel(**kwargs)
            else:
                knl = self.get_kernel()

        if CACHING_ENABLED:
            code_cache.store_if_not_present(cache_key, knl)

        return knl


# }}} End loopy kernel cache wrapper

# {{{ scalar field expression eval


[docs]class ScalarFieldExpressionEvaluation(KernelCacheWrapper):
    """
    Evaluate a field funciton on a set of D-d points.
    Useful for imposing analytic conditions efficiently.
    """

    def __init__(self, dim, expression, variables=None, dtype=np.float64,
                 function_manglers=None, preamble_generators=None):
        """
        :arg dim
        :arg expression A pymbolic expression for the function
        :arg variables A list of variables representing spacial coordinates
        """
        assert dim > 0
        self.dim = dim

        assert isinstance(expression, (ExpressionType, int, float, complex))
        self.expr = expression

        if variables is None:
            self.vars = [pmbl.var("x%d" % d) for d in range(self.dim)]
        else:
            assert isinstance(variables, list)
            for var in variables:
                assert isinstance(var, VariableType)
            self.vars = variables

        self.dtype = dtype
        self.function_manglers = function_manglers
        self.preamble_generators = preamble_generators

        self.name = "ScalarFieldExpressionEvaluation"

    def get_cache_key(self):
        return (
            type(self).__name__,
            str(self.dim) + "D",
            self.expr.__str__(),
            ",".join([x.__str__() for x in self.vars]),
        )

    def get_normalised_expr(self):
        nexpr = self.expr
        nvars = [pmbl.var("x%d" % d) for d in range(self.dim)]
        for var, nvar in zip(self.vars, nvars):
            nexpr = pmbl.substitute(nexpr, {var: nvar})
        return nexpr

    def get_variable_assignment_code(self):
        if self.dim == 1:
            return "<> x0 = target_points[0, itgt]"
        elif self.dim == 2:
            return """<> x0 = target_points[0, itgt]
                      <> x1 = target_points[1, itgt]"""
        elif self.dim == 3:
            return """<> x0 = target_points[0, itgt]
                      <> x1 = target_points[1, itgt]
                      <> x2 = target_points[2, itgt]"""
        else:
            raise NotImplementedError

    def get_kernel(self, **kwargs):

        extra_kernel_kwarg_types = ()
        if "extra_kernel_kwarg_types" in kwargs:
            extra_kernel_kwarg_types = kwargs["extra_kernel_kwarg_types"]

        eval_inames = frozenset(["itgt"])
        scalar_assignment = lp.Assignment(
            id=None,
            assignee="expr_val",
            expression=self.get_normalised_expr(),
            temp_var_type=None,
        )
        eval_insns = [
            insn.copy(within_inames=insn.within_inames | eval_inames)
            for insn in [scalar_assignment]
        ]

        loopy_knl = lp.make_kernel(  # NOQA
            "{ [itgt]: 0<=itgt<n_targets }",
            [
                """
                for itgt
                    VAR_ASSIGNMENT
                end
                """.replace(
                    "VAR_ASSIGNMENT", self.get_variable_assignment_code()
                )
            ]
            + eval_insns
            + [
                """
                for itgt
                    result[itgt] = expr_val
                end
                """
            ],
            [
                lp.ValueArg("dim, n_targets", np.int32),
                lp.GlobalArg("target_points", np.float64, "dim, n_targets"),
                lp.TemporaryVariable("expr_val", None, ()),
            ]
            + list(extra_kernel_kwarg_types)
            + ["...", ],
            name="eval_expr",
            lang_version=(2018, 2),
        )

        loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim)
        loopy_knl = lp.set_options(loopy_knl, write_cl=False)
        loopy_knl = lp.set_options(loopy_knl, return_dict=True)

        if self.function_manglers is not None:
            loopy_knl = lp.register_function_manglers(
                loopy_knl,
                self.function_manglers)

        if self.preamble_generators is not None:
            loopy_knl = lp.register_preamble_generators(
                loopy_knl,
                self.preamble_generators)

        return loopy_knl

    def get_optimized_kernel(self, ncpus=None, **kwargs):
        knl = self.get_kernel(**kwargs)
        if ncpus is None:
            import multiprocessing
            # NOTE: this detects the number of logical cores, which
            # may result in suboptimal performance.
            ncpus = multiprocessing.cpu_count()
        knl = lp.split_iname(
            knl,
            split_iname="itgt",
            inner_length=ncpus,
            inner_tag="g.0")
        return knl

    def __call__(self, queue, target_points, **kwargs):
        """
        :arg target_points
        :arg extra_kernel_kwargs
        """
        # handle target_points given as an obj_array of coords
        if (isinstance(target_points, np.ndarray)
                and target_points.dtype == np.object
                and isinstance(target_points[0], cl.array.Array)):
            target_points = cl.array.concatenate(
                    target_points).reshape([self.dim, -1])

        assert target_points.shape[0] == self.dim

        n_tgt_points = target_points[0].shape[0]
        for tgt_d in target_points:
            assert len(tgt_d) == n_tgt_points

        extra_kernel_kwargs = {}
        if "extra_kernel_kwargs" in kwargs:
            extra_kernel_kwargs = kwargs["extra_kernel_kwargs"]

        knl = self.get_cached_optimized_kernel()

        # FIXME: caching loses function mangler information
        if self.function_manglers is not None:
            knl = lp.register_function_manglers(knl, self.function_manglers)

        if self.preamble_generators is not None:
            knl = lp.register_preamble_generators(knl, self.preamble_generators)

        evt, res = knl(
            queue,
            target_points=target_points,
            n_targets=n_tgt_points,
            result=np.zeros(n_tgt_points, dtype=self.dtype),
            **extra_kernel_kwargs
        )

        return res["result"]

# }}} End scalar field expression eval

# {{{ import code


[docs]def import_code(code, name, add_to_sys_modules=True):
    """Dynamically generates a module.

    :arg code: can be any object containing code -- string, file object, or
    compiled code object. Returns a new module object initialized
    by dynamically importing the given code and optionally adds it
    to sys.modules under the given name.
    """
    import imp
    module = imp.new_module(name)

    if add_to_sys_modules:
        import sys
        sys.modules[name] = module

    exec(code, module.__dict__)

    return module

# }}} End import code

# {{{ box-specific maps


[docs]class BoxSpecificMap(KernelCacheWrapper):
    """
    Box-specific transform that maps between datum defined on quadrature
    nodes. Being box-specific means that the transform for each box is
    independent from the rest of the boxes.
    """
    pass

# {{{ discrete Legendre transform


[docs]class DiscreteLegendreTransform(BoxSpecificMap):
    """
    Transform from nodal values to Legendre polynomial coefficients
    for all cells (leaf boxes of a boxtree Tree object).
    It is assumed that the traversal is built over a tree where the
    sources and targets coincide.
    """

    def __init__(self, dim, degree):
        """
        :arg dim
        :arg degree Number of nodes in each axis direction.
        """
        assert dim > 0
        self.dim = dim
        assert degree > 0
        self.degree = degree

        # Template interval
        self.template_interval = [-1., 1.]
        self.template_interval_extent = 2.
        self.template_interval_center = 0.

        self.leg_tplt_x, self.leg_tplt_w = \
                np.polynomial.legendre.leggauss(degree)

        if self.dim == 1:
            self.V = np.polynomial.legendre.legvander(
                    self.leg_tplt_x, self.degree - 1)
            self.W = self.leg_tplt_w.reshape(-1)

        elif self.dim == 2:
            x, y = np.meshgrid(self.leg_tplt_x, self.leg_tplt_x)
            self.V = np.polynomial.legendre.legvander2d(
                    x.reshape(-1), y.reshape(-1),
                    [self.degree - 1] * self.dim)
            self.W = (self.leg_tplt_w[None, :]
                    * self.leg_tplt_w[:, None]).reshape(-1)

        elif self.dim == 3:
            x, y, z = np.meshgrid(self.leg_tplt_x, self.leg_tplt_x, self.leg_tplt_x)
            self.V = np.polynomial.legendre.legvander3d(
                    x.reshape(-1), y.reshape(-1), z.reshape(-1),
                    [self.degree - 1] * self.dim)
            self.W = (self.leg_tplt_w[:, None, None]
                    * self.leg_tplt_w[None, :, None]
                    * self.leg_tplt_w[None, None, :]).reshape(-1)

        else:
            raise NotImplementedError("Dimension %d is not supported" % self.dim)

        # Vandermonde matrix: each column corresponds to one basis function
        assert self.V.shape == (self.degree**self.dim, self.degree**self.dim)
        assert self.W.shape == (self.degree**self.dim,)

        # Normalizers
        self.I = np.ascontiguousarray(  # noqa: E741
                np.diag(
                    (self.V.T * self.W) @ self.V))
        assert self.I.shape == (self.degree**self.dim,)

        # Fix strides for loopy
        self.V = np.ascontiguousarray(self.V)

        # Check orthogonality
        ortho_resid = np.linalg.norm(
                self.V.T * np.matmul(self.W, self.V) - np.diag(self.I))
        if ortho_resid > 1e-13:
            logger.warn("Legendre polynomials' orthogonality residual = %f"
                    % ortho_resid)

        self.name = "DiscreteLegendreTransform"

    def get_cache_key(self):
        return (
            type(self).__name__,
            str(self.dim) + "D",
            "degree=%d" % self.degree
        )

    def get_kernel(self, **kwargs):

        loopy_knl = lp.make_kernel(  # NOQA
                [
                    "{ [ bid ] : 0 <= bid < n_boxes }",
                    "{ [ mid ] : 0 <= mid < n_box_nodes }",
                    "{ [ nid ] : 0 <= nid < n_box_nodes }"
                    ],
                [
                    """
                for bid
                    <> box_id       = boxes[bid]
                    <> box_node_beg = box_node_starts[box_id]

                    # Rescale weights based on template interval sizes.
                    # Not needed since the rscl in both the numerator and
                    # the denominator and is canceled.
                    #
                    # <> box_level    = box_levels[box_id]
                    # <> box_extent   = root_extent * (1.0 / (2**box_level))
                    # <> weight_rscl  = (box_extent / 2.0)**dim

                    for mid

                        <> mode_id = box_node_beg + mid

                        for nid
                            <> user_node_id = user_node_ids[box_node_beg + nid]
                        end

                        result[mode_id] = sum(
                                              nid,
                                              (
                                              func[user_node_id]
                                              * weight[nid]
                                              * vandermonde[nid, mid]
                                              ) * filter_multiplier[nid]
                                             ) / normalizer[mid]
                    end
                end
                """
                ],
                [
                    lp.ValueArg("n_box_nodes, n_boxes", np.int32),
                    # lp.ValueArg("root_extent", np.float64),
                    lp.GlobalArg("weight, normalizer, filter_multiplier",
                        np.float64, "n_box_nodes"),
                    lp.GlobalArg("vandermonde", np.float64,
                        "n_box_nodes, n_box_nodes"),
                    lp.GlobalArg("func", np.float64, "n_box_nodes * n_boxes"),
                    "...",
                    ],
                name="discrete_legendre_transform",
                lang_version=(2018, 2),
                )

        loopy_knl = lp.set_options(loopy_knl, write_cl=False)
        loopy_knl = lp.set_options(loopy_knl, return_dict=True)

        return loopy_knl

    def get_optimized_kernel(self, ncpus=None, **kwargs):
        knl = self.get_kernel(**kwargs)
        if ncpus is None:
            import multiprocessing
            ncpus = multiprocessing.cpu_count()
        knl = lp.split_iname(
            knl,
            split_iname="bid",
            inner_length=ncpus,
            inner_tag="g.0")
        return knl

    def __call__(self, queue, traversal, nodal_vals, filtering=None, **kwargs):
        """
        :arg traversal
        :arg nodal_vals CL array of nodal values.
        :arg filtering Box-wide filter given by an CL array or None.
        """

        if filtering is None:
            filter_multiplier = 1 + cl.array.zeros(
                queue, self.degree**self.dim, np.float64)
        elif isinstance(filtering, cl.array.Array):
            assert filtering.shape == (self.degree**self.dim,)
            filter_multiplier = filtering
        else:
            raise RuntimeError(f"Invalid filtering argument: {str(filtering)}")

        knl = self.get_cached_optimized_kernel()

        evt, res = knl(
            queue,
            boxes=traversal.target_boxes,
            box_node_starts=traversal.tree.box_target_starts,
            user_node_ids=traversal.tree.user_source_ids,
            # box_levels=traversal.tree.box_levels,
            # root_extent=traversal.tree.root_extent,
            func=nodal_vals,
            weight=cl.array.to_device(queue, self.W),
            vandermonde=cl.array.to_device(queue, self.V),
            normalizer=cl.array.to_device(queue, self.I),
            n_box_nodes=self.degree**self.dim,
            n_boxes=traversal.target_boxes.shape[0],
            filter_multiplier=filter_multiplier,
            result=cl.array.zeros_like(nodal_vals),
        )

        return res["result"]

# }}} End discrete Legendre transform

# {{{ inverse discrete Legendre transform


[docs]class InverseDiscreteLegendreTransform(BoxSpecificMap):
    """
    Box-specific transform that maps box-local modal coefficients
    to nodal values. Inverse of :class:`DiscreteLegendreTransform`.
    """
    pass

# }}} End inverse discrete Legendre transform

# }}} End box-specific maps

# {{{ box-specific reductions


[docs]class BoxSpecificReduction(KernelCacheWrapper):
    """
    Box-specific reduction that maps for each box a data vector defined
    on the quadrature nodes to a scalar.
    Being box-specific means that the reductions for each box is
    independent from the rest of the boxes.
    """
    pass

# {{{ sum


[docs]class BoxSum(BoxSpecificReduction):
    """
    Adds up nodal values within each box.
    """

    def __init__(self, dim, degree):
        """
        :arg dim
        :arg degree Number of nodes in each axis direction.
        """
        assert dim > 0
        self.dim = dim
        assert degree > 0
        self.degree = degree

        self.name = "BoxSum"

    def get_cache_key(self):
        return (
            type(self).__name__,
            str(self.dim) + "D",
            "degree=%d" % self.degree
        )

    def get_kernel(self, **kwargs):

        loopy_knl = lp.make_kernel(  # NOQA
                [
                    "{ [ bid ] : 0 <= bid < n_boxes }",
                    "{ [ nid ] : 0 <= nid < n_box_nodes }"
                    ],
                [
                    """
                for bid
                    <> box_id       = boxes[bid]
                    <> box_node_beg = box_node_starts[box_id]

                    result[bid] = sum(nid,
                                      func[box_node_beg + nid]
                                      * filter_multiplier[nid])
                end
                """
                ],
                [
                    lp.ValueArg("n_box_nodes, n_boxes", np.int32),
                    lp.GlobalArg("filter_multiplier", np.float64, "n_box_nodes"),
                    lp.GlobalArg("func", np.float64, "n_box_nodes * n_boxes"),
                    "...",
                    ],
                name="box_filtered_sum",
                lang_version=(2018, 2),
                )

        loopy_knl = lp.set_options(loopy_knl, write_cl=False)
        loopy_knl = lp.set_options(loopy_knl, return_dict=True)

        return loopy_knl

    def get_optimized_kernel(self, ncpus=None, **kwargs):
        knl = self.get_kernel(**kwargs)
        if ncpus is None:
            import multiprocessing
            ncpus = multiprocessing.cpu_count()
        knl = lp.split_iname(
            knl,
            split_iname="bid",
            inner_length=ncpus,
            inner_tag="g.0")
        return knl

    def __call__(self, queue, traversal, nodal_vals, filtering=None, **kwargs):
        """
        :arg traversal
        :arg nodal_vals CL array of nodal values.
        :arg filtering Box-wide filter given by an CL array or None.

        .. warning::
           The output of this kernel is ordered in :mod:`boxtree`'s box ids.
           It may not be the same as the order implied by the input (e.g.
           box mesh generated by dealii).
        """

        if filtering is None:
            filter_multiplier = 1 + cl.array.zeros(queue,
                    self.degree**self.dim, np.float64)
        elif isinstance(filtering, cl.array.Array):
            assert filtering.shape == (self.degree**self.dim,)
            filter_multiplier = filtering
        else:
            raise RuntimeError("Invalid filtering argument: %s"
                    % str(filtering))

        knl = self.get_cached_optimized_kernel()
        n_boxes = traversal.target_boxes.shape[0]

        evt, res = knl(
            queue,
            boxes=traversal.target_boxes,
            box_node_starts=traversal.tree.box_target_starts,
            func=nodal_vals,
            n_box_nodes=self.degree**self.dim,
            n_boxes=n_boxes,
            filter_multiplier=filter_multiplier,
            result=cl.array.zeros(queue, n_boxes, nodal_vals.dtype),
        )

        return res["result"]

# }}} End sum

# }}} End box-specific reductions

# {{{ filters for box-specific operators


[docs]def generate_leading_order_filtering(dim, n_dofs):
    """Returns a filtering vector that is an indicator function of the node
    that corresponds to the leading order modal values in the Fourier space.
    """

    mask1d = np.zeros(n_dofs)
    mask1d[-1] = 1

    if dim == 1:
        return mask1d

    elif dim == 2:
        return (
            mask1d[:, None] + mask1d[None, :]
            - mask1d[:, None] * mask1d[None, :]
        ).reshape(-1)

    elif dim == 3:
        return (
            mask1d[:, None, None] + mask1d[None, :, None] + mask1d[None, None, :]
            - mask1d[:, None, None] * mask1d[None, :, None]
            - mask1d[:, None, None] * mask1d[None, None, :]
            - mask1d[None, :, None] * mask1d[None, None, :]
            + mask1d[:, None, None] * mask1d[None, :, None] * mask1d[None, None, :]
        ).reshape(-1)

    else:
        raise NotImplementedError("Dimension %d not supported" % dim)

# }}} End filters for box-specific operators