from __future__ import absolute_import, division, print_function
import six
__copyright__ = "Copyright (C) 2018 Xiaoyu Wei"
__license__ = """
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import numpy as np
import loopy as lp
import pyopencl as cl
import pymbolic as pmbl
from pytools import memoize_method
from pymbolic.primitives import Variable as VariableType
from pymbolic.primitives import Expression as ExpressionType
import logging
logger = logging.getLogger(__name__)
# {{{ clean files
[docs]def clean_file(filename, new_name=None):
"""Remove/rename file if exists.
Fails silently when the file does not exist.
Useful for, for example, writing output files that
are meant to overwrite existing ones.
"""
import os
if new_name is None:
try:
os.remove(filename)
except OSError:
pass
else:
try:
os.rename(filename, new_name)
except OSError:
pass
# }}} End clean files
# {{{ loopy kernel cache wrapper
class KernelCacheWrapper(object):
# FIXME: largely code duplication with sumpy.
def __init__(self):
self.name = "KernelCacheWrapper"
raise RuntimeError(
"KernelCacheWrapper objects should not be constructed")
def get_cache_key(self):
raise NotImplementedError("Unimplemented cache key")
def get_kernel(self):
raise NotImplementedError()
def get_optimized_kernel(self):
raise NotImplementedError()
@memoize_method
def get_cached_optimized_kernel(self, **kwargs):
from sumpy import code_cache, CACHING_ENABLED, OPT_ENABLED
if CACHING_ENABLED:
import loopy.version
from sumpy.version import KERNEL_VERSION as SUMPY_KERNEL_VERSION
from volumential.version import KERNEL_VERSION
cache_key = (
self.get_cache_key()
+ tuple(sorted(six.iteritems(kwargs)))
+ (loopy.version.DATA_MODEL_VERSION,)
+ (SUMPY_KERNEL_VERSION,)
+ (KERNEL_VERSION,)
+ (OPT_ENABLED,))
try:
result = code_cache[cache_key]
logger.debug("%s: kernel cache hit [key=%s]" % (
self.name, cache_key))
return result
except KeyError:
pass
logger.info("%s: kernel cache miss" % self.name)
if CACHING_ENABLED:
logger.info("%s: kernel cache miss [key=%s]" % (
self.name, cache_key))
from pytools import MinRecursionLimit
with MinRecursionLimit(3000):
if OPT_ENABLED:
knl = self.get_optimized_kernel(**kwargs)
else:
knl = self.get_kernel()
if CACHING_ENABLED:
code_cache.store_if_not_present(cache_key, knl)
return knl
# }}} End loopy kernel cache wrapper
# {{{ scalar field expression eval
[docs]class ScalarFieldExpressionEvaluation(KernelCacheWrapper):
"""
Evaluate a field funciton on a set of D-d points.
Useful for imposing analytic conditions efficiently.
"""
def __init__(self, dim, expression, variables=None, dtype=np.float64,
function_manglers=None, preamble_generators=None):
"""
:arg dim
:arg expression A pymbolic expression for the function
:arg variables A list of variables representing spacial coordinates
"""
assert dim > 0
self.dim = dim
assert isinstance(expression, (ExpressionType, int, float, complex))
self.expr = expression
if variables is None:
self.vars = [pmbl.var("x%d" % d) for d in range(self.dim)]
else:
assert isinstance(variables, list)
for var in variables:
assert isinstance(var, VariableType)
self.vars = variables
self.dtype = dtype
self.function_manglers = function_manglers
self.preamble_generators = preamble_generators
self.name = "ScalarFieldExpressionEvaluation"
def get_cache_key(self):
return (
type(self).__name__,
str(self.dim) + "D",
self.expr.__str__(),
",".join([x.__str__() for x in self.vars]),
)
def get_normalised_expr(self):
nexpr = self.expr
nvars = [pmbl.var("x%d" % d) for d in range(self.dim)]
for var, nvar in zip(self.vars, nvars):
nexpr = pmbl.substitute(nexpr, {var: nvar})
return nexpr
def get_variable_assignment_code(self):
if self.dim == 1:
return "<> x0 = target_points[0, itgt]"
elif self.dim == 2:
return """<> x0 = target_points[0, itgt]
<> x1 = target_points[1, itgt]"""
elif self.dim == 3:
return """<> x0 = target_points[0, itgt]
<> x1 = target_points[1, itgt]
<> x2 = target_points[2, itgt]"""
else:
raise NotImplementedError
def get_kernel(self, **kwargs):
extra_kernel_kwarg_types = ()
if "extra_kernel_kwarg_types" in kwargs:
extra_kernel_kwarg_types = kwargs["extra_kernel_kwarg_types"]
eval_inames = frozenset(["itgt"])
scalar_assignment = lp.Assignment(
id=None,
assignee="expr_val",
expression=self.get_normalised_expr(),
temp_var_type=None,
)
eval_insns = [
insn.copy(within_inames=insn.within_inames | eval_inames)
for insn in [scalar_assignment]
]
loopy_knl = lp.make_kernel( # NOQA
"{ [itgt]: 0<=itgt<n_targets }",
[
"""
for itgt
VAR_ASSIGNMENT
end
""".replace(
"VAR_ASSIGNMENT", self.get_variable_assignment_code()
)
]
+ eval_insns
+ [
"""
for itgt
result[itgt] = expr_val
end
"""
],
[
lp.ValueArg("dim, n_targets", np.int32),
lp.GlobalArg("target_points", np.float64, "dim, n_targets"),
lp.TemporaryVariable("expr_val", None, ()),
]
+ list(extra_kernel_kwarg_types)
+ ["...", ],
name="eval_expr",
lang_version=(2018, 2),
)
loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim)
loopy_knl = lp.set_options(loopy_knl, write_cl=False)
loopy_knl = lp.set_options(loopy_knl, return_dict=True)
if self.function_manglers is not None:
loopy_knl = lp.register_function_manglers(
loopy_knl,
self.function_manglers)
if self.preamble_generators is not None:
loopy_knl = lp.register_preamble_generators(
loopy_knl,
self.preamble_generators)
return loopy_knl
def get_optimized_kernel(self, ncpus=None, **kwargs):
knl = self.get_kernel(**kwargs)
if ncpus is None:
import multiprocessing
# NOTE: this detects the number of logical cores, which
# may result in suboptimal performance.
ncpus = multiprocessing.cpu_count()
knl = lp.split_iname(
knl,
split_iname="itgt",
inner_length=ncpus,
inner_tag="g.0")
return knl
def __call__(self, queue, target_points, **kwargs):
"""
:arg target_points
:arg extra_kernel_kwargs
"""
# handle target_points given as an obj_array of coords
if (isinstance(target_points, np.ndarray)
and target_points.dtype == np.object
and isinstance(target_points[0], cl.array.Array)):
target_points = cl.array.concatenate(
target_points).reshape([self.dim, -1])
assert target_points.shape[0] == self.dim
n_tgt_points = target_points[0].shape[0]
for tgt_d in target_points:
assert len(tgt_d) == n_tgt_points
extra_kernel_kwargs = {}
if "extra_kernel_kwargs" in kwargs:
extra_kernel_kwargs = kwargs["extra_kernel_kwargs"]
knl = self.get_cached_optimized_kernel()
# FIXME: caching loses function mangler information
if self.function_manglers is not None:
knl = lp.register_function_manglers(knl, self.function_manglers)
if self.preamble_generators is not None:
knl = lp.register_preamble_generators(knl, self.preamble_generators)
evt, res = knl(
queue,
target_points=target_points,
n_targets=n_tgt_points,
result=np.zeros(n_tgt_points, dtype=self.dtype),
**extra_kernel_kwargs
)
return res["result"]
# }}} End scalar field expression eval
# {{{ import code
[docs]def import_code(code, name, add_to_sys_modules=True):
"""Dynamically generates a module.
:arg code: can be any object containing code -- string, file object, or
compiled code object. Returns a new module object initialized
by dynamically importing the given code and optionally adds it
to sys.modules under the given name.
"""
import imp
module = imp.new_module(name)
if add_to_sys_modules:
import sys
sys.modules[name] = module
exec(code, module.__dict__)
return module
# }}} End import code
# {{{ box-specific maps
[docs]class BoxSpecificMap(KernelCacheWrapper):
"""
Box-specific transform that maps between datum defined on quadrature
nodes. Being box-specific means that the transform for each box is
independent from the rest of the boxes.
"""
pass
# {{{ discrete Legendre transform
# }}} End discrete Legendre transform
# {{{ inverse discrete Legendre transform
# }}} End inverse discrete Legendre transform
# }}} End box-specific maps
# {{{ box-specific reductions
[docs]class BoxSpecificReduction(KernelCacheWrapper):
"""
Box-specific reduction that maps for each box a data vector defined
on the quadrature nodes to a scalar.
Being box-specific means that the reductions for each box is
independent from the rest of the boxes.
"""
pass
# {{{ sum
[docs]class BoxSum(BoxSpecificReduction):
"""
Adds up nodal values within each box.
"""
def __init__(self, dim, degree):
"""
:arg dim
:arg degree Number of nodes in each axis direction.
"""
assert dim > 0
self.dim = dim
assert degree > 0
self.degree = degree
self.name = "BoxSum"
def get_cache_key(self):
return (
type(self).__name__,
str(self.dim) + "D",
"degree=%d" % self.degree
)
def get_kernel(self, **kwargs):
loopy_knl = lp.make_kernel( # NOQA
[
"{ [ bid ] : 0 <= bid < n_boxes }",
"{ [ nid ] : 0 <= nid < n_box_nodes }"
],
[
"""
for bid
<> box_id = boxes[bid]
<> box_node_beg = box_node_starts[box_id]
result[bid] = sum(nid,
func[box_node_beg + nid]
* filter_multiplier[nid])
end
"""
],
[
lp.ValueArg("n_box_nodes, n_boxes", np.int32),
lp.GlobalArg("filter_multiplier", np.float64, "n_box_nodes"),
lp.GlobalArg("func", np.float64, "n_box_nodes * n_boxes"),
"...",
],
name="box_filtered_sum",
lang_version=(2018, 2),
)
loopy_knl = lp.set_options(loopy_knl, write_cl=False)
loopy_knl = lp.set_options(loopy_knl, return_dict=True)
return loopy_knl
def get_optimized_kernel(self, ncpus=None, **kwargs):
knl = self.get_kernel(**kwargs)
if ncpus is None:
import multiprocessing
ncpus = multiprocessing.cpu_count()
knl = lp.split_iname(
knl,
split_iname="bid",
inner_length=ncpus,
inner_tag="g.0")
return knl
def __call__(self, queue, traversal, nodal_vals, filtering=None, **kwargs):
"""
:arg traversal
:arg nodal_vals CL array of nodal values.
:arg filtering Box-wide filter given by an CL array or None.
.. warning::
The output of this kernel is ordered in :mod:`boxtree`'s box ids.
It may not be the same as the order implied by the input (e.g.
box mesh generated by dealii).
"""
if filtering is None:
filter_multiplier = 1 + cl.array.zeros(queue,
self.degree**self.dim, np.float64)
elif isinstance(filtering, cl.array.Array):
assert filtering.shape == (self.degree**self.dim,)
filter_multiplier = filtering
else:
raise RuntimeError("Invalid filtering argument: %s"
% str(filtering))
knl = self.get_cached_optimized_kernel()
n_boxes = traversal.target_boxes.shape[0]
evt, res = knl(
queue,
boxes=traversal.target_boxes,
box_node_starts=traversal.tree.box_target_starts,
func=nodal_vals,
n_box_nodes=self.degree**self.dim,
n_boxes=n_boxes,
filter_multiplier=filter_multiplier,
result=cl.array.zeros(queue, n_boxes, nodal_vals.dtype),
)
return res["result"]
# }}} End sum
# }}} End box-specific reductions
# {{{ filters for box-specific operators
[docs]def generate_leading_order_filtering(dim, n_dofs):
"""Returns a filtering vector that is an indicator function of the node
that corresponds to the leading order modal values in the Fourier space.
"""
mask1d = np.zeros(n_dofs)
mask1d[-1] = 1
if dim == 1:
return mask1d
elif dim == 2:
return (
mask1d[:, None] + mask1d[None, :]
- mask1d[:, None] * mask1d[None, :]
).reshape(-1)
elif dim == 3:
return (
mask1d[:, None, None] + mask1d[None, :, None] + mask1d[None, None, :]
- mask1d[:, None, None] * mask1d[None, :, None]
- mask1d[:, None, None] * mask1d[None, None, :]
- mask1d[None, :, None] * mask1d[None, None, :]
+ mask1d[:, None, None] * mask1d[None, :, None] * mask1d[None, None, :]
).reshape(-1)
else:
raise NotImplementedError("Dimension %d not supported" % dim)
# }}} End filters for box-specific operators