Source code for volumential.droste

from __future__ import absolute_import, division, print_function

__copyright__ = "Copyright (C) 2018 Xiaoyu Wei"

__license__ = """
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""

import numpy as np
import loopy as lp
from volumential.tools import KernelCacheWrapper

import logging

logger = logging.getLogger(__name__)

__doc__ = """
.. autoclass:: DrosteBase
   :members:
.. autoclass:: DrosteFull
   :members:
.. autoclass:: DrosteReduced
   :members:
.. autoclass:: InverseDrosteReduced
   :members:
"""

if 0:
    logging.basicConfig(level=logging.INFO)

# {{{ Droste base class


[docs]class DrosteBase(KernelCacheWrapper): """ Base class for Droste methods. It uses sumpy tools to cache the loopy kernel. .. attribute:: integral_knl The integral kernel of sumpy.kernel type. .. attribute:: interaction_case_vecs The relative positions of the target box for each case. .. attribute:: interaction_case_scls The relative sizes of the target box for each case. .. attribute:: special_radial_quadrature If True, the radial direction uses a different quadrature rule. The radial direction is identified with the condition ``ibrick_axis == iaxis`` and always uses iname ``q0``. """ def __init__(self, integral_knl, quad_order, case_vecs, n_brick_quad_points, special_radial_quadrature, nradial_quad_points): from sumpy.kernel import Kernel if integral_knl is not None: assert isinstance(integral_knl, Kernel) self.integral_knl = integral_knl self.dim = integral_knl.dim else: self.integral_knl = None self.dim = None if case_vecs is not None: self.ncases = len(case_vecs) for cvec in case_vecs: assert len(cvec) == self.dim self.interaction_case_vecs = np.array( [[v[d] for v in case_vecs] for d in range(self.dim)] ) else: self.ncases = 0 case_vecs = list() self.interaction_case_vecs = np.array([]) self.interaction_case_scls = np.array( [ 1 if int(max(abs(np.array(vec)))) == 0 else max([abs(cvc) - 0.5 for cvc in np.array(vec) / 4]) * 2 for vec in case_vecs ] ) self.nfunctions = quad_order self.ntgt_points = quad_order self.nquad_points = n_brick_quad_points if special_radial_quadrature and (self.dim == 1): raise ValueError( "please only use normal quadrature nodes for 1D quadrature") self.special_radial_quadrature = special_radial_quadrature self.nradial_quad_points = nradial_quad_points self.brick_quadrature_kwargs = self.make_brick_quadrature_kwargs() if self.dim is not None: self.n_q_points = quad_order ** self.dim else: self.n_q_points = None self.name = "DrosteBase" def make_basis_vars(self): d = self.dim self.basis_vars = ["f%d" % i for i in range(d)] return self.basis_vars def make_tgt_vars(self): d = self.dim self.tgt_vars = ["t%d" % i for i in range(d)] return self.tgt_vars def make_quad_vars(self): d = self.dim self.quad_vars = ["q%d" % i for i in range(d)] return self.quad_vars def make_basis_eval_vars(self): d = self.dim self.basis_eval_vars = ["p%d" % i for i in range(d)] return self.basis_eval_vars def make_pwaffs(self): import islpy as isl self.pwaffs = isl.make_zero_and_vars( self.tgt_vars + self.basis_vars + self.basis_eval_vars + self.quad_vars + ["iside", "iaxis", "ibrick_side", "ibrick_axis", "ilevel", "icase"], ["nlevels"], ) return self.pwaffs def make_brick_domain(self, variables, n, lbound=0, ubound=0): pwaffs = self.pwaffs if isinstance(n, str): ubound_pwaff = pwaffs[n] + ubound else: ubound_pwaff = pwaffs[0] + n from functools import reduce import operator return reduce( operator.and_, [(pwaffs[0] + lbound).le_set(pwaffs[var]) for var in variables] + [pwaffs[var].lt_set(ubound_pwaff) for var in variables], )
[docs] def make_brick_quadrature_kwargs(self): """Produce 1D quadrature formulae used for each brick. The rules returned are defined over [0, 1]. """ # sps.legendre blows up easily at high order import scipy.special as sps # legendre_nodes, _, legendre_weights = sps.legendre( # nquad_points).weights.T legendre_nodes, legendre_weights = sps.p_roots(self.nquad_points) legendre_nodes = legendre_nodes * 0.5 + 0.5 legendre_weights = legendre_weights * 0.5 if self.special_radial_quadrature: # Since there is no exact degree -> nqpts map for tanh-sinh rule, # we find the largest rule whose size dose not exceed the parameter # nradial_quad_points import mpmath mp_ctx = mpmath.mp mp_ctx.dps = 20 # decimal precision mp_ctx.pretty = True ts = mpmath.calculus.quadrature.TanhSinh(mp_ctx) prec = int(np.log(10) / np.log(2) * mp_ctx.dps) # bits of precision for deg in range(1, 100): nodes = ts.calc_nodes(degree=deg, prec=prec) if len(nodes) >= self.nradial_quad_points: self.nradial_quad_points = len(nodes) break # extract quadrature formula over [-1, 1], note the 0.5**level scaling ts_nodes = np.array([p[0] for p in nodes], dtype=np.float64) ts_weights = np.array( [p[1] * 2 * mp_ctx.power(0.5, deg) for p in nodes], dtype=np.float64) if deg == 1: ts_weights *= 0.5 # peculiar scaling when deg=1 # map to [0, 1] ts_nodes = ts_nodes * 0.5 + 0.5 ts_weights *= 0.5 return dict(quadrature_nodes=legendre_nodes, quadrature_weights=legendre_weights, radial_quadrature_nodes=ts_nodes, radial_quadrature_weights=ts_weights) else: return dict(quadrature_nodes=legendre_nodes, quadrature_weights=legendre_weights)
def get_sumpy_kernel_insns(self): # get sumpy kernel insns from sumpy.symbolic import make_sym_vector dvec = make_sym_vector("dist", self.dim) from sumpy.assignment_collection import SymbolicAssignmentCollection sac = SymbolicAssignmentCollection() result_name = sac.assign_unique( "knl_val", self.integral_knl.postprocess_at_target( self.integral_knl.postprocess_at_source( self.integral_knl.get_expression(dvec), dvec ), dvec, ), ) sac.run_global_cse() import six from sumpy.codegen import to_loopy_insns loopy_insns = to_loopy_insns( six.iteritems(sac.assignments), vector_names=set(["dist"]), pymbolic_expr_maps=[self.integral_knl.get_code_transformer()], retain_names=[result_name], complex_dtype=np.complex128, ) return loopy_insns def get_sumpy_kernel_eval_insns(self): # actual integral kernel evaluation (within proper inames) knl_loopy_insns = self.get_sumpy_kernel_insns() quad_inames = frozenset( ["icase"] + self.tgt_vars + self.basis_vars + ["ilevel", "ibrick_axis", "ibrick_side"] + self.quad_vars ) quad_kernel_insns = [ insn.copy(within_inames=insn.within_inames | quad_inames) for insn in knl_loopy_insns ] from sumpy.symbolic import SympyToPymbolicMapper sympy_conv = SympyToPymbolicMapper() scaling_assignment = lp.Assignment( id=None, assignee="knl_scaling", expression=sympy_conv(self.integral_knl.get_global_scaling_const()), temp_var_type=lp.Optional(), ) return quad_kernel_insns + [scaling_assignment]
[docs] def codegen_basis_eval(self, iaxis): """Generate instructions to evaluate Chebyshev polynomial basis. (Chebyshev polynomials of the first kind T_n). """ code = """ <> T0_IAXIS = 1 <> T1_IAXIS = template_mapped_point_tmp[IAXIS] {dep=mpoint} <> Tprev_IAXIS = T0_IAXIS {id=t0_IAXIS} <> Tcur_IAXIS = T1_IAXIS {id=t1_IAXIS,dep=t0_IAXIS} for pIAXIS <> Tnext_IAXIS = (2 * template_mapped_point_tmp[IAXIS] * Tcur_IAXIS - Tprev_IAXIS) {id=tnextIAXIS,dep=t1_IAXIS} Tprev_IAXIS = Tcur_IAXIS {id=tprev_updateIAXIS,dep=tnextIAXIS} Tcur_IAXIS = Tnext_IAXIS {id=tcur_updateIAXIS,dep=tprev_updateIAXIS} end <> basis_evalIAXIS = ( T0_IAXIS * if(fIAXIS == 0, 1, 0) + T1_IAXIS * if(fIAXIS == 1, 1, 0) + simul_reduce(sum, pIAXIS, if(fIAXIS >= 2 and fIAXIS == pIAXIS + 2, Tcur_IAXIS, 0)) ) {id=basisIAXIS,dep=tcur_updateIAXIS} """.replace( "IAXIS", str(iaxis) ) return code
def make_dim_independent(self, knlstring): # replace REV_* first resknl = knlstring.replace("REV_BASIS_VARS", ",".join(self.basis_vars[::-1])) resknl = resknl.replace("REV_TGT_VARS", ",".join(self.tgt_vars[::-1])) resknl = resknl.replace("BASIS_VARS", ",".join(self.basis_vars)) resknl = resknl.replace("TGT_VARS", ",".join(self.tgt_vars)) resknl = resknl.replace("QUAD_VARS", ",".join(self.quad_vars)) resknl = resknl.replace( "POSTPROCESS_KNL_VAL", "<> knl_val_post = knl_val {id=pp_kval}" ) if self.dim == 1: resknl = resknl.replace("TPLTGT_ASSIGNMENT", """target_nodes[t0]""") resknl = resknl.replace("QUAD_PT_ASSIGNMENT", """quadrature_nodes[q0]""") resknl = resknl.replace("PROD_QUAD_WEIGHT", """quadrature_weights[q0]""") resknl = resknl.replace("DENSITY_VAL_ASSIGNMENT", """basis_eval0""") resknl = resknl.replace( "PREPARE_BASIS_VALS", "\n".join([self.codegen_basis_eval(i) for i in range(self.dim)]) + """ ... nop {id=basis_evals,dep=basis0} """, ) elif self.dim == 2: resknl = resknl.replace( "TPLTGT_ASSIGNMENT", """if(iaxis == 0, target_nodes[t0], target_nodes[t1])""", ) if self.special_radial_quadrature: resknl = resknl.replace( "QUAD_PT_ASSIGNMENT", """if(iaxis == ibrick_axis, radial_quadrature_nodes[q0], quadrature_nodes[q1])""", ) resknl = resknl.replace( "PROD_QUAD_WEIGHT", """radial_quadrature_weights[q0] * quadrature_weights[q1]""") else: resknl = resknl.replace( "QUAD_PT_ASSIGNMENT", """if(iaxis == 0, quadrature_nodes[q0], quadrature_nodes[q1])""", ) resknl = resknl.replace( "PROD_QUAD_WEIGHT", """quadrature_weights[q0] * quadrature_weights[q1]""") resknl = resknl.replace( "DENSITY_VAL_ASSIGNMENT", """basis_eval0 * basis_eval1""" ) resknl = resknl.replace( "PREPARE_BASIS_VALS", "\n".join([self.codegen_basis_eval(i) for i in range(self.dim)]) + """ ... nop {id=basis_evals,dep=basis0:basis1} """, ) elif self.dim == 3: resknl = resknl.replace( "TPLTGT_ASSIGNMENT", """if(iaxis == 0, target_nodes[t0], if( iaxis == 1, target_nodes[t1], target_nodes[t2]))""", ) if self.special_radial_quadrature: # depending on ibrick_axis, axes could be # [q0, q1, q2], [q1, q0, q2], or [q1, q2, q0] resknl = resknl.replace( "QUAD_PT_ASSIGNMENT", """if(iaxis == ibrick_axis, radial_quadrature_nodes[q0], if( (iaxis == 0) or ((iaxis == 1) and (ibrick_axis == 0)), quadrature_nodes[q1], quadrature_nodes[q2]))""", ) resknl = resknl.replace( "PROD_QUAD_WEIGHT", """radial_w[q0] * w[q1] * w[q2]""".replace( "w", "quadrature_weights")) else: resknl = resknl.replace( "QUAD_PT_ASSIGNMENT", """if(iaxis == 0, quadrature_nodes[q0], if( iaxis == 1, quadrature_nodes[q1], quadrature_nodes[q2]))""") resknl = resknl.replace( "PROD_QUAD_WEIGHT", """w[q0] * w[q1] * w[q2]""".replace("w", "quadrature_weights")) resknl = resknl.replace( "DENSITY_VAL_ASSIGNMENT", """basis_eval0 * basis_eval1 * basis_eval2""" ) resknl = resknl.replace( "PREPARE_BASIS_VALS", "\n".join([self.codegen_basis_eval(i) for i in range(self.dim)]) + """ ... nop {id=basis_evals,dep=basis0:basis1:basis2} """, ) else: raise NotImplementedError return resknl
[docs] def make_result_array(self, **kwargs): """Allocate memory space for results. """ # by default uses double type returns if "result_dtype" in kwargs: result_dtype = kwargs["result_dtype"] else: if self.integral_knl.is_complex_valued: result_dtype = np.complex128 else: result_dtype = np.float64 # allocate return arrays if self.dim == 1: result_array = ( np.zeros( (self.nfunctions, self.ntgt_points, self.ncases), result_dtype ) + np.nan) elif self.dim == 2: result_array = ( np.zeros( ( self.nfunctions, self.nfunctions, self.ntgt_points, self.ntgt_points, self.ncases, ), result_dtype ) + np.nan ) elif self.dim == 3: result_array = ( np.zeros( ( self.nfunctions, self.nfunctions, self.nfunctions, self.ntgt_points, self.ntgt_points, self.ntgt_points, self.ncases, ), result_dtype ) + np.nan ) else: raise NotImplementedError return result_array
def get_kernel_code(self): return [ self.make_dim_independent( """ # noqa for iaxis <> root_center[iaxis] = 0.5 * ( root_brick[iaxis, 1] + root_brick[iaxis, 0]) {dup=iaxis} <> root_extent[iaxis] = (root_brick[iaxis, 1] - root_brick[iaxis, 0]) {dup=iaxis} end for ilevel, BASIS_VARS, TGT_VARS, icase # Targets outside projected onto the boundary for iaxis <> template_target[iaxis] = TPLTGT_ASSIGNMENT \ {id=tplt_tgt_pre,dup=iaxis} end # True targets are used for kernel evaluation for iaxis <> true_target[iaxis] = ( root_center[iaxis] + interaction_case_vecs[iaxis, icase] * 0.25 * root_extent[iaxis] + interaction_case_scls[icase] * (template_target[iaxis] - 0.5) * root_extent[iaxis] ) {id=true_targets,dup=iaxis,dep=tplt_tgt_pre} # Re-map the mapped points to [-1,1]^dim for Chebyshev evals <> template_true_target[iaxis] = 0.0 + ( true_target[iaxis] - root_center[iaxis] ) / root_extent[iaxis] * 2 {id=template_true_targets,dup=iaxis,dep=true_targets} end # Projected targets are used for brick construction for iaxis <> target[iaxis] = if( true_target[iaxis] > root_brick[iaxis, 1], root_brick[iaxis, 1], if( true_target[iaxis] < root_brick[iaxis, 0], root_brick[iaxis, 0], true_target[iaxis])) {dup=iaxis,dep=true_targets} end for iaxis template_target[iaxis] = (0.5 + interaction_case_vecs[iaxis, icase] * 0.25 + interaction_case_scls[icase] * (template_target[iaxis] - 0.5) ) {id=tplt_tgt3,dup=iaxis,dep=tplt_tgt_pre:true_targets} end for iaxis template_target[iaxis] = if( template_target[iaxis] > 1, 1, if( template_target[iaxis] < 0, 0, template_target[iaxis]) ) {id=tplt_tgt4,dup=iaxis,dep=tplt_tgt3} end ... nop {id=tplt_tgt,dep=tplt_tgt_pre:tplt_tgt4:true_targets} # Debug output #for iaxis # projected_template_targets[TGT_VARS, icase, iaxis] = \ # template_target[iaxis] {dup=iaxis,dep=tplt_tgt} #end # Debug output #for iaxis # target_points[ilevel, BASIS_VARS, TGT_VARS, icase, iaxis # ] = true_target[iaxis] {dup=iaxis,dep=true_targets} #end for iaxis, iside <> outer_brick[iaxis, iside] = ( (alpha**ilevel)*root_brick[iaxis,iside] + (1-alpha**ilevel) * target[iaxis]) {dup=iaxis:iside} <> inner_brick[iaxis, iside] = ( alpha*outer_brick[iaxis,iside] + (1-alpha) * target[iaxis]) {dup=iaxis:iside} end for iaxis <> ob_ext[iaxis] = (outer_brick[iaxis, 1] - outer_brick[iaxis, 0]) {dup=iaxis} end for ibrick_axis, ibrick_side, QUAD_VARS for iaxis <> point[iaxis] = QUAD_PT_ASSIGNMENT {dup=iaxis} end <> deform = point[ibrick_axis]*(1-alpha) for iaxis if iaxis == ibrick_axis <> mapped_point_tmp[iaxis] = ( point[ibrick_axis]*( inner_brick[ibrick_axis, ibrick_side] - outer_brick[ibrick_axis, ibrick_side]) + outer_brick[ibrick_axis, ibrick_side]) \ {id=mpoint1,nosync=mpoint2} else <> pre_scale = ( point[iaxis] + deform*(template_target[iaxis]-point[iaxis]) ) {dep=tplt_tgt} mapped_point_tmp[iaxis] = \ ob_ext[iaxis] * pre_scale + outer_brick[iaxis, 0] \ {id=mpoint2,nosync=mpoint1} end ... nop {id=mpoint,dep=mpoint1:mpoint2} # Re-map the mapped points to [-1,1]^dim for Chebyshev evals <> template_mapped_point_tmp[iaxis] = 0.0 + ( mapped_point_tmp[iaxis] - root_center[iaxis] ) / root_extent[iaxis] * 2 {dep=mpoint} # Debug output #mapped_points[ilevel, ibrick_axis, # ibrick_side, TGT_VARS, icase, iaxis, QUAD_VARS] = \ # mapped_point_tmp[iaxis] {dep=mpoint} #mapped_target[TGT_VARS, icase, iaxis] = target[iaxis] end for iaxis if iaxis == ibrick_axis <> jac_part = ( inner_brick[ibrick_axis, ibrick_side] - outer_brick[ibrick_axis, ibrick_side] ) {id=jpart1} else jac_part = ob_ext[iaxis] * (1-deform) {id=jpart2} end end <> jacobian = abs(product(iaxis, jac_part)) {id=jac,dep=jpart1:jpart2} <> dist[iaxis] = (true_target[iaxis] - mapped_point_tmp[iaxis]) {dep=mpoint:true_targets} # optional postprocessing of kernel values POSTPROCESS_KNL_VAL # in our case 0 * inf = 0 <> knl_val_finished = if(abs(knl_val_post) > 1e16, 0, knl_val_post) {id=finish_kval,dep=pp_kval} end end for BASIS_VARS, TGT_VARS, icase for ilevel for ibrick_axis, ibrick_side, QUAD_VARS PREPARE_BASIS_VALS <> density_val = DENSITY_VAL_ASSIGNMENT \ {id=density,dep=basis_evals} end end end for BASIS_VARS, TGT_VARS, icase # the first index is contiguous result[REV_BASIS_VARS, REV_TGT_VARS, icase] = (sum( (ilevel, ibrick_axis, ibrick_side, QUAD_VARS), PROD_QUAD_WEIGHT * jacobian * knl_val_finished * density_val ) * knl_scaling ) {id=result,dep=jac:mpoint:density:finish_kval} end """) ] def get_target_points(self, queue=None): import volumential.meshgen as mg q_points, _, _ = mg.make_uniform_cubic_grid( degree=self.ntgt_points, level=1, dim=self.dim, queue=queue) # map to [0,1]^d mapped_q_points = np.array( [ 0.5 * (qp + np.ones(self.dim)) for qp in q_points ] ) # sort in dictionary order, preserve only the leading # digits to prevent floating point errors from polluting # the ordering. q_points_ordering = sorted( range(len(mapped_q_points)), key=lambda i: list(np.floor(mapped_q_points[i] * 10000)), ) return mapped_q_points[q_points_ordering] def postprocess_cheb_table(self, cheb_table, cheb_coefs): nfp_table = np.zeros( [self.n_q_points, ] + list(cheb_table.shape[self.dim:]), dtype=cheb_table.dtype) # transform to interpolatory basis functions # NOTE: the reversed order of indices, e.g., # mccoefs[f0, f1, f2], and cheb_table[f2, f1, f0] concat_axes = [iaxis for iaxis in range(self.dim)] for mid in range(self.n_q_points): mccoefs = cheb_coefs[mid] nfp_table[mid] = np.tensordot( mccoefs.reshape([self.nfunctions for iaxis in range(self.dim)]), cheb_table, axes=(concat_axes, concat_axes[::-1]), ) # transform to self.data format (icase, source_id, target_id) # NOTE: the directions of target_id are reversed since cheb_table # is indexed by, e.g. cheb_table[t2, t1, t0] transpose_axes = (self.dim + 1, 0) + tuple( self.dim - i for i in range(self.dim) ) return nfp_table.transpose(transpose_axes).reshape(-1, order="C")
# }}} End Droste base class # {{{ full Droste method
[docs]class DrosteFull(DrosteBase): """ Build the full table directly. """ def __init__(self, integral_knl, quad_order, case_vecs, n_brick_quad_points=50, special_radial_quadrature=False, nradial_quad_points=None): super(DrosteFull, self).__init__( integral_knl, quad_order, case_vecs, n_brick_quad_points, special_radial_quadrature, nradial_quad_points) self.name = "DrosteFull" def make_loop_domain(self): tgt_vars = self.make_tgt_vars() quad_vars = self.make_quad_vars() basis_vars = self.make_basis_vars() basis_eval_vars = self.make_basis_eval_vars() pwaffs = self.make_pwaffs() # noqa: F841 if self.special_radial_quadrature: quad_vars_subdomain = self.make_brick_domain( quad_vars[1:], self.nquad_points) & self.make_brick_domain( quad_vars[:1], self.nradial_quad_points) else: quad_vars_subdomain = self.make_brick_domain( quad_vars, self.nquad_points) self.loop_domain = ( self.make_brick_domain(tgt_vars, self.ntgt_points) & quad_vars_subdomain & self.make_brick_domain(basis_vars, self.nfunctions) & self.make_brick_domain(basis_eval_vars, self.nfunctions) & self.make_brick_domain(["iside"], 2) & self.make_brick_domain(["iaxis"], self.dim) & self.make_brick_domain(["ibrick_side"], 2) & self.make_brick_domain(["ibrick_axis"], self.dim) & self.make_brick_domain(["ilevel"], "nlevels") & self.make_brick_domain(["icase"], self.ncases) ) return self.loop_domain def get_kernel(self, **kwargs): domain = self.make_loop_domain() extra_kernel_kwarg_types = () if "extra_kernel_kwarg_types" in kwargs: extra_kernel_kwarg_types = kwargs["extra_kernel_kwarg_types"] loopy_knl = lp.make_kernel( # NOQA [domain], self.get_kernel_code() + self.get_sumpy_kernel_eval_insns(), [ lp.ValueArg("alpha", np.float64), lp.ValueArg("n_cases, nfunctions, quad_order, dim", np.int32), lp.GlobalArg("interaction_case_vecs", np.float64, "dim, n_cases"), lp.GlobalArg("interaction_case_scls", np.float64, "n_cases"), lp.GlobalArg( "result", None, ", ".join( ["nfunctions" for d in range(self.dim)] + ["quad_order" for d in range(self.dim)] ) + ", n_cases", ), ] + list(extra_kernel_kwarg_types) + ["...", ], name="brick_map", lang_version=(2018, 2), ) loopy_knl = lp.fix_parameters(loopy_knl, d=self.dim) loopy_knl = lp.set_options(loopy_knl, write_cl=False) loopy_knl = lp.set_options(loopy_knl, return_dict=True) try: loopy_knl = self.integral_knl.prepare_loopy_kernel(loopy_knl) except Exception: # noqa: B902 pass return loopy_knl def get_optimized_kernel(self, ncpus=None, **kwargs): if ncpus is None: import multiprocessing # NOTE: this detects the number of logical cores, which # may result in suboptimal performance. ncpus = multiprocessing.cpu_count() knl = self.get_kernel(**kwargs) knl = lp.split_iname(knl, "icase", ncpus, inner_tag="g.0") knl = lp.add_inames_for_unused_hw_axes(knl) return knl
[docs] def call_loopy_kernel(self, queue, **kwargs): """ :param source_box_extent: :param alpha: :param nlevels: :param extra_kernel_kwargs: """ if "source_box_extent" in kwargs: assert kwargs["source_box_extent"] > 0 source_box_extent = kwargs["source_box_extent"] else: source_box_extent = 1 extra_kernel_kwargs = {} if "extra_kernel_kwargs" in kwargs: extra_kernel_kwargs = kwargs["extra_kernel_kwargs"] if "alpha" in kwargs: alpha = kwargs["alpha"] assert alpha >= 0 and alpha < 1 else: alpha = 0 if "nlevels" in kwargs: nlevels = kwargs["nlevels"] assert nlevels > 0 if nlevels > 1 and self.special_radial_quadrature: logger.warn("When using tanh-sinh quadrature in the radial " "direction, it is often best to use a single level.") else: # Single level is equivalent to Duffy transform nlevels = 1 missing_measure = (alpha ** nlevels * source_box_extent) ** self.dim if abs(missing_measure) > 1e-6: from warnings import warn warn( "Droste probably has too few levels, missing measure = " + str(missing_measure) ) if "result_array" in kwargs: result_array = kwargs["result_array"] else: result_array = self.make_result_array(**kwargs) # root brick root_brick = np.zeros((self.dim, 2)) root_brick[:, 1] = source_box_extent # target points in 1D q_points = self.get_target_points(queue) assert len(q_points) == self.ntgt_points ** self.dim t = np.array([pt[-1] for pt in q_points[: self.ntgt_points]]) knl = self.get_cached_optimized_kernel() evt, res = knl( queue, alpha=alpha, result=result_array, root_brick=root_brick, target_nodes=t.astype(np.float64, copy=True), interaction_case_vecs=self.interaction_case_vecs.astype( np.float64, copy=True), interaction_case_scls=self.interaction_case_scls.reshape(-1).astype( np.float64, copy=True), n_cases=self.ncases, nfunctions=self.nfunctions, quad_order=self.ntgt_points, nlevels=nlevels, **self.brick_quadrature_kwargs, **extra_kernel_kwargs) cheb_table = res["result"] return cheb_table
def get_cache_key(self): return ( type(self).__name__, str(self.dim) + "D", self.integral_knl.__str__(), "quad_order-" + str(self.ntgt_points), "brick_order-" + str(self.nquad_points), "brick_radial_order-" + str(self.nradial_quad_points), ) def __call__(self, queue, **kwargs): """ :arg source_box_extent :arg alpha :arg nlevels :arg extra_kernel_kwargs """ if "cheb_coefs" in kwargs: assert len(kwargs["cheb_coefs"]) == self.n_q_points for ccoef in kwargs["cheb_coefs"]: assert len(ccoef) == self.nfunctions ** self.dim cheb_coefs = kwargs["cheb_coefs"] return self.postprocess_cheb_table( self.call_loopy_kernel(queue, **kwargs), cheb_coefs ) else: logger.info("Returning cheb table directly") return self.call_loopy_kernel(queue, **kwargs)
# }}} End full Droste method # {{{ reduced Droste method
[docs]class DrosteReduced(DrosteBase): """ Reduce the workload by only building part of the table and infer the rest of the table by symmetry. """ def __init__( self, integral_knl=None, quad_order=None, case_vecs=None, n_brick_quad_points=50, knl_symmetry_tags=None, special_radial_quadrature=False, nradial_quad_points=None, ): super(DrosteReduced, self).__init__( integral_knl, quad_order, case_vecs, n_brick_quad_points, special_radial_quadrature, nradial_quad_points) from volumential.list1_symmetry import CaseVecReduction # by default only self-interaction is counted if case_vecs is None: case_vecs = [np.array([0, 0, 0]), ] self.reduce_by_symmetry = CaseVecReduction(case_vecs, knl_symmetry_tags) logger.info( "Reduction ratio by symmetry = " + str(self.reduce_by_symmetry.get_full_reduction_ratio()) ) self.nbcases = len(self.reduce_by_symmetry.reduced_vecs) self.loop_domains = [None for bvec in self.reduce_by_symmetry.reduced_vecs] # state of the object that determines behavior of functions like # get_kernel() # get_kernel() returns the kernel for actual quadrature when # get_kernel_id = 0, # while it returns the kernel for expansion within the same case when = 1. self.current_base_case = 0 self.get_kernel_id = 0 def make_loop_domain(self, base_case_id): # The icase is just a dummy variable that allows resusing some of the # code (e.g. kernel evals) from the base class assert base_case_id >= 0 assert base_case_id < self.nbcases tgt_vars = self.make_tgt_vars() # noqa: F841 quad_vars = self.make_quad_vars() basis_vars = self.make_basis_vars() basis_eval_vars = self.make_basis_eval_vars() pwaffs = self.make_pwaffs() if self.special_radial_quadrature: quad_vars_subdomain = self.make_brick_domain( quad_vars[1:], self.nquad_points) & self.make_brick_domain( quad_vars[:1], self.nradial_quad_points) else: quad_vars_subdomain = self.make_brick_domain( quad_vars, self.nquad_points) loop_domain_common_parts = ( quad_vars_subdomain & self.make_brick_domain(basis_vars, self.nfunctions) & self.make_brick_domain(basis_eval_vars, self.nfunctions) & self.make_brick_domain(["iside"], 2) & self.make_brick_domain(["iaxis"], self.dim) & self.make_brick_domain(["ibrick_side"], 2) & self.make_brick_domain(["ibrick_axis"], self.dim) & self.make_brick_domain(["ilevel"], "nlevels") & self.make_brick_domain(["icase"], 1) ) # target brick domain depends on symmetry group of the case vec # upper bounds account for flippings, and lower bounds account for swappings flippable, swappable_groups = self.reduce_by_symmetry.parse_symmetry_tags( self.reduce_by_symmetry.reduced_invariant_groups[base_case_id] ) prev_swappable = -np.ones(self.dim, dtype=np.int32) for iaxis in range(self.dim): for group in swappable_groups: if iaxis in group: axid = list(group).index(iaxis) if axid == 0: pass else: prev_swappable[iaxis] = list(group)[axid - 1] from functools import reduce import operator tgt_domain_ubounds = reduce( operator.and_, [ pwaffs["t" + str(iaxis)].lt_set( pwaffs[0] + ( (self.ntgt_points + 1) // 2 if flippable[iaxis] else self.ntgt_points ) ) for iaxis in range(self.dim) ], ) tgt_domain_lbounds = reduce( operator.and_, [ ( pwaffs[0] if prev_swappable[iaxis] == -1 else pwaffs["t" + str(prev_swappable[iaxis])] ).le_set(pwaffs["t" + str(iaxis)]) for iaxis in range(self.dim) ], ) tgt_domain = tgt_domain_ubounds & tgt_domain_lbounds self.loop_domains[base_case_id] = loop_domain_common_parts & tgt_domain return self.loop_domains[base_case_id]
[docs] def make_result_array(self, **kwargs): """Allocate memory space for results. """ # by default uses double type returns if "result_dtype" in kwargs: result_dtype = kwargs["result_dtype"] else: if self.integral_knl.is_complex_valued: result_dtype = np.complex128 else: result_dtype = np.float64 # allocate return arrays if self.dim == 1: result_array = ( np.zeros( (self.nfunctions, self.ntgt_points, 1), result_dtype ) + np.nan) elif self.dim == 2: result_array = ( np.zeros( ( self.nfunctions, self.nfunctions, self.ntgt_points, self.ntgt_points, 1, ), result_dtype ) + np.nan ) elif self.dim == 3: result_array = ( np.zeros( ( self.nfunctions, self.nfunctions, self.nfunctions, self.ntgt_points, self.ntgt_points, self.ntgt_points, 1, ), result_dtype ) + np.nan ) else: raise NotImplementedError return result_array
[docs] def get_kernel_expansion_by_symmetry_code(self): """ Extra assignments that performs expansion by symmetry within the current case. """ # case_id = self.reduce_by_symmetry.reduced_vec_ids[self.current_base_case] # case_vec = self.reduce_by_symmetry.reduced_vecs[self.current_base_case] invariant_group = self.reduce_by_symmetry.reduced_invariant_groups[ self.current_base_case ] flippable, swappable_groups = self.reduce_by_symmetry.parse_symmetry_tags( invariant_group ) nflippables = int(sum(flippable)) assert len(flippable) == self.dim flippable_ids = [i for i in range(self.dim) if flippable[i]] base_tgt_ordering = self.make_dim_independent("TGT_VARS").split(",") base_fun_ordering = self.make_dim_independent("BASIS_VARS").split(",") assert len(base_tgt_ordering) == self.dim def flip_tgt(tgt_var): assert isinstance(tgt_var, str) return str(self.ntgt_points) + " - " + tgt_var + " - 1" def conjunct(var): # conjunct(v_i) = v_{dim-1-i} assert isinstance(var, str) vid = int(var[1:]) assert vid >= 0 and vid < self.dim return var[0] + str(self.dim - 1 - vid) from itertools import product, permutations ext_ids = [ ext for ext in product( [fid for fid in product([0, 1], repeat=nflippables)], *[ [sid for sid in permutations(sgroup)] for sgroup in swappable_groups ] ) ] # The idea is that, any member of the hyperoctahedral group # has the decomposition m = f * s, where f is a flip and s # is a swap. # Conversely, by iterating all combinations of flips and swaps, # the whole group is iterated over. expansion_code = [] for ext_index, ext_id in zip(range(len(ext_ids)), ext_ids): ext_tgt_ordering = list(base_tgt_ordering) ext_fun_ordering = list(base_fun_ordering) # apply s for swap in ext_id[1:]: ns = len(swap) for i in range(ns): original_part = sorted(swap) ext_tgt_ordering[original_part[i]] = base_tgt_ordering[swap[i]] ext_fun_ordering[original_part[i]] = base_fun_ordering[swap[i]] # apply f, also figure out the rule for sign changes ext_sign = "1" for fid in range(nflippables): if ext_id[0][fid]: iaxis = flippable_ids[fid] itgt = ext_tgt_ordering.index("t" + str(iaxis)) ext_tgt_ordering[itgt] = flip_tgt(ext_tgt_ordering[itgt]) ext_sign = ( ext_sign + " * if(" + self.basis_vars[iaxis] + " % 2 == 0, 1, -1)" ) ext_tgt_ordering.reverse() ext_fun_ordering.reverse() ext_instruction_ids = ':'.join([ 'result_ext_' + str(eid) for eid in range(len(ext_ids)) if eid != ext_index ]) expansion_code += [ self.make_dim_independent( """ for BASIS_VARS, TGT_VARS, icase result[EXT_BASIS_VARS, EXT_TGT_VARS, icase] = ( EXT_SIGN * result[REV_BASIS_VARS, REV_TGT_VARS, icase] ) {id=result_ext_EXT_ID,nosync=EXT_INSN_IDS} end """.replace( "EXT_TGT_VARS", ",".join(ext_tgt_ordering) ) .replace("EXT_BASIS_VARS", ",".join(ext_fun_ordering)) .replace("EXT_SIGN", ext_sign) .replace("EXT_ID", str(ext_index)) .replace("EXT_INSN_IDS", ext_instruction_ids) ) ] return expansion_code
[docs] def get_kernel(self, **kwargs): """Reduced Droste is a 2-staged algorithm. The first stage uses the kernel from DrosteBase to build part of the table. In the second stage, an expansion kernel is called to fill the empty entries. """ domain = self.make_loop_domain(base_case_id=self.current_base_case) extra_kernel_kwarg_types = () if "extra_kernel_kwarg_types" in kwargs: extra_kernel_kwarg_types = kwargs["extra_kernel_kwarg_types"] if self.get_kernel_id == 0: loopy_knl = lp.make_kernel( # NOQA [domain], self.get_kernel_code() # FIXME: cannot have expansion in the same kernel, since it # will require a global barrier # + self.get_kernel_expansion_by_symmetry_code() + self.get_sumpy_kernel_eval_insns(), [ lp.ValueArg("alpha", np.float64), lp.ValueArg("n_cases, nfunctions, quad_order, dim", np.int32), lp.GlobalArg("interaction_case_vecs", np.float64, "dim, n_cases"), lp.GlobalArg("interaction_case_scls", np.float64, "n_cases"), lp.GlobalArg("target_nodes", np.float64, "quad_order"), lp.GlobalArg( "result", None, ", ".join( ["nfunctions" for d in range(self.dim)] + ["quad_order" for d in range(self.dim)] ) + ", n_cases", ), ] + list(extra_kernel_kwarg_types) + ["...", ], name="brick_map", lang_version=(2018, 2), ) elif self.get_kernel_id == 1: loopy_knl = lp.make_kernel( # NOQA [domain], self.get_kernel_expansion_by_symmetry_code(), [ lp.ValueArg("n_cases, nfunctions, quad_order, dim", np.int32), lp.GlobalArg( "result", None, ", ".join( ["nfunctions" for d in range(self.dim)] + ["quad_order" for d in range(self.dim)] ) + ", n_cases", ), ] + list(extra_kernel_kwarg_types) + ["...", ], name="brick_map_expansion", lang_version=(2018, 2), ) else: raise NotImplementedError loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) loopy_knl = lp.set_options(loopy_knl, write_cl=False) loopy_knl = lp.set_options(loopy_knl, return_dict=True) try: loopy_knl = self.integral_knl.prepare_loopy_kernel(loopy_knl) except Exception: # noqa: B902 pass return loopy_knl
def get_optimized_kernel(self, ncpus=None, **kwargs): # The returned kernel depends on the state variable # self.current_base_case, self.get_kernel_id if ncpus is None: import multiprocessing # NOTE: this detects the number of logical cores, which # may result in suboptimal performance. ncpus = multiprocessing.cpu_count() knl = self.get_kernel(**kwargs) knl = lp.join_inames(knl, inames=self.basis_vars, new_iname="func") knl = lp.split_iname(knl, "func", ncpus, inner_tag="g.0") knl = lp.add_inames_for_unused_hw_axes(knl) return knl
[docs] def call_loopy_kernel_case(self, queue, base_case_id, **kwargs): """ Call the table builder on one base case, as given in :self.current_base_case: :arg source_box_extent :arg alpha :arg nlevels :arg extra_kernel_kwargs """ if base_case_id != self.current_base_case: self.current_base_case = base_case_id if "source_box_extent" in kwargs: assert kwargs["source_box_extent"] > 0 source_box_extent = kwargs["source_box_extent"] else: source_box_extent = 1 extra_kernel_kwargs = {} if "extra_kernel_kwargs" in kwargs: extra_kernel_kwargs = kwargs["extra_kernel_kwargs"] if "alpha" in kwargs: alpha = kwargs["alpha"] assert alpha >= 0 and alpha < 1 else: alpha = 0 if "nlevels" in kwargs: nlevels = kwargs["nlevels"] assert nlevels > 0 else: # Single level is equivalent to Duffy transform nlevels = 1 missing_measure = (alpha ** nlevels * source_box_extent) ** self.dim if abs(missing_measure) > 1e-6: logger.warn( "Droste probably has too few levels, missing measure = %e" % missing_measure ) if "result_array" in kwargs: result_array = kwargs["result_array"] else: result_array = self.make_result_array(**kwargs) # root brick root_brick = np.zeros((self.dim, 2)) root_brick[:, 1] = source_box_extent # target points in 1D q_points = self.get_target_points(queue) assert len(q_points) == self.ntgt_points ** self.dim t = np.array([pt[-1] for pt in q_points[: self.ntgt_points]]) base_case_vec = np.array( [ [self.reduce_by_symmetry.reduced_vecs[self.current_base_case][d]] for d in range(self.dim) ] ) base_case_scl = np.array( [ self.interaction_case_scls[ self.reduce_by_symmetry.reduced_vec_ids[self.current_base_case] ] ] ) self.get_kernel_id = 0 try: delattr(self, "_memoize_dic_get_cached_optimized_kernel") except Exception: # noqa: B902 pass knl = self.get_cached_optimized_kernel() evt, res = knl( queue, alpha=alpha, result=result_array, root_brick=root_brick, target_nodes=t.astype(np.float64, copy=True), interaction_case_vecs=base_case_vec.astype(np.float64, copy=True), interaction_case_scls=base_case_scl.astype(np.float64, copy=True), n_cases=1, nfunctions=self.nfunctions, quad_order=self.ntgt_points, nlevels=nlevels, **extra_kernel_kwargs, **self.brick_quadrature_kwargs) raw_cheb_table_case = res["result"] self.get_kernel_id = 1 try: delattr(self, "_memoize_dic_get_cached_optimized_kernel") except Exception: # noqa: B902 pass knl2 = self.get_cached_optimized_kernel() evt, res2 = knl2( queue, result=raw_cheb_table_case, n_cases=1, nfunctions=self.nfunctions, quad_order=self.ntgt_points, nlevels=nlevels, ) cheb_table_case = res2["result"] return cheb_table_case
def build_cheb_table(self, queue, **kwargs): # Build the table using Cheb modes as sources if self.integral_knl.is_complex_valued: dtype = np.complex128 else: dtype = np.float64 if self.dim == 1: cheb_table = ( np.zeros((self.nfunctions, self.ntgt_points, self.ncases), dtype) + np.nan) elif self.dim == 2: cheb_table = ( np.zeros((self.nfunctions, self.nfunctions, self.ntgt_points, self.ntgt_points, self.ncases,), dtype) + np.nan) elif self.dim == 3: cheb_table = ( np.zeros((self.nfunctions, self.nfunctions, self.nfunctions, self.ntgt_points, self.ntgt_points, self.ntgt_points, self.ncases,), dtype) + np.nan) else: raise NotImplementedError for base_case_id in range(self.nbcases): self.current_base_case = base_case_id case_id = self.reduce_by_symmetry.reduced_vec_ids[base_case_id] cheb_table[..., case_id] = self.call_loopy_kernel_case( queue, base_case_id, **kwargs )[..., 0] # {{{ expansion by symmetry flippable, swappable_groups = \ self.reduce_by_symmetry.parse_symmetry_tags( self.reduce_by_symmetry.symmetry_tags ) nflippables = int(sum(flippable)) assert len(flippable) == self.dim flippable_ids = [i for i in range(self.dim) if flippable[i]] from itertools import product, permutations ext_ids = [ ext for ext in product( [fid for fid in product([0, 1], repeat=nflippables)], *[ [sid for sid in permutations(sgroup)] for sgroup in swappable_groups ] ) ] base_case_vec = self.reduce_by_symmetry.full_vecs[case_id] assert (base_case_vec == self.reduce_by_symmetry.reduced_vecs[base_case_id]) for ext_index, ext_id in zip(range(len(ext_ids)), ext_ids): # start from the base vec ext_vec = list(base_case_vec) swapped_axes = list(range(self.dim)) # apply s # Note that fi and ti are in reverse order for swap in ext_id[1:]: ns = len(swap) for i in range(ns): original_part = sorted(swap) ext_vec[original_part[i]] = base_case_vec[swap[i]] swapped_axes[self.dim - 1 - original_part[i]] = ( self.dim - 1 - swap[i] ) swapped_basis_axes = swapped_axes swapped_tgt_axes = [ax + self.dim for ax in swapped_basis_axes] tmp_table_part = ( cheb_table[..., case_id] .transpose(swapped_basis_axes + swapped_tgt_axes) .copy() ) # apply f, also figure out the rule for sign changes for fid in range(nflippables): if ext_id[0][fid]: iaxis = flippable_ids[fid] if 0: # ext_vec[swapped_iaxis] == 0: continue else: ext_vec[iaxis] = -ext_vec[iaxis] # flips.append( # lambda x: # np.flip(x, (self.dim - 1 - iaxis) # + self.dim)) full_slice = [ slice(None, None) for axid in range(2 * self.dim) ] for basis_id in range(self.nfunctions): if basis_id % 2 == 0: continue sign_slice = full_slice sign_slice[self.dim - 1 - iaxis] = basis_id np.multiply.at(tmp_table_part, tuple(sign_slice), -1) tmp_table_part = np.flip( tmp_table_part, (self.dim - 1 - iaxis) + self.dim ) assert tuple(ext_vec) in self.reduce_by_symmetry.full_vecs ext_case_id = self.reduce_by_symmetry.full_vecs.index(tuple(ext_vec)) cheb_table[..., ext_case_id] = tmp_table_part # }}} End expansion by symmetry return cheb_table def get_cache_key(self): if self.reduce_by_symmetry.symmetry_tags is None: # Bn: the full n-dimensional hyperoctahedral group symmetry_info = 'B%d' % self.dim else: symmetry_info = "Span{%s}" % ",".join([ repr(tag) for tag in sorted(self.reduce_by_symmetry.symmetry_tags) ]) return ( type(self).__name__, str(self.dim) + "D", self.integral_knl.__str__(), "quad_order-" + str(self.ntgt_points), "brick_order-" + str(self.nquad_points), "brick_radial_order-" + str(self.nradial_quad_points), "case-" + str(self.current_base_case), "kernel_id-" + str(self.get_kernel_id), "symmetry-" + symmetry_info, ) def __call__(self, queue, **kwargs): """ :arg source_box_extent :arg alpha :arg nlevels :arg extra_kernel_kwargs """ if "cheb_coefs" in kwargs: assert len(kwargs["cheb_coefs"]) == self.n_q_points for ccoef in kwargs["cheb_coefs"]: assert len(ccoef) == self.nfunctions ** self.dim cheb_coefs = kwargs["cheb_coefs"] return self.postprocess_cheb_table( self.build_cheb_table(queue, **kwargs), cheb_coefs ) else: logger.info("Returning cheb table directly") return self.build_cheb_table(queue, **kwargs)
# }}} End reduced Droste method # {{{ inverse Droste method
[docs]class InverseDrosteReduced(DrosteReduced): r""" A variant of the Droste method. Instead of computing a volume potential, it computes the "inverse" to a Riesz potential, aka a "fractional Laplacian". Specifically, given an integral kernel G(r), this class supports the precomputation for integrals of the form .. math:: \int_B G(r) (u(x) - u(y)) dy For k-dimensional fractional Laplacian, :math:`G(r) = \frac{1}{r^{k+2s}}`. The core part of concern (that is to be modified based on DrosteReduces): .. code-block:: ... for BASIS_VARS, TGT_VARS, icase for ilevel for ibrick_axis, ibrick_side, QUAD_VARS PREPARE_BASIS_VALS <> density_val = DENSITY_VAL_ASSIGNMENT \ {id=density,dep=basis_evals} end end end ... """ def __init__(self, integral_knl, quad_order, case_vecs, n_brick_quad_points=50, knl_symmetry_tags=None, special_radial_quadrature=False, nradial_quad_points=None, auto_windowing=True): """ :param auto_windowing: auto-detect window radius. """ super(InverseDrosteReduced, self).__init__( integral_knl, quad_order, case_vecs, n_brick_quad_points, special_radial_quadrature, nradial_quad_points, knl_symmetry_tags) self.auto_windowing = auto_windowing def get_cache_key(self): return ( type(self).__name__, str(self.dim) + "D", self.integral_knl.__str__(), "quad_order-" + str(self.ntgt_points), "brick_order-" + str(self.nquad_points), "brick_radial_order-" + str(self.nradial_quad_points), "case-" + str(self.current_base_case), "kernel_id-" + str(self.get_kernel_id),)
[docs] def codegen_basis_tgt_eval(self, iaxis): """Generate instructions to evaluate Chebyshev polynomial basis at the target point, given that the target point lies in the source box. (Chebyshev polynomials of the first kind T_n). If the target point is not in the source box, the concerned instructions will return 0. """ # only valid for self-interactions assert all([self.reduce_by_symmetry.reduced_vecs[ self.current_base_case][d] == 0 for d in range(self.dim)]) code = """ # noqa <> T0_tgt_IAXIS = 1 <> T1_tgt_IAXIS = template_true_target[IAXIS] {dep=template_true_targets} <> Tprev_tgt_IAXIS = T0_tgt_IAXIS {id=t0_tgt_IAXIS} <> Tcur_tgt_IAXIS = T1_tgt_IAXIS {id=t1_tgt_IAXIS,dep=t0_tgt_IAXIS} for pIAXIS <> Tnext_tgt_IAXIS = (2 * template_true_target[IAXIS] * Tcur_tgt_IAXIS - Tprev_tgt_IAXIS) {id=tnext_tgt_IAXIS,dep=t1_tgt_IAXIS} Tprev_tgt_IAXIS = Tcur_tgt_IAXIS {id=tprev_tgt_updateIAXIS,dep=tnext_tgt_IAXIS} Tcur_tgt_IAXIS = Tnext_tgt_IAXIS {id=tcur_tgt_updateIAXIS,dep=tprev_tgt_updateIAXIS} end <> basis_tgt_evalIAXIS = ( T0_tgt_IAXIS * if(fIAXIS == 0, 1, 0) + T1_tgt_IAXIS * if(fIAXIS == 1, 1, 0) + simul_reduce(sum, pIAXIS, if(fIAXIS >= 2 and fIAXIS == pIAXIS + 2, Tcur_tgt_IAXIS, 0)) ) {id=tgtbasisIAXIS,dep=tcur_tgt_updateIAXIS} """.replace( "IAXIS", str(iaxis) ) return code
[docs] def codegen_der2_basis_tgt_eval(self, iaxis): r"""Generate instructions to evaluate the second order derivatives of Chebyshev polynomial basis at the target point, given that the target lies in the source box. (Chebyshev polynomials of the first kind T_n). If the target point is not in the source box, the concerned instructions will return 0. The evaluation is based on Chebyshev polynomials of the second kind :math:`U_n`. .. math:: \frac{d^2 T_n}{dx^2} = n \frac{(n+1)T_n - U_n}{x^2 - 1} """ # only valid for self-interactions assert all([self.reduce_by_symmetry.reduced_vecs[ self.current_base_case][d] == 0 for d in range(self.dim)]) code = """ # noqa <> U0_tgt_IAXIS = 1 <> U1_tgt_IAXIS = 2 * template_true_target[IAXIS] {dep=template_true_targets} <> Uprev_tgt_IAXIS = U0_tgt_IAXIS {id=u0_tgt_IAXIS} <> Ucur_tgt_IAXIS = U1_tgt_IAXIS {id=u1_tgt_IAXIS,dep=u0_tgt_IAXIS} for pIAXIS <> Unext_tgt_IAXIS = (2 * template_true_target[IAXIS] * Ucur_tgt_IAXIS - Uprev_tgt_IAXIS) {id=unext_tgt_IAXIS,dep=u1_tgt_IAXIS} Uprev_tgt_IAXIS = Ucur_tgt_IAXIS {id=uprev_tgt_updateIAXIS,dep=unext_tgt_IAXIS} Ucur_tgt_IAXIS = Unext_tgt_IAXIS {id=ucur_tgt_updateIAXIS,dep=uprev_tgt_updateIAXIS} end # U_n(target) <> basis2_tgt_evalIAXIS = ( U0_tgt_IAXIS * if(fIAXIS == 0, 1, 0) + U1_tgt_IAXIS * if(fIAXIS == 1, 1, 0) + simul_reduce(sum, pIAXIS, if(fIAXIS >= 2 and fIAXIS == pIAXIS + 2, Ucur_tgt_IAXIS, 0)) ) {id=tgtbasis2IAXIS,dep=ucur_tgt_updateIAXIS} # this temp var helps with type deduction <> f_order_IAXIS = fIAXIS <> der2_basis_tgt_evalIAXIS = f_order_IAXIS * ( ((f_order_IAXIS + 1) * basis_tgt_evalIAXIS - basis2_tgt_evalIAXIS) / (template_true_target[IAXIS]**2 - 1) ) * (2**2) / (root_extent[IAXIS]**2) {id=tgtd2basisIAXIS,dep=tgtbasisIAXIS:tgtbasis2IAXIS} """.replace( "IAXIS", str(iaxis) ) return code
[docs] def codegen_windowing_function(self): r"""Given :math:`dist = x - y`, compute the windowing function. """ code = [] if self.dim == 1: code.append("<> distsq = dist[0]*dist[0]") elif self.dim == 2: code.append("<> distsq = dist[0]*dist[0] + dist[1]*dist[1]") elif self.dim == 3: code.append("<> distsq = dist[0]*dist[0] + \ dist[1]*dist[1] + dist[2]*dist[2]") # renormalized distance code.append("<> rndist = sqrt(distsq) / delta") if True: # polynomial windowing logger.info("Using polynomial windowing function.") code.append(r""" <> windowing = if(rndist >= 1, 0, (1 - 35 * (rndist**4) + 84 * (rndist**5) - 70 * (rndist**6) + 20 * (rndist**7) ) ) """) elif False: # classical bump function logger.info("Using bump windowing function.") code.append( "<> windowing = exp(1) * exp(-(1/(1 - (distsq / (delta*delta)))))") else: # smooth transitions of 0-->1-->0 logger.info("Using smooth transition windowing function.") code.append("<> fv = if(rndist > 0, exp(-1 / rndist), 0)") code.append("<> fc = if(1 - rndist > 0, exp(-1 / (1 - rndist)), 0)") code.append("<> windowing = 1 - fv / (fv + fc)") return '\n'.join(code)
[docs] def make_dim_independent(self, knlstring): r"""Produce the correct :math:`\text{DENSITY_VAL_ASSIGNMENT} = u(x) - u(y)` for self-interactions. """ # detect for self-interactions if all([self.reduce_by_symmetry.reduced_vecs[ self.current_base_case][d] == 0 for d in range(self.dim)]): target_box_is_source = True else: target_box_is_source = False # replace REV_* first resknl = knlstring.replace("REV_BASIS_VARS", ",".join(self.basis_vars[::-1])) resknl = resknl.replace("REV_TGT_VARS", ",".join(self.tgt_vars[::-1])) resknl = resknl.replace("BASIS_VARS", ",".join(self.basis_vars)) resknl = resknl.replace("TGT_VARS", ",".join(self.tgt_vars)) resknl = resknl.replace("QUAD_VARS", ",".join(self.quad_vars)) if self.get_kernel_id == 0: resknl = resknl.replace( "POSTPROCESS_KNL_VAL", '\n'.join([ self.codegen_windowing_function(), "<> knl_val_post = windowing * knl_val {id=pp_kval}" ]) ) elif self.get_kernel_id == 1: resknl = resknl.replace( "POSTPROCESS_KNL_VAL", '\n'.join([ self.codegen_windowing_function(), "<> knl_val_post = (1 - windowing) * knl_val {id=pp_kval}" ]) ) else: pass # {{{ density evals basis_eval_insns = [self.codegen_basis_eval(i) for i in range(self.dim)] if target_box_is_source: basis_eval_insns += [ self.codegen_basis_tgt_eval(i) for i in range(self.dim)] if self.get_kernel_id == 0: # Given target x, # u(x) - u(y) p.v. integrated around a small region symmetric to x, # truncated to second order 0.5 * [(x - y)' * diag(Hess(u)(x)) * (x - y)] if target_box_is_source: basis_eval_insns += [ self.codegen_der2_basis_tgt_eval(i) for i in range(self.dim)] resknl = resknl.replace( "PREPARE_BASIS_VALS", "\n".join(basis_eval_insns + [ "... nop {id=basis_evals,dep=%s}" % ':'.join( ['basis%d' % i for i in range(self.dim)] + ['tgtbasis%d' % i for i in range(self.dim)] + ['tgtd2basis%d' % i for i in range(self.dim)] ), ]) ) else: resknl = resknl.replace( "PREPARE_BASIS_VALS", "\n".join(basis_eval_insns + [ "... nop {id=basis_evals,dep=%s}" % ':'.join( ['basis%d' % i for i in range(self.dim)] ), ]) ) if self.dim == 1: if target_box_is_source: resknl = resknl.replace( "DENSITY_VAL_ASSIGNMENT", ' '.join([ "0.5 * der2_basis_tgt_eval0 * (dist[0]**2)", ]) ) else: resknl = resknl.replace( "DENSITY_VAL_ASSIGNMENT", "- basis_eval0" ) elif self.dim == 2: if target_box_is_source: resknl = resknl.replace( "DENSITY_VAL_ASSIGNMENT", ' '.join([ " 0.5 * der2_basis_tgt_eval0 * basis_tgt_eval1 * (dist[0]**2)", # noqa: E501 "+ 0.5 * basis_tgt_eval0 * der2_basis_tgt_eval1 * (dist[1]**2)", # noqa: E501 ]) ) else: resknl = resknl.replace( "DENSITY_VAL_ASSIGNMENT", "- basis_eval0 * basis_eval1" ) elif self.dim == 3: if target_box_is_source: resknl = resknl.replace( "DENSITY_VAL_ASSIGNMENT", ' '.join([ " 0.5 * der2_basis_tgt_eval0 * basis_tgt_eval1 * basis_tgt_eval2 * (dist[0]**2)", # noqa: E501 "+ 0.5 * basis_tgt_eval0 * der2_basis_tgt_eval1 * basis_tgt_eval2 * (dist[1]**2)", # noqa: E501 "+ 0.5 * basis_tgt_eval0 * basis_tgt_eval1 * der2_basis_tgt_eval2 * (dist[2]**2)", # noqa: E501 ]) ) else: resknl = resknl.replace( "DENSITY_VAL_ASSIGNMENT", "- basis_eval0 * basis_eval1 * basis_eval2" ) else: # self.dim not in [1, 2, 3] raise NotImplementedError("No support for dimension %d" % self.dim) elif self.get_kernel_id == 1: if target_box_is_source: # u(x) - u(y) resknl = resknl.replace( "PREPARE_BASIS_VALS", "\n".join(basis_eval_insns + [ "... nop {id=basis_evals,dep=%s}" % ':'.join( ['basis%d' % i for i in range(self.dim)] + ['tgtbasis%d' % i for i in range(self.dim)] ), ]) ) resknl = resknl.replace( "DENSITY_VAL_ASSIGNMENT", ' - '.join([ ' * '.join( ["basis_tgt_eval%d" % i for i in range(self.dim)]), ' * '.join( ["basis_eval%d" % i for i in range(self.dim)]), ]) ) else: # - u(y) resknl = resknl.replace( "PREPARE_BASIS_VALS", "\n".join(basis_eval_insns + [ "... nop {id=basis_evals,dep=%s}" % ':'.join( ['basis%d' % i for i in range(self.dim)] ), ]) ) resknl = resknl.replace( "DENSITY_VAL_ASSIGNMENT", ' - ' + ' * '.join( ["basis_eval%d" % i for i in range(self.dim)] ) ) # }}} End density evals resknl = resknl.replace( "PROD_QUAD_WEIGHT", " * ".join( [ "quadrature_weights[QID]".replace("QID", qvar) for qvar in self.quad_vars ] ), ) if self.dim == 1: resknl = resknl.replace("TPLTGT_ASSIGNMENT", """target_nodes[t0]""") resknl = resknl.replace("QUAD_PT_ASSIGNMENT", """quadrature_nodes[q0]""") elif self.dim == 2: resknl = resknl.replace( "TPLTGT_ASSIGNMENT", """if(iaxis == 0, target_nodes[t0], target_nodes[t1])""", ) resknl = resknl.replace( "QUAD_PT_ASSIGNMENT", """if(iaxis == 0, quadrature_nodes[q0], quadrature_nodes[q1])""", ) elif self.dim == 3: resknl = resknl.replace( "TPLTGT_ASSIGNMENT", """if(iaxis == 0, target_nodes[t0], if( iaxis == 1, target_nodes[t1], target_nodes[t2]))""", ) resknl = resknl.replace( "QUAD_PT_ASSIGNMENT", """if(iaxis == 0, quadrature_nodes[q0], if( iaxis == 1, quadrature_nodes[q1], quadrature_nodes[q2]))""", ) else: raise NotImplementedError return resknl
[docs] def get_kernel(self, **kwargs): """Get loopy kernel for computation, the get_kernel_id determines what task to perform. - 0: Integrate :math:`W(r) G(r) [u(x) - u(y) + grad(u)(y - x)]` - 1: Add the integral of :math:`[1 - W(r)] G(r) [u(x) - u(y)]` - 2: Expansion by symmetry. """ domain = self.make_loop_domain(base_case_id=self.current_base_case) extra_kernel_kwarg_types = () if "extra_kernel_kwarg_types" in kwargs: extra_kernel_kwarg_types = kwargs["extra_kernel_kwarg_types"] extra_loopy_kernel_kwargs = {} if "extra_loopy_kernel_kwargs" in kwargs: extra_loopy_kernel_kwargs = kwargs["extra_loopy_kernel_kwargs"] if self.get_kernel_id == 0 or self.get_kernel_id == 1: loopy_knl = lp.make_kernel( # NOQA [domain], self.get_kernel_code() + self.get_sumpy_kernel_eval_insns(), [ lp.ValueArg("alpha", np.float64), lp.ValueArg("delta", np.float64), lp.ValueArg("n_cases, nfunctions, quad_order, dim", np.int32), lp.GlobalArg("interaction_case_vecs", np.float64, "dim, n_cases"), lp.GlobalArg("interaction_case_scls", np.float64, "n_cases"), lp.GlobalArg("target_nodes", np.float64, "quad_order"), lp.GlobalArg( "result", None, ", ".join( ["nfunctions" for d in range(self.dim)] + ["quad_order" for d in range(self.dim)] ) + ", n_cases", ), ] + list(extra_kernel_kwarg_types) + ["...", ], name="brick_map_%d" % self.get_kernel_id, lang_version=(2018, 2), **extra_loopy_kernel_kwargs ) elif self.get_kernel_id == 2: loopy_knl = lp.make_kernel( # NOQA [domain], self.get_kernel_expansion_by_symmetry_code(), [ lp.ValueArg("n_cases, nfunctions, quad_order, dim", np.int32), lp.GlobalArg( "result", None, ", ".join( ["nfunctions" for d in range(self.dim)] + ["quad_order" for d in range(self.dim)] ) + ", n_cases", ), ] + list(extra_kernel_kwarg_types) + ["...", ], name="brick_map_expansion", lang_version=(2018, 2), **extra_loopy_kernel_kwargs ) else: raise NotImplementedError loopy_knl = lp.fix_parameters(loopy_knl, dim=self.dim) loopy_knl = lp.set_options(loopy_knl, write_cl=False) loopy_knl = lp.set_options(loopy_knl, return_dict=True) # loopy_knl = lp.make_reduction_inames_unique(loopy_knl) try: loopy_knl = self.integral_knl.prepare_loopy_kernel(loopy_knl) except Exception: # noqa: B902 pass return loopy_knl
[docs] def call_loopy_kernel_case(self, queue, base_case_id, **kwargs): """ Call the table builder on one base case, as given in :self.current_base_case: :arg source_box_extent :arg alpha :arg delta :arg nlevels :arg extra_kernel_kwargs """ if base_case_id != self.current_base_case: self.current_base_case = base_case_id if "source_box_extent" in kwargs: assert kwargs["source_box_extent"] > 0 source_box_extent = kwargs["source_box_extent"] else: source_box_extent = 1 extra_kernel_kwargs = {} if "extra_kernel_kwargs" in kwargs: extra_kernel_kwargs = kwargs["extra_kernel_kwargs"] if "alpha" in kwargs: alpha = kwargs["alpha"] assert alpha >= 0 and alpha < 1 else: alpha = 0 # (template) target points in 1D over [0, 1] q_points = self.get_target_points(queue) assert len(q_points) == self.ntgt_points ** self.dim t = np.array([pt[-1] for pt in q_points[: self.ntgt_points]]) tt = t * source_box_extent delta_max = min(np.min(tt), source_box_extent - np.max(tt)) assert delta_max > 0 if delta_max < 1e-6: logger.warn("Severe delta constraint (< %f)" % delta_max) if ("delta" in kwargs) and (not self.auto_windowing): delta = kwargs["delta"] logger.info("Using window radius %f" % delta) assert delta > 0 and 2 * delta < source_box_extent else: assert self.auto_windowing delta = 0.9 * delta_max logger.info("Using auto-determined window radius %f" % delta) if delta > delta_max: delta = min(delta, delta_max) logger.info("Shrinked delta to %f to fit inside the source box" % delta) if "nlevels" in kwargs: nlevels = kwargs["nlevels"] assert nlevels > 0 else: # Single level is equivalent to Duffy transform nlevels = 1 missing_measure = (alpha ** nlevels * source_box_extent) ** self.dim if abs(missing_measure) > 1e-6: from warnings import warn warn( "Droste probably has too few levels, missing measure = " + str(missing_measure) ) if "result_array" in kwargs: result_array = kwargs["result_array"] else: result_array = self.make_result_array(**kwargs) # root brick root_brick = np.zeros((self.dim, 2)) root_brick[:, 1] = source_box_extent base_case_vec = np.array( [ [self.reduce_by_symmetry.reduced_vecs[self.current_base_case][d]] for d in range(self.dim) ] ) base_case_scl = np.array( [ self.interaction_case_scls[ self.reduce_by_symmetry.reduced_vec_ids[self.current_base_case] ] ] ) # --------- call kernel 0 ---------- self.get_kernel_id = 0 try: delattr(self, "_memoize_dic_get_cached_optimized_kernel") except Exception: # noqa: B902 pass knl = self.get_cached_optimized_kernel() result_array_0 = self.make_result_array(**kwargs) evt0, res0 = knl( queue, alpha=alpha, delta=delta, result=result_array_0, root_brick=root_brick, target_nodes=t.astype(np.float64, copy=True), interaction_case_vecs=base_case_vec.astype(np.float64, copy=True), interaction_case_scls=base_case_scl.astype(np.float64, copy=True), n_cases=1, nfunctions=self.nfunctions, quad_order=self.ntgt_points, nlevels=nlevels, **extra_kernel_kwargs, **self.brick_quadrature_kwargs ) # --------- call kernel 1 ---------- self.get_kernel_id = 1 try: delattr(self, "_memoize_dic_get_cached_optimized_kernel") except Exception: # noqa: B902 pass result_array_1 = self.make_result_array(**kwargs) knl = self.get_cached_optimized_kernel() evt1, res1 = knl( queue, alpha=alpha, delta=delta, result=result_array_1, root_brick=root_brick, target_nodes=t.astype(np.float64, copy=True), interaction_case_vecs=base_case_vec.astype(np.float64, copy=True), interaction_case_scls=base_case_scl.astype(np.float64, copy=True), n_cases=1, nfunctions=self.nfunctions, quad_order=self.ntgt_points, nlevels=nlevels, **extra_kernel_kwargs, **self.brick_quadrature_kwargs ) # --------- call kernel 2 ---------- self.get_kernel_id = 2 try: delattr(self, "_memoize_dic_get_cached_optimized_kernel") except Exception: # noqa: B902 pass knl2 = self.get_cached_optimized_kernel() result_array = res0['result'] + res1['result'] evt2, res2 = knl2( queue, result=result_array, n_cases=1, nfunctions=self.nfunctions, quad_order=self.ntgt_points, nlevels=nlevels, **extra_kernel_kwargs ) cheb_table_case = res2["result"] return cheb_table_case
# }}} End inverse Droste method