Source code for volumential.volume_fmm

from __future__ import absolute_import, division, print_function

__copyright__ = "Copyright (C) 2017 - 2018 Xiaoyu Wei"

__license__ = """
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""

__doc__ = """
.. autofunction:: drive_volume_fmm
.. autofunction:: interpolate_volume_potential
"""

import numpy as np
import pyopencl as cl
from pytools.obj_array import make_obj_array
from boxtree.fmm import TimingRecorder
from volumential.expansion_wrangler_interface import ExpansionWranglerInterface
from volumential.expansion_wrangler_fpnd import (
        FPNDSumpyExpansionWrangler, FPNDFMMLibExpansionWrangler)


import logging
logger = logging.getLogger(__name__)


# {{{ volume FMM driver

[docs]def drive_volume_fmm(traversal, expansion_wrangler, src_weights, src_func, direct_evaluation=False, timing_data=None, reorder_sources=True, reorder_potentials=True, **kwargs): """ Top-level driver routine for volume potential calculation via fast multiple method. This function, and the interface it utilizes, is adapted from boxtree/fmm.py The fast multipole method is a two-pass algorithm: 1. During the fist (upward) pass, the multipole expansions for all boxes at all levels are formed from bottom up. 2. In the second (downward) pass, the local expansions for all boxes at all levels at formed from top down. :arg traversal: A :class:`boxtree.traversal.FMMTraversalInfo` instance. :arg expansion_wrangler: An object implementing the expansion wrangler interface. :arg src_weights: Source 'density/weights/charges' time quad weights.. Passed unmodified to *expansion_wrangler*. :arg src_func: Source 'density/weights/charges' function. Passed unmodified to *expansion_wrangler*. :arg reorder_sources: Whether sources are in user order (if True, sources are reordered into tree order before conducting FMM). :arg reorder_potentials: Whether potentials should be in user order (if True, potentials are reordered into user order before return). Returns the potentials computed by *expansion_wrangler*. """ wrangler = expansion_wrangler assert issubclass(type(wrangler), ExpansionWranglerInterface) recorder = TimingRecorder() logger.info("start fmm") # accept unpacked inputs when doing fmm for just one source field dtype = wrangler.dtype if src_weights.ndim == 1: src_weights = make_obj_array([src_weights.astype(dtype)]) if src_func.ndim == 1: src_func = make_obj_array([src_func.astype(dtype)]) assert (ns := len(src_weights)) == len(src_func) if ns > 1: raise NotImplementedError("Multiple outputs are not yet supported") if isinstance(expansion_wrangler, FPNDSumpyExpansionWrangler): assert all(isinstance(sw, cl.array.Array) for sw in src_weights) assert all(isinstance(sf, cl.array.Array) for sf in src_func) elif isinstance(expansion_wrangler, FPNDFMMLibExpansionWrangler): traversal = traversal.get(wrangler.queue) if isinstance(src_weights, cl.array.Array): src_weights = src_weights.get(wrangler.queue) if isinstance(src_func, cl.array.Array): src_func = src_func.get(wrangler.queue) if reorder_sources: logger.debug("reorder source weights") for idx_s in range(ns): src_weights[idx_s] = wrangler.reorder_sources(src_weights[idx_s]) src_func[idx_s] = wrangler.reorder_targets(src_func[idx_s]) # {{{ Construct local multipoles logger.debug("construct local multipoles") mpole_exps, timing_future = wrangler.form_multipoles( traversal.level_start_source_box_nrs, traversal.source_boxes, src_weights ) recorder.add("form_multipoles", timing_future) # }}} # {{{ Propagate multipoles upward logger.debug("propagate multipoles upward") mpole_exps, timing_future = wrangler.coarsen_multipoles( traversal.level_start_source_parent_box_nrs, traversal.source_parent_boxes, mpole_exps, ) recorder.add("coarsen_multipoles", timing_future) # mpole_exps is called Phi in [1] # }}} # {{{ Direct evaluation from neighbor source boxes ("list 1") logger.debug("direct evaluation from neighbor source boxes ('list 1')") # look up in the prebuilt table # this step also constructs the output array potentials, timing_future = wrangler.eval_direct( traversal.target_boxes, traversal.neighbor_source_boxes_starts, traversal.neighbor_source_boxes_lists, src_func[0], # FIXME: handle multiple source fields ) recorder.add("eval_direct", timing_future) # Return list 1 only, for debugging # 'list1_only' takes precedence over 'exclude_list1' if 'list1_only' in kwargs and kwargs['list1_only']: if reorder_potentials: logger.debug("reorder potentials") result = wrangler.reorder_potentials(potentials) logger.debug("finalize potentials") result = wrangler.finalize_potentials(result) logger.info("fmm complete with list 1 only") logger.info("fmm complete") logger.warn("only list 1 results are returned") return result # Do not include list 1 if 'exclude_list1' in kwargs and kwargs['exclude_list1']: logger.info("Using zeros for list 1") logger.warn("list 1 interactions are not included") potentials = wrangler.output_zeros() # these potentials are called alpha in [1] # }}} # {{{ (Optional) direct evaluation of everything and return if direct_evaluation: print("Warning: NOT USING FMM (forcing global p2p)") if len(src_weights) != len(src_func): print("Using P2P with different src/tgt discretizations can be " "unstable when targets are close to the sources while not " "be exactly the same") # list 2 and beyond # First call global p2p, then substract list 1 from sumpy import P2P p2p = P2P( wrangler.queue.context, wrangler.code.target_kernels, exclude_self=wrangler.code.exclude_self, value_dtypes=[wrangler.dtype], ) if hasattr(wrangler, "self_extra_kwargs"): p2p_extra_kwargs = dict(wrangler.self_extra_kwargs) else: p2p_extra_kwargs = {} if hasattr(wrangler, "self_extra_kwargs"): p2p_extra_kwargs.update(wrangler.kernel_extra_kwargs) for iw, sw in enumerate(src_weights): evt, (ref_pot,) = p2p( wrangler.queue, traversal.tree.targets, traversal.tree.sources, (sw,), **p2p_extra_kwargs ) potentials[iw] += ref_pot l1_potentials, timing_future = wrangler.eval_direct_p2p( traversal.target_boxes, traversal.neighbor_source_boxes_starts, traversal.neighbor_source_boxes_lists, src_weights, ) potentials = potentials - l1_potentials # list 3 assert traversal.from_sep_close_smaller_starts is None # list 4 assert traversal.from_sep_close_bigger_starts is None if reorder_potentials: logger.debug("reorder potentials") result = wrangler.reorder_potentials(potentials) logger.debug("finalize potentials") result = wrangler.finalize_potentials(result) logger.info("direct p2p complete") return result # }}} End Stage # {{{ Translate separated siblings' ("list 2") mpoles to local logger.debug("translate separated siblings' ('list 2') mpoles to local") local_exps, timing_future = wrangler.multipole_to_local( traversal.level_start_target_or_target_parent_box_nrs, traversal.target_or_target_parent_boxes, traversal.from_sep_siblings_starts, traversal.from_sep_siblings_lists, mpole_exps, ) recorder.add("multipole_to_local", timing_future) # local_exps represents both Gamma and Delta in [1] # }}} # {{{ Evaluate sep. smaller mpoles ("list 3") at particles logger.debug("evaluate sep. smaller mpoles at particles ('list 3 far')") # (the point of aiming this stage at particles is specifically to keep its # contribution *out* of the downward-propagating local expansions) mpole_result, timing_future = wrangler.eval_multipoles( traversal.target_boxes_sep_smaller_by_source_level, traversal.from_sep_smaller_by_level, mpole_exps, ) recorder.add("eval_multipoles", timing_future) potentials = potentials + mpole_result # these potentials are called beta in [1] # volume fmm does not work with list 3 close currently # but list 3 should be empty with our use cases assert traversal.from_sep_close_smaller_starts is None # if traversal.from_sep_close_smaller_starts is not None: # logger.debug("evaluate separated close smaller interactions directly " # "('list 3 close')") # potentials = potentials + wrangler.eval_direct( # traversal.target_boxes, traversal.from_sep_close_smaller_starts, # traversal.from_sep_close_smaller_lists, src_weights) # }}} # {{{ Form locals for separated bigger source boxes ("list 4") logger.debug("form locals for separated bigger source boxes ('list 4 far')") local_result, timing_future = wrangler.form_locals( traversal.level_start_target_or_target_parent_box_nrs, traversal.target_or_target_parent_boxes, traversal.from_sep_bigger_starts, traversal.from_sep_bigger_lists, src_weights, ) recorder.add("form_locals", timing_future) local_exps = local_exps + local_result # volume fmm does not work with list 4 currently assert traversal.from_sep_close_bigger_starts is None # if traversal.from_sep_close_bigger_starts is not None: # logger.debug("evaluate separated close bigger interactions directly " # "('list 4 close')") # potentials = potentials + wrangler.eval_direct( # traversal.target_or_target_parent_boxes, # traversal.from_sep_close_bigger_starts, # traversal.from_sep_close_bigger_lists, src_weights) # }}} # {{{ Propagate local_exps downward logger.debug("propagate local_exps downward") # import numpy.linalg as la local_exps, timing_future = wrangler.refine_locals( traversal.level_start_target_or_target_parent_box_nrs, traversal.target_or_target_parent_boxes, local_exps, ) recorder.add("refine_locals", timing_future) # }}} # {{{ Evaluate locals logger.debug("evaluate locals") local_result, timing_future = wrangler.eval_locals( traversal.level_start_target_box_nrs, traversal.target_boxes, local_exps ) recorder.add("eval_locals", timing_future) potentials = potentials + local_result # }}} # {{{ Reorder potentials if reorder_potentials: logger.debug("reorder potentials") result = wrangler.reorder_potentials(potentials) logger.debug("finalize potentials") result = wrangler.finalize_potentials(result) logger.info("fmm complete") # }}} End Reorder potentials if timing_data is not None: timing_data.update(recorder.summarize()) return result
# }}} End volume FMM driver # {{{ free form interpolation of potentials def compute_barycentric_lagrange_params(q_order): # 1d quad points and weights q_points_1d, q_weights_1d = np.polynomial.legendre.leggauss(q_order) q_points_1d = (q_points_1d + 1) * 0.5 q_weights_1d *= 0.5 # interpolation weights for barycentric Lagrange interpolation from scipy.interpolate import BarycentricInterpolator as Interpolator interp = Interpolator(xi=q_points_1d, yi=None) interp_weights = interp.wi interp_points = interp.xi return (interp_points, interp_weights)
[docs]def interpolate_volume_potential(target_points, traversal, wrangler, potential, potential_in_tree_order=False, target_radii=None, lbl_lookup=None, **kwargs): """ Interpolate the volume potential, only works for tensor-product quadrature formulae. target_points and potential should be an cl array. :arg wrangler: Used only for general info (nothing sumpy kernel specific). May also be None if the needed information is passed by kwargs. :arg potential_in_tree_order: Whether the potential is in tree order (as opposed to in user order). :lbl_lookup: a :class:`boxtree.LeavesToBallsLookup` that has the lookup information for target points. Can be None if the lookup lists are provided separately in kwargs. If it is None and no other information is provided, the lookup will be built from scratch. """ if wrangler is not None: dim = next(iter(wrangler.near_field_table.values()))[0].dim tree = wrangler.tree queue = wrangler.queue q_order = wrangler.quad_order dtype = wrangler.dtype else: dim = kwargs['dim'] tree = kwargs['tree'] queue = kwargs['queue'] q_order = kwargs['q_order'] dtype = kwargs['dtype'] ctx = queue.context coord_dtype = tree.coord_dtype n_points = len(target_points[0]) from volumential.expansion_wrangler_fpnd import FPNDFMMLibExpansionWrangler if isinstance(wrangler, FPNDFMMLibExpansionWrangler): tree = tree.to_device(queue) traversal = traversal.to_device(queue) assert dim == len(target_points) evt = None if ("balls_near_box_starts" in kwargs) and ("balls_near_box_lists" in kwargs): balls_near_box_starts = kwargs["balls_near_box_starts"] balls_near_box_lists = kwargs["balls_near_box_lists"] else: # Building the lookup takes O(n*log(n)) if lbl_lookup is None: from boxtree.area_query import LeavesToBallsLookupBuilder lookup_builder = LeavesToBallsLookupBuilder(ctx) if target_radii is None: # Set this number small enough so that all points found # are inside the box target_radii = cl.array.to_device( queue, np.ones(n_points, dtype=coord_dtype) * 1e-12) lbl_lookup, evt = lookup_builder( queue, tree, target_points, target_radii) balls_near_box_starts = lbl_lookup.balls_near_box_starts balls_near_box_lists = lbl_lookup.balls_near_box_lists pout = cl.array.zeros(queue, n_points, dtype=dtype) multiplicity = cl.array.zeros(queue, n_points, dtype=dtype) if evt is not None: pout.add_event(evt) # map all boxes to a template [0,1]^2 so that the interpolation # weights and modes can be precomputed (blpoints, blweights) = compute_barycentric_lagrange_params(q_order) blpoints = cl.array.to_device(queue, blpoints) blweights = cl.array.to_device(queue, blweights) # {{{ loopy kernel for interpolation if dim == 1: code_target_coords_assignment = """target_coords_x[target_point_id]""" if dim == 2: code_target_coords_assignment = """if( iaxis == 0, target_coords_x[target_point_id], target_coords_y[target_point_id])""" elif dim == 3: code_target_coords_assignment = """if( iaxis == 0, target_coords_x[target_point_id], if( iaxis == 1, target_coords_y[target_point_id], target_coords_z[target_point_id]))""" else: raise NotImplementedError if dim == 1: code_mode_index_assignment = """mid""" elif dim == 2: code_mode_index_assignment = """if( iaxis == 0, mid / Q_ORDER, mid % Q_ORDER)""".replace( "Q_ORDER", "q_order" ) elif dim == 3: code_mode_index_assignment = """if( iaxis == 0, mid / (Q_ORDER * Q_ORDER), if( iaxis == 1, mid % (Q_ORDER * Q_ORDER) / Q_ORDER, mid % (Q_ORDER * Q_ORDER) % Q_ORDER))""".replace( "Q_ORDER", "q_order" ) else: raise NotImplementedError import loopy # FIXME: noqa for specific lines does not work here # flake8: noqa lpknl = loopy.make_kernel( # noqa [ "{ [ tbox, iaxis ] : 0 <= tbox < n_tgt_boxes " "and 0 <= iaxis < dim }", "{ [ tpt, mid, mjd, mkd ] : tpt_begin <= tpt < tpt_end " "and 0 <= mid < n_box_modes and 0 <= mjd < q_order " "and 0 <= mkd < q_order }", ], """ for tbox <> target_box_id = target_boxes[tbox] <> tpt_begin = balls_near_box_starts[target_box_id] <> tpt_end = balls_near_box_starts[target_box_id+1] <> box_level = box_levels[target_box_id] <> box_mode_beg = box_target_starts[target_box_id] <> n_box_modes = box_target_counts_cumul[target_box_id] <> box_extent = root_extent * (1.0 / (2**box_level)) for iaxis <> box_center[iaxis] = box_centers[iaxis, target_box_id] {dup=iaxis} end for tpt <> target_point_id = balls_near_box_lists[tpt] for iaxis <> real_coord[iaxis] = TARGET_COORDS_ASSIGNMENT {dup=iaxis} end # Count how many times the potential is computed multiplicity[target_point_id] = multiplicity[target_point_id] + 1 {atomic} # Map target point to template box for iaxis <> tplt_coord[iaxis] = (real_coord[iaxis] - box_center[iaxis] ) / box_extent + 0.5 {dup=iaxis} end # Precompute denominators for iaxis <> denom[iaxis] = 0.0 {id=reinit_denom,dup=iaxis} end for iaxis, mjd <> diff[iaxis, mjd] = if( \ tplt_coord[iaxis] == barycentric_lagrange_points[mjd], \ 1, \ tplt_coord[iaxis] - barycentric_lagrange_points[mjd]) \ {id=diff, dep=reinit_denom, dup=iaxis:mjd} denom[iaxis] = denom[iaxis] + \ barycentric_lagrange_weights[mjd] / diff[iaxis, mjd] \ {id=denom, dep=diff, dup=iaxis:mjd} end for mid # Find the coeff of each mode <> mode_id = box_mode_beg + mid <> mode_id_user = user_mode_ids[mode_id] <> mode_coeff = potential[mode_id_user] # Mode id in each direction for iaxis idx[iaxis] = MODE_INDEX_ASSIGNMENT {id=mode_indices,dup=iaxis} end # Interpolate mode value in each direction for iaxis <> numerator[iaxis] = (barycentric_lagrange_weights[idx[iaxis]] / diff[iaxis, idx[iaxis]]) {id=numer,dep=diff:mode_indices,dup=iaxis} <> mode_val[iaxis] = numerator[iaxis] / denom[iaxis] {id=mode_val,dep=numer:denom,dup=iaxis} end # Fix when target point coincide with a quad point for mkd, iaxis mode_val[iaxis] = if( tplt_coord[iaxis] == barycentric_lagrange_points[mkd], if(mkd == idx[iaxis], 1, 0), mode_val[iaxis]) {id=fix_mode_val, dep=mode_val:mode_indices, dup=iaxis} end <> prod_mode_val = product(iaxis, mode_val[iaxis]) {id=pmod,dep=fix_mode_val,dup=iaxis} end p_out[target_point_id] = p_out[target_point_id] + sum(mid, mode_coeff * prod_mode_val ) {id=p_out,dep=pmod,atomic} end end """.replace( "TARGET_COORDS_ASSIGNMENT", code_target_coords_assignment ) .replace("MODE_INDEX_ASSIGNMENT", code_mode_index_assignment) .replace("Q_ORDER", "q_order"), [ loopy.TemporaryVariable("idx", np.int32, "dim,"), # loopy.TemporaryVariable("denom", dtype, "dim,"), # loopy.TemporaryVariable("diff", dtype, "dim, q_order"), loopy.GlobalArg("box_centers", None, "dim, aligned_nboxes"), loopy.GlobalArg("balls_near_box_lists", None, None), loopy.GlobalArg("multiplicity", None, None, for_atomic=True), loopy.GlobalArg("p_out", None, None, for_atomic=True), loopy.ValueArg("aligned_nboxes", np.int32), loopy.ValueArg("dim", np.int32), loopy.ValueArg("q_order", np.int32), "...", ], lang_version=(2018, 2), ) # }}} End loopy kernel for interpolation # loopy does not directly support object arrays if dim == 1: target_coords_knl_kwargs = {"target_coords_x": target_points[0]} elif dim == 2: target_coords_knl_kwargs = { "target_coords_x": target_points[0], "target_coords_y": target_points[1], } elif dim == 3: target_coords_knl_kwargs = { "target_coords_x": target_points[0], "target_coords_y": target_points[1], "target_coords_z": target_points[2], } else: raise NotImplementedError if potential_in_tree_order: user_mode_ids = cl.array.arange( queue, len(tree.user_source_ids), dtype=tree.user_source_ids.dtype) else: # fetching from user_source_ids converts potential to tree order user_mode_ids = tree.user_source_ids lpknl = loopy.set_options(lpknl, return_dict=True) lpknl = loopy.fix_parameters(lpknl, dim=int(dim), q_order=int(q_order)) lpknl = loopy.split_iname(lpknl, "tbox", 128, outer_tag="g.0", inner_tag="l.0") evt, res_dict = lpknl( queue, p_out=pout, multiplicity=multiplicity, box_centers=tree.box_centers, box_levels=tree.box_levels, balls_near_box_starts=balls_near_box_starts, balls_near_box_lists=balls_near_box_lists, barycentric_lagrange_weights=blweights, barycentric_lagrange_points=blpoints, box_target_starts=tree.box_target_starts, box_target_counts_cumul=tree.box_target_counts_cumul, potential=potential, user_mode_ids=user_mode_ids, target_boxes=traversal.target_boxes, root_extent=tree.root_extent, n_tgt_boxes=len(traversal.target_boxes), **target_coords_knl_kwargs) assert pout is res_dict["p_out"] assert multiplicity is res_dict["multiplicity"] pout.add_event(evt) multiplicity.add_event(evt) return pout / multiplicity
# }}} End free form interpolation of potentials # vim: filetype=pyopencl:fdm=marker