LCOV - code coverage report
Current view: top level - ext/amrex/2d-coverage-g++-24.08/include - AMReX_MultiFabUtil.H (source / functions) Hit Total Coverage
Test: coverage_merged.info Lines: 22 36 61.1 %
Date: 2024-11-18 05:28:54 Functions: 2 20 10.0 %

          Line data    Source code
       1             : #ifndef AMREX_MultiFabUtil_H_
       2             : #define AMREX_MultiFabUtil_H_
       3             : #include <AMReX_Config.H>
       4             : 
       5             : #include <AMReX_MultiFab.H>
       6             : #include <AMReX_iMultiFab.H>
       7             : #include <AMReX_LayoutData.H>
       8             : #include <AMReX_MFIter.H>
       9             : #include <AMReX_Array.H>
      10             : #include <AMReX_Vector.H>
      11             : #include <AMReX_MultiFabUtil_C.H>
      12             : 
      13             : #include <AMReX_MultiFabUtilI.H>
      14             : 
      15             : namespace amrex
      16             : {
      17             :     //! Average nodal-based MultiFab onto cell-centered MultiFab.
      18             :     void average_node_to_cellcenter (MultiFab& cc, int dcomp,
      19             :                                      const MultiFab& nd, int scomp,
      20             :                                      int ncomp, int ngrow = 0);
      21             : 
      22             :     /**
      23             :      * \brief Average edge-based MultiFab onto cell-centered MultiFab.
      24             :      *
      25             :      * This fills in \p ngrow ghost cells in the cell-centered MultiFab. Both cell centered and
      26             :      * edge centered MultiFabs need to have \p ngrow ghost values.
      27             :      */
      28             :     void average_edge_to_cellcenter (MultiFab& cc, int dcomp,
      29             :                                      const Vector<const MultiFab*>& edge,
      30             :                                      int ngrow = 0);
      31             :     //! Average face-based MultiFab onto cell-centered MultiFab.
      32             :     void average_face_to_cellcenter (MultiFab& cc, int dcomp,
      33             :                                      const Vector<const MultiFab*>& fc,
      34             :                                      int ngrow = 0);
      35             :     //! Average face-based FabArray onto cell-centered FabArray.
      36             :     template <typename CMF, typename FMF,
      37             :               std::enable_if_t<IsFabArray_v<CMF> && IsFabArray_v<FMF>, int> = 0>
      38             :     void average_face_to_cellcenter (CMF& cc, int dcomp,
      39             :                                      const Array<const FMF*,AMREX_SPACEDIM>& fc,
      40             :                                      int ngrow = 0);
      41             :     //! Average face-based MultiFab onto cell-centered MultiFab with geometric weighting.
      42             :     void average_face_to_cellcenter (MultiFab& cc,
      43             :                                      const Vector<const MultiFab*>& fc,
      44             :                                      const Geometry& geom);
      45             :     //! Average face-based MultiFab onto cell-centered MultiFab with geometric weighting.
      46             :     void average_face_to_cellcenter (MultiFab& cc,
      47             :                                      const Array<const MultiFab*,AMREX_SPACEDIM>& fc,
      48             :                                      const Geometry& geom);
      49             :     //! Average cell-centered MultiFab onto face-based MultiFab with geometric weighting.
      50             :     void average_cellcenter_to_face (const Vector<MultiFab*>& fc,
      51             :                                      const MultiFab& cc,
      52             :                                      const Geometry& geom,
      53             :                                      int ncomp = 1,
      54             :                                      bool use_harmonic_averaging = false);
      55             :     //! Average cell-centered MultiFab onto face-based MultiFab with geometric weighting.
      56             :     void average_cellcenter_to_face (const Array<MultiFab*,AMREX_SPACEDIM>& fc,
      57             :                                      const MultiFab& cc,
      58             :                                      const Geometry& geom,
      59             :                                      int ncomp = 1,
      60             :                                      bool use_harmonic_averaging = false);
      61             : 
      62             :     //! Average fine face-based FabArray onto crse face-based FabArray.
      63             :     template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> = 0>
      64             :     void average_down_faces (const Vector<const MF*>& fine,
      65             :                              const Vector<MF*>& crse,
      66             :                              const IntVect& ratio,
      67             :                              int ngcrse = 0);
      68             :     //! Average fine face-based FabArray onto crse face-based FabArray.
      69             :     template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> = 0>
      70             :     void average_down_faces (const Vector<const MF*>& fine,
      71             :                              const Vector<MF*>& crse,
      72             :                              int ratio,
      73             :                              int ngcrse = 0);
      74             :     //! Average fine face-based FabArray onto crse face-based FabArray.
      75             :     template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> = 0>
      76             :     void average_down_faces (const Array<const MF*,AMREX_SPACEDIM>& fine,
      77             :                              const Array<MF*,AMREX_SPACEDIM>& crse,
      78             :                              const IntVect& ratio,
      79             :                              int ngcrse = 0);
      80             :     //! Average fine face-based FabArray onto crse face-based FabArray.
      81             :     template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> = 0>
      82             :     void average_down_faces (const Array<const MF*,AMREX_SPACEDIM>& fine,
      83             :                              const Array<MF*,AMREX_SPACEDIM>& crse,
      84             :                              int ratio,
      85             :                              int ngcrse = 0);
      86             :     /**
      87             :      * \brief This version does average down for one face direction.
      88             :      *
      89             :      * It uses the IndexType of MultiFabs to determine the direction.
      90             :      * It is expected that one direction is nodal and the rest are cell-centered.
      91             :      */
      92             :     template <typename FAB>
      93             :     void average_down_faces (const FabArray<FAB>& fine, FabArray<FAB>& crse,
      94             :                              const IntVect& ratio, int ngcrse=0);
      95             : 
      96             :     //  This version takes periodicity into account.
      97             :     template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> = 0>
      98             :     void average_down_faces (const Array<const MF*,AMREX_SPACEDIM>& fine,
      99             :                              const Array<MF*,AMREX_SPACEDIM>& crse,
     100             :                              const IntVect& ratio, const Geometry& crse_geom);
     101             :     //  This version takes periodicity into account.
     102             :     template <typename FAB>
     103             :     void average_down_faces (const FabArray<FAB>& fine, FabArray<FAB>& crse,
     104             :                              const IntVect& ratio, const Geometry& crse_geom);
     105             : 
     106             :     //! Average fine edge-based MultiFab onto crse edge-based MultiFab.
     107             :     void average_down_edges (const Vector<const MultiFab*>& fine,
     108             :                              const Vector<MultiFab*>& crse,
     109             :                              const IntVect& ratio,
     110             :                              int ngcrse = 0);
     111             :     void average_down_edges (const Array<const MultiFab*,AMREX_SPACEDIM>& fine,
     112             :                              const Array<MultiFab*,AMREX_SPACEDIM>& crse,
     113             :                              const IntVect& ratio,
     114             :                              int ngcrse = 0);
     115             :     //! This version does average down for one direction.
     116             :     //! It uses the IndexType of MultiFabs to determine the direction.
     117             :     //! It is expected that one direction is cell-centered and the rest are nodal.
     118             :     void average_down_edges (const MultiFab& fine, MultiFab& crse,
     119             :                              const IntVect& ratio, int ngcrse=0);
     120             : 
     121             :     //! Average fine node-based MultiFab onto crse node-centered MultiFab.
     122             :     template <typename FAB>
     123             :     void average_down_nodal (const FabArray<FAB>& S_fine,
     124             :                              FabArray<FAB>& S_crse,
     125             :                              const IntVect& ratio,
     126             :                              int ngcrse = 0,
     127             :                              bool mfiter_is_definitely_safe=false);
     128             : 
     129             :     /**
     130             :      * \brief Volume weighed average of fine MultiFab onto coarse MultiFab.
     131             :      *
     132             :      * Both MultiFabs are assumed to be cell-centered. This routine DOES NOT assume that
     133             :      * the crse BoxArray is a coarsened version of the fine BoxArray.
     134             :      */
     135             :     void average_down (const MultiFab& S_fine, MultiFab& S_crse,
     136             :                        const Geometry& fgeom, const Geometry& cgeom,
     137             :                        int scomp, int ncomp, const IntVect& ratio);
     138             :     void average_down (const MultiFab& S_fine, MultiFab& S_crse,
     139             :                        const Geometry& fgeom, const Geometry& cgeom,
     140             :                        int scomp, int ncomp, int rr);
     141             : 
     142             :     //! Average MultiFab onto crse MultiFab without volume weighting. This
     143             :     //! routine DOES NOT assume that the crse BoxArray is a coarsened version of
     144             :     //! the fine BoxArray. Work for both cell-centered and nodal MultiFabs.
     145             :     template<typename FAB>
     146             :     void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
     147             :                        int scomp, int ncomp, const IntVect& ratio);
     148             :     template<typename FAB>
     149             :     void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
     150             :                        int scomp, int ncomp, int rr);
     151             : 
     152             :     //! Add a coarsened version of the data contained in the S_fine MultiFab to
     153             :     //! S_crse, including ghost cells.
     154             :     void sum_fine_to_coarse (const MultiFab& S_Fine, MultiFab& S_crse,
     155             :                              int scomp, int ncomp,
     156             :                              const IntVect& ratio,
     157             :                              const Geometry& cgeom, const Geometry& fgeom);
     158             : 
     159             :     //! Output state data for a single zone
     160             :     void print_state (const MultiFab& mf, const IntVect& cell, int n=-1,
     161             :                       const IntVect& ng = IntVect::TheZeroVector());
     162             : 
     163             :     //! Write each fab individually
     164             :     void writeFabs (const MultiFab& mf, const std::string& name);
     165             :     void writeFabs (const MultiFab& mf, int comp, int ncomp, const std::string& name);
     166             : 
     167             :     //! Extract a slice from the given cell-centered MultiFab at coordinate
     168             :     //! "coord" along direction "dir".
     169             :     std::unique_ptr<MultiFab> get_slice_data(int dir, Real coord,
     170             :                                              const MultiFab& cc,
     171             :                                              const Geometry& geom, int start_comp, int ncomp,
     172             :                                              bool interpolate=false);
     173             : 
     174             :     /**
     175             :      * \brief Get data in a cell of MultiFab/FabArray
     176             :      *
     177             :      * This returns a Vector containing the data in a given cell, if it's
     178             :      * available on a process. The returned Vector is empty if a process
     179             :      * does not have the given cell.
     180             :      */
     181             :     template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> FOO = 0>
     182             :     Vector<typename MF::value_type> get_cell_data (MF const& mf, IntVect const& cell);
     183             : 
     184             :     /**
     185             :      * \brief Get data in a line of MultiFab/FabArray
     186             :      *
     187             :      * This returns a MultiFab/FabArray containing the data in a line
     188             :      * specified by a direction and a cell.
     189             :      */
     190             :     template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> FOO = 0>
     191             :     MF get_line_data (MF const& mf, int dir, IntVect const& cell);
     192             : 
     193             :     //! Return an iMultiFab that has the same BoxArray and DistributionMapping
     194             :     //! as the coarse MultiFab cmf. Cells covered by the coarsened fine grids
     195             :     //! are set to fine_value, whereas other cells are set to crse_value.
     196             :     template <typename FAB>
     197             :     iMultiFab makeFineMask (const FabArray<FAB>& cmf, const BoxArray& fba, const IntVect& ratio,
     198             :                             int crse_value = 0, int fine_value = 1);
     199             :     iMultiFab makeFineMask (const BoxArray& cba, const DistributionMapping& cdm,
     200             :                             const BoxArray& fba, const IntVect& ratio,
     201             :                             int crse_value = 0, int fine_value = 1);
     202             :     template <typename FAB>
     203             :     iMultiFab makeFineMask (const FabArray<FAB>& cmf, const BoxArray& fba, const IntVect& ratio,
     204             :                             Periodicity const& period, int crse_value, int fine_value);
     205             :     iMultiFab makeFineMask (const BoxArray& cba, const DistributionMapping& cdm,
     206             :                             const IntVect& cnghost, const BoxArray& fba, const IntVect& ratio,
     207             :                             Periodicity const& period, int crse_value, int fine_value);
     208             :     template <typename FAB>
     209             :     iMultiFab makeFineMask (const FabArray<FAB>& cmf, const FabArray<FAB>& fmf,
     210             :                             const IntVect& cnghost, const IntVect& ratio,
     211             :                             Periodicity const& period, int crse_value, int fine_value);
     212             :     template <typename FAB>
     213             :     iMultiFab makeFineMask (const FabArray<FAB>& cmf, const FabArray<FAB>& fmf,
     214             :                             const IntVect& cnghost, const IntVect& ratio,
     215             :                             Periodicity const& period, int crse_value, int fine_value,
     216             :                             LayoutData<int>& has_cf);
     217             : 
     218             :     MultiFab makeFineMask (const BoxArray& cba, const DistributionMapping& cdm,
     219             :                            const BoxArray& fba, const IntVect& ratio,
     220             :                            Real crse_value, Real fine_value);
     221             : 
     222             :     //! Computes divergence of face-data stored in the umac MultiFab.
     223             :     void computeDivergence (MultiFab& divu, const Array<MultiFab const*,AMREX_SPACEDIM>& umac,
     224             :                             const Geometry& geom);
     225             : 
     226             :     //! Computes gradient of face-data stored in the umac MultiFab.
     227             :     void computeGradient (MultiFab& grad, const Array<MultiFab const*,AMREX_SPACEDIM>& umac,
     228             :                           const Geometry& geom);
     229             : 
     230             :     //! Convert iMultiFab to MultiFab
     231             :     MultiFab ToMultiFab (const iMultiFab& imf);
     232             :     //! Convert iMultiFab to Long
     233             :     FabArray<BaseFab<Long> > ToLongMultiFab (const iMultiFab& imf);
     234             : 
     235             :     //! Periodic shift MultiFab
     236             :     MultiFab periodicShift (MultiFab const& mf, IntVect const& offset,
     237             :                             Periodicity const& period);
     238             : 
     239             :     //! example: auto mf = amrex::cast<MultiFab>(imf);
     240             :     template <typename T, typename U>
     241             :     T cast (U const& mf_in)
     242             :     {
     243             :         T mf_out(mf_in.boxArray(), mf_in.DistributionMap(), mf_in.nComp(), mf_in.nGrowVect());
     244             : 
     245             : #ifdef AMREX_USE_OMP
     246             : #pragma omp parallel if (Gpu::notInLaunchRegion())
     247             : #endif
     248             :         for (MFIter mfi(mf_in); mfi.isValid(); ++mfi)
     249             :         {
     250             :             const Long n = mfi.fabbox().numPts() * mf_in.nComp();
     251             :             auto      * pdst = mf_out[mfi].dataPtr();
     252             :             auto const* psrc = mf_in [mfi].dataPtr();
     253             :             AMREX_HOST_DEVICE_PARALLEL_FOR_1D ( n, i,
     254             :             {
     255             :                 pdst[i] = static_cast<typename U::value_type>(psrc[i]); // NOLINT(bugprone-signed-char-misuse)
     256             :             });
     257             :         }
     258             :         return mf_out;
     259             :     }
     260             : 
     261             :     /**
     262             :      * \brief Reduce FabArray/MultiFab data to a plane.
     263             :      *
     264             :      * This function takes a FabArray/MultiFab and reduces its data to a
     265             :      * plane.  The return data are stored in a BaseFab with only one cell in
     266             :      * the normal direction of the plane.  The index range of the BaseFab in
     267             :      * the other directions is the same as the provided domain Box.  If data
     268             :      * do not exist along a certain line, the value is set to the minimum,
     269             :      * maximum and zero, for reduce max, min and sum, respectively.  The
     270             :      * reduction is local and the user may need to do MPI communication
     271             :      * afterwards if needed.
     272             :      *
     273             :      * In the example code below, the sum along each line at (i,j) in the
     274             :      * z-direction is computed and stored at (i,j,0) of the returned
     275             :      * BaseFab.
     276             :      \verbatim
     277             :          int dir = 2; // z-direction
     278             :          auto const& domain_box = geom.Domain();
     279             :          auto const& ma = mf.const_arrays();
     280             :          auto rr = ReduceToPlane<ReduceOpSum,Real>(dir, domain_box, mf,
     281             :              [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) -> Real
     282             :              {
     283             :                  return ma[box_no](i,j,k); // data at (i,j,k) of Box box_no
     284             :              });
     285             :      \endverbatim
     286             :      *
     287             :      * Below is another example.  This finds the maximum value in the
     288             :      * x-direction and stores the maximum value and the i-index.  An MPI
     289             :      * reduce is then called to further reduce the data to the root process
     290             :      * 0.
     291             :      \verbatim
     292             :          int dir = 0; // x-direction
     293             :          auto const& domain_box = geom.Domain().surroundingNodes(); // nodal data
     294             :          auto const& ma = mf.const_arrays();
     295             :          auto rr = ReduceToPlane<ReduceOpMax,KeyValuePair<Real,int>>
     296             :              (dir, domain_box, mf,
     297             :               [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k)
     298             :                   -> KeyValuePair<Real,int>
     299             :              {
     300             :                  return {ma[box_no](i,j,k), i};
     301             :              });
     302             :          ParallelReduce::Max(rr.dataPtr(), rr.size(), root,
     303             :                              ParallelDescriptor::Communicator());
     304             :          // Process root now has the final results.
     305             :      \endverbatim
     306             :      *
     307             :      * \tparam Op  reduce operator (e.g., ReduceOpSum, ReduceOpMin and ReduceOpMax)
     308             :      * \tparam T   data type of reduction result
     309             :      * \tparam FAB FabArray/MultiFab type
     310             :      * \tparam F   callable type like a lambda function
     311             :      *
     312             :      * \param direction normal direction of the plane (e.g., 0, 1 and 2)
     313             :      * \param domain    domain Box
     314             :      * \param mf        a FabArray/MultiFab object specifying the iteration space
     315             :      * \param f         a callable object returning T.  It takes four ints,
     316             :      *                  where the first int is the local box index and the others
     317             :      *                  are spatial indices for x, y, and z-directions.
     318             :      *
     319             :      * \return reduction result (BaseFab<T>)
     320             :      */
     321             :     template <typename Op, typename T, typename FAB, typename F,
     322             :               std::enable_if_t<IsBaseFab<FAB>::value
     323             : #ifndef AMREX_USE_CUDA
     324             :                                && IsCallableR<T,F,int,int,int,int>::value
     325             : #endif
     326             :                                , int> FOO = 0>
     327             :     BaseFab<T>
     328             :     ReduceToPlane (int direction, Box const& domain, FabArray<FAB> const& mf, F const& f);
     329             : 
     330             :     /**
     331             :      * \brief Sum MultiFab data to line
     332             :      *
     333             :      * Return a HostVector that contains the sum of the given MultiFab data in the plane
     334             :      * with the given normal direction.  The size of the vector is
     335             :      * domain.length(direction) x ncomp.  The vector is actually a 2D array, where the
     336             :      * element for component icomp at spatial index k is at [icomp*ncomp+k].
     337             :      *
     338             :      * \param mf MultiFab data for summing
     339             :      * \param icomp starting component
     340             :      * \param ncomp number of components
     341             :      * \param domain the domain
     342             :      * \param direction the direction of the line
     343             :      * \param local If false, reduce across MPI processes.
     344             :      */
     345             :     Gpu::HostVector<Real> sumToLine (MultiFab const& mf, int icomp, int ncomp,
     346             :                                      Box const& domain, int direction, bool local = false);
     347             : 
     348             :     /** \brief Volume weighted sum for a vector of MultiFabs
     349             :      *
     350             :      * Return a volume weighted sum of MultiFabs of AMR data.  The sum is
     351             :      * perform on a single component of the data.  If the MultiFabs are
     352             :      * built with EB Factories, the cut cell volume fraction will be
     353             :      * included in the weight.
     354             :      */
     355             :     Real volumeWeightedSum (Vector<MultiFab const*> const& mf, int icomp,
     356             :                             Vector<Geometry> const& geom,
     357             :                             Vector<IntVect> const& ratio,
     358             :                             bool local = false);
     359             : 
     360             :     /**
     361             :      * \brief Fourth-order interpolation from fine to coarse level.
     362             :      *
     363             :      * This is for high-order "average-down" of finite-difference data.  If
     364             :      * ghost cell data are used, it's the caller's responsibility to fill
     365             :      * the ghost cells before calling this function.
     366             :      *
     367             :      * \param cmf   coarse data
     368             :      * \param scomp starting component
     369             :      * \param ncomp number of component
     370             :      * \param fmf   fine data
     371             :      * \param ratio refinement ratio.
     372             :      */
     373             :     void FourthOrderInterpFromFineToCoarse (MultiFab& cmf, int scomp, int ncomp,
     374             :                                             MultiFab const& fmf,
     375             :                                             IntVect const& ratio);
     376             : 
     377             :     /**
     378             :      * \brief Fill MultiFab with random numbers from uniform distribution
     379             :      *
     380             :      * The uniform distribution range is [0.0, 1.0) for CPU and SYCL, it's
     381             :      * (0,1] for CUDA and HIP. All cells including ghost cells are filled.
     382             :      *
     383             :      * \param mf    MultiFab
     384             :      * \param scomp starting component
     385             :      * \param ncomp number of component
     386             :      */
     387             :     void FillRandom (MultiFab& mf, int scomp, int ncomp);
     388             : 
     389             :     /**
     390             :      * \brief Fill MultiFab with random numbers from normal distribution
     391             :      *
     392             :      * All cells including ghost cells are filled.
     393             :      *
     394             :      * \param mf     MultiFab
     395             :      * \param scomp  starting component
     396             :      * \param ncomp  number of component
     397             :      * \param mean   mean of normal distribution
     398             :      * \param stddev standard deviation of normal distribution
     399             :      */
     400             :     void FillRandomNormal (MultiFab& mf, int scomp, int ncomp, Real mean, Real stddev);
     401             : 
     402             :     /**
     403             :      * \brief Convexify AMR data
     404             :      *
     405             :      * This function "convexifies" the AMR data by removing cells that are
     406             :      * covered by fine levels from coarse level MultiFabs. This could be
     407             :      * useful for visualization. The returned MultiFabs on coarse levels
     408             :      * have different BoxArrays from the original BoxArrays. For the finest
     409             :      * level, the data is simply copied to the returned object. The returned
     410             :      * MultiFabs have no ghost cells. For nodal data, the nodes on the
     411             :      * coarse/fine interface exist on both levels.
     412             :      */
     413             :     [[nodiscard]] Vector<MultiFab> convexify (Vector<MultiFab const*> const& mf,
     414             :                                 Vector<IntVect> const& refinement_ratio);
     415             : }
     416             : 
     417             : namespace amrex {
     418             : 
     419             : template <typename FAB>
     420             : iMultiFab
     421             : makeFineMask (const FabArray<FAB>& cmf, const BoxArray& fba, const IntVect& ratio,
     422             :               int crse_value, int fine_value)
     423             : {
     424             :     return makeFineMask(cmf.boxArray(), cmf.DistributionMap(), cmf.nGrowVect(),
     425             :                         fba, ratio, Periodicity::NonPeriodic(), crse_value, fine_value);
     426             : }
     427             : 
     428             : template <typename FAB>
     429             : iMultiFab
     430             : makeFineMask (const FabArray<FAB>& cmf, const BoxArray& fba, const IntVect& ratio,
     431             :               Periodicity const& period, int crse_value, int fine_value)
     432             : {
     433             :     return makeFineMask(cmf.boxArray(), cmf.DistributionMap(), cmf.nGrowVect(),
     434             :                         fba, ratio, period, crse_value, fine_value);
     435             : }
     436             : 
     437             : template <typename FAB>
     438             : iMultiFab
     439             : makeFineMask (const FabArray<FAB>& cmf, const FabArray<FAB>& fmf,
     440             :               const IntVect& cnghost, const IntVect& ratio,
     441             :               Periodicity const& period, int crse_value, int fine_value)
     442             : {
     443             :     iMultiFab mask(cmf.boxArray(), cmf.DistributionMap(), 1, cnghost);
     444             :     mask.setVal(crse_value);
     445             : 
     446             :     iMultiFab foo(amrex::coarsen(fmf.boxArray(),ratio), fmf.DistributionMap(),
     447             :                   1, 0, MFInfo().SetAlloc(false));
     448             :     const FabArrayBase::CPC& cpc = mask.getCPC(cnghost,foo,IntVect::TheZeroVector(),period);
     449             :     mask.setVal(fine_value, cpc, 0, 1);
     450             : 
     451             :     return mask;
     452             : }
     453             : 
     454             : template <typename FAB>
     455             : iMultiFab
     456             : makeFineMask (const FabArray<FAB>& cmf, const FabArray<FAB>& fmf,
     457             :               const IntVect& cnghost, const IntVect& ratio,
     458             :               Periodicity const& period, int crse_value, int fine_value,
     459             :               LayoutData<int>& has_cf)
     460             : {
     461             :     iMultiFab mask(cmf.boxArray(), cmf.DistributionMap(), 1, cnghost);
     462             :     mask.setVal(crse_value);
     463             : 
     464             :     iMultiFab foo(amrex::coarsen(fmf.boxArray(),ratio), fmf.DistributionMap(),
     465             :                   1, 0, MFInfo().SetAlloc(false));
     466             :     const FabArrayBase::CPC& cpc = mask.getCPC(cnghost,foo,IntVect::TheZeroVector(),period);
     467             :     mask.setVal(fine_value, cpc, 0, 1);
     468             : 
     469             :     has_cf = mask.RecvLayoutMask(cpc);
     470             : 
     471             :     return mask;
     472             : }
     473             : 
     474             : //! Average fine node-based MultiFab onto crse node-based MultiFab.
     475             : //! This routine assumes that the crse BoxArray is a coarsened version of the fine BoxArray.
     476             : template <typename FAB>
     477           0 : void average_down_nodal (const FabArray<FAB>& fine, FabArray<FAB>& crse,
     478             :                          const IntVect& ratio, int ngcrse, bool mfiter_is_definitely_safe)
     479             : {
     480             :     AMREX_ASSERT(fine.is_nodal());
     481             :     AMREX_ASSERT(crse.is_nodal());
     482             :     AMREX_ASSERT(crse.nComp() == fine.nComp());
     483             : 
     484           0 :     int ncomp = crse.nComp();
     485             :     using value_type = typename FAB::value_type;
     486             : 
     487           0 :     if (mfiter_is_definitely_safe || isMFIterSafe(fine, crse))
     488             :     {
     489             : #ifdef AMREX_USE_OMP
     490             : #pragma omp parallel if (Gpu::notInLaunchRegion())
     491             : #endif
     492           0 :         for (MFIter mfi(crse,TilingIfNotGPU()); mfi.isValid(); ++mfi)
     493             :         {
     494           0 :             const Box& bx = mfi.growntilebox(ngcrse);
     495           0 :             Array4<value_type> const& crsearr = crse.array(mfi);
     496           0 :             Array4<value_type const> const& finearr = fine.const_array(mfi);
     497             : 
     498           0 :             AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx,
     499             :             {
     500             :                 amrex_avgdown_nodes(tbx,crsearr,finearr,0,0,ncomp,ratio);
     501             :             });
     502             :         }
     503             :     }
     504             :     else
     505             :     {
     506           0 :         FabArray<FAB> ctmp(amrex::coarsen(fine.boxArray(),ratio), fine.DistributionMap(),
     507             :                            ncomp, ngcrse);
     508           0 :         average_down_nodal(fine, ctmp, ratio, ngcrse);
     509           0 :         crse.ParallelCopy(ctmp,0,0,ncomp,ngcrse,ngcrse);
     510             :     }
     511           0 : }
     512             : 
     513             : // *************************************************************************************************************
     514             : 
     515             : // Average fine cell-based MultiFab onto crse cell-centered MultiFab.
     516             : // We do NOT assume that the coarse layout is a coarsened version of the fine layout.
     517             : // This version does NOT use volume-weighting
     518             : template<typename FAB>
     519          46 : void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse, int scomp, int ncomp, int rr)
     520             : {
     521          92 :     average_down(S_fine,S_crse,scomp,ncomp,rr*IntVect::TheUnitVector());
     522          46 : }
     523             : 
     524             : template<typename FAB>
     525       30730 : void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
     526             :                    int scomp, int ncomp, const IntVect& ratio)
     527             : {
     528             :     BL_PROFILE("amrex::average_down");
     529             :     AMREX_ASSERT(S_crse.nComp() == S_fine.nComp());
     530             :     AMREX_ASSERT((S_crse.is_cell_centered() && S_fine.is_cell_centered()) ||
     531             :                  (S_crse.is_nodal()         && S_fine.is_nodal()));
     532             : 
     533             :     using value_type = typename FAB::value_type;
     534             : 
     535       30730 :     bool is_cell_centered = S_crse.is_cell_centered();
     536             : 
     537             :     //
     538             :     // Coarsen() the fine stuff on processors owning the fine data.
     539             :     //
     540       61460 :     BoxArray crse_S_fine_BA = S_fine.boxArray(); crse_S_fine_BA.coarsen(ratio);
     541             : 
     542       30730 :     if (crse_S_fine_BA == S_crse.boxArray() && S_fine.DistributionMap() == S_crse.DistributionMap())
     543             :     {
     544             : #ifdef AMREX_USE_GPU
     545             :         if (Gpu::inLaunchRegion() && S_crse.isFusingCandidate()) {
     546             :             auto const& crsema = S_crse.arrays();
     547             :             auto const& finema = S_fine.const_arrays();
     548             :             if (is_cell_centered) {
     549             :                 ParallelFor(S_crse, IntVect(0), ncomp,
     550             :                             [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
     551             :                             {
     552             :                                 amrex_avgdown(i,j,k,n,crsema[box_no],finema[box_no],scomp,scomp,ratio);
     553             :                             });
     554             :             } else {
     555             :                 ParallelFor(S_crse, IntVect(0), ncomp,
     556             :                             [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
     557             :                             {
     558             :                                 amrex_avgdown_nodes(i,j,k,n,crsema[box_no],finema[box_no],scomp,scomp,ratio);
     559             :                             });
     560             :             }
     561             :             if (!Gpu::inNoSyncRegion()) {
     562             :                 Gpu::streamSynchronize();
     563             :             }
     564             :         } else
     565             : #endif
     566             :         {
     567             : #ifdef AMREX_USE_OMP
     568             : #pragma omp parallel if (Gpu::notInLaunchRegion())
     569             : #endif
     570        7232 :             for (MFIter mfi(S_crse,TilingIfNotGPU()); mfi.isValid(); ++mfi)
     571             :             {
     572             :                 //  NOTE: The tilebox is defined at the coarse level.
     573        3616 :                 const Box& bx = mfi.tilebox();
     574        3616 :                 Array4<value_type> const& crsearr = S_crse.array(mfi);
     575        3616 :                 Array4<value_type const> const& finearr = S_fine.const_array(mfi);
     576             : 
     577        3616 :                 if (is_cell_centered) {
     578           0 :                     AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
     579             :                                                       {
     580             :                                                           amrex_avgdown(i,j,k,n,crsearr,finearr,scomp,scomp,ratio);
     581             :                                                       });
     582             :                 } else {
     583      225048 :                     AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
     584             :                                                       {
     585             :                                                           amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,scomp,scomp,ratio);
     586             :                                                       });
     587             :                 }
     588             :             }
     589             :         }
     590             :     }
     591             :     else
     592             :     {
     593       81342 :         FabArray<FAB> crse_S_fine(crse_S_fine_BA, S_fine.DistributionMap(), ncomp, 0, MFInfo(),DefaultFabFactory<FAB>());
     594             : 
     595             : #ifdef AMREX_USE_GPU
     596             :         if (Gpu::inLaunchRegion() && crse_S_fine.isFusingCandidate()) {
     597             :             auto const& crsema = crse_S_fine.arrays();
     598             :             auto const& finema = S_fine.const_arrays();
     599             :             if (is_cell_centered) {
     600             :                 ParallelFor(crse_S_fine, IntVect(0), ncomp,
     601             :                             [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
     602             :                             {
     603             :                                 amrex_avgdown(i,j,k,n,crsema[box_no],finema[box_no],0,scomp,ratio);
     604             :                             });
     605             :             } else {
     606             :                 ParallelFor(crse_S_fine, IntVect(0), ncomp,
     607             :                             [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
     608             :                             {
     609             :                                 amrex_avgdown_nodes(i,j,k,n,crsema[box_no],finema[box_no],0,scomp,ratio);
     610             :                             });
     611             :             }
     612             :             if (!Gpu::inNoSyncRegion()) {
     613             :                 Gpu::streamSynchronize();
     614             :             }
     615             :         } else
     616             : #endif
     617             :         {
     618             : #ifdef AMREX_USE_OMP
     619             : #pragma omp parallel if (Gpu::notInLaunchRegion())
     620             : #endif
     621       85778 :             for (MFIter mfi(crse_S_fine,TilingIfNotGPU()); mfi.isValid(); ++mfi)
     622             :             {
     623             :                 //  NOTE: The tilebox is defined at the coarse level.
     624       58664 :                 const Box& bx = mfi.tilebox();
     625       58664 :                 Array4<value_type> const& crsearr = crse_S_fine.array(mfi);
     626       58664 :                 Array4<value_type const> const& finearr = S_fine.const_array(mfi);
     627             : 
     628             :                 //  NOTE: We copy from component scomp of the fine fab into component 0 of the crse fab
     629             :                 //        because the crse fab is a temporary which was made starting at comp 0, it is
     630             :                 //        not part of the actual crse multifab which came in.
     631             : 
     632       58664 :                 if (is_cell_centered) {
     633           0 :                     AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
     634             :                                                       {
     635             :                                                           amrex_avgdown(i,j,k,n,crsearr,finearr,0,scomp,ratio);
     636             :                                                       });
     637             :                 } else {
     638    17710800 :                     AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
     639             :                                                       {
     640             :                                                           amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,0,scomp,ratio);
     641             :                                                       });
     642             :                 }
     643             :             }
     644             :         }
     645             : 
     646       27114 :         S_crse.ParallelCopy(crse_S_fine,0,scomp,ncomp);
     647             :     }
     648       30730 : }
     649             : 
     650             : 
     651             : 
     652             : 
     653             : 
     654             : /**
     655             :  * \brief Returns part of a norm based on two MultiFabs.
     656             :  *
     657             :  * The MultiFabs MUST have the same underlying BoxArray.
     658             :  * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n))
     659             :  * inside the summation (subject to a valid mask entry pf(mask(i,j,k,n)
     660             :  */
     661             : template <typename F>
     662             : Real
     663             : NormHelper (const MultiFab& x, int xcomp,
     664             :             const MultiFab& y, int ycomp,
     665             :             F const& f,
     666             :             int numcomp, IntVect nghost, bool local)
     667             : {
     668             :     BL_ASSERT(x.boxArray() == y.boxArray());
     669             :     BL_ASSERT(x.DistributionMap() == y.DistributionMap());
     670             :     BL_ASSERT(x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost));
     671             : 
     672             :     Real sm = Real(0.0);
     673             : #ifdef AMREX_USE_GPU
     674             :     if (Gpu::inLaunchRegion()) {
     675             :         auto const& xma = x.const_arrays();
     676             :         auto const& yma = y.const_arrays();
     677             :         sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<Real>{}, x, nghost,
     678             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<Real>
     679             :         {
     680             :             Real t = Real(0.0);
     681             :             auto const& xfab = xma[box_no];
     682             :             auto const& yfab = yma[box_no];
     683             :             for (int n = 0; n < numcomp; ++n) {
     684             :                 t += f(xfab(i,j,k,xcomp+n) , yfab(i,j,k,ycomp+n));
     685             :             }
     686             :             return t;
     687             :         });
     688             :     } else
     689             : #endif
     690             :     {
     691             : #ifdef AMREX_USE_OMP
     692             : #pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
     693             : #endif
     694             :         for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
     695             :         {
     696             :             Box const& bx = mfi.growntilebox(nghost);
     697             :             Array4<Real const> const& xfab = x.const_array(mfi);
     698             :             Array4<Real const> const& yfab = y.const_array(mfi);
     699             :             AMREX_LOOP_4D(bx, numcomp, i, j, k, n,
     700             :             {
     701             :                 sm += f(xfab(i,j,k,xcomp+n) , yfab(i,j,k,ycomp+n));
     702             :             });
     703             :         }
     704             :     }
     705             : 
     706             :     if (!local) {
     707             :         ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
     708             :     }
     709             : 
     710             :     return sm;
     711             : }
     712             : 
     713             : /**
     714             :  * \brief Returns part of a norm based on three MultiFabs
     715             :  *
     716             :  * The MultiFabs MUST have the same underlying BoxArray.
     717             :  * The Predicate pf is used to test the mask
     718             :  * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n))
     719             :  * inside the summation (subject to a valid mask entry pf(mask(i,j,k,n)
     720             :  */
     721             : template <typename MMF, typename Pred, typename F>
     722             : Real
     723             : NormHelper (const MMF& mask,
     724             :                const MultiFab& x, int xcomp,
     725             :                const MultiFab& y, int ycomp,
     726             :                Pred const& pf,
     727             :                F const& f,
     728             :                int numcomp, IntVect nghost, bool local)
     729             : {
     730             :     BL_ASSERT(x.boxArray() == y.boxArray());
     731             :     BL_ASSERT(x.boxArray() == mask.boxArray());
     732             :     BL_ASSERT(x.DistributionMap() == y.DistributionMap());
     733             :     BL_ASSERT(x.DistributionMap() == mask.DistributionMap());
     734             :     BL_ASSERT(x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost));
     735             :     BL_ASSERT(mask.nGrowVect().allGE(nghost));
     736             : 
     737             :     Real sm = Real(0.0);
     738             : #ifdef AMREX_USE_GPU
     739             :     if (Gpu::inLaunchRegion()) {
     740             :         auto const& xma = x.const_arrays();
     741             :         auto const& yma = y.const_arrays();
     742             :         auto const& mma = mask.const_arrays();
     743             :         sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<Real>{}, x, nghost,
     744             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<Real>
     745             :         {
     746             :             Real t = Real(0.0);
     747             :             if (pf(mma[box_no](i,j,k))) {
     748             :                 auto const& xfab = xma[box_no];
     749             :                 auto const& yfab = yma[box_no];
     750             :                 for (int n = 0; n < numcomp; ++n) {
     751             :                     t += f(xfab(i,j,k,xcomp+n) , yfab(i,j,k,ycomp+n));
     752             :                 }
     753             :             }
     754             :             return t;
     755             :         });
     756             :     } else
     757             : #endif
     758             :     {
     759             : #ifdef AMREX_USE_OMP
     760             : #pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
     761             : #endif
     762             :         for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
     763             :         {
     764             :             Box const& bx = mfi.growntilebox(nghost);
     765             :             Array4<Real const> const& xfab = x.const_array(mfi);
     766             :             Array4<Real const> const& yfab = y.const_array(mfi);
     767             :             auto const& mfab = mask.const_array(mfi);
     768             :             AMREX_LOOP_4D(bx, numcomp, i, j, k, n,
     769             :             {
     770             :                 if (pf(mfab(i,j,k))) {
     771             :                     sm += f(xfab(i,j,k,xcomp+n) , yfab(i,j,k,ycomp+n));
     772             :                 }
     773             :             });
     774             :         }
     775             :     }
     776             : 
     777             :     if (!local) {
     778             :         ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
     779             :     }
     780             : 
     781             :     return sm;
     782             : }
     783             : 
     784             : template <typename CMF, typename FMF,
     785             :           std::enable_if_t<IsFabArray_v<CMF> && IsFabArray_v<FMF>, int> FOO>
     786             : void average_face_to_cellcenter (CMF& cc, int dcomp,
     787             :                                  const Array<const FMF*,AMREX_SPACEDIM>& fc,
     788             :                                  int ngrow)
     789             : {
     790             :     AMREX_ASSERT(cc.nComp() >= dcomp + AMREX_SPACEDIM);
     791             :     AMREX_ASSERT(fc[0]->nComp() == 1);
     792             : 
     793             : #ifdef AMREX_USE_GPU
     794             :     if (Gpu::inLaunchRegion() && cc.isFusingCandidate()) {
     795             :         auto const& ccma = cc.arrays();
     796             :         AMREX_D_TERM(auto const& fxma = fc[0]->const_arrays();,
     797             :                      auto const& fyma = fc[1]->const_arrays();,
     798             :                      auto const& fzma = fc[2]->const_arrays(););
     799             :         ParallelFor(cc, IntVect(ngrow),
     800             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
     801             :         {
     802             : #if (AMREX_SPACEDIM == 1)
     803             :             GeometryData gd{};
     804             :             gd.coord = 0;
     805             : #endif
     806             :             amrex_avg_fc_to_cc(i,j,k, ccma[box_no], AMREX_D_DECL(fxma[box_no],
     807             :                                                                  fyma[box_no],
     808             :                                                                  fzma[box_no]),
     809             :                                dcomp
     810             : #if (AMREX_SPACEDIM == 1)
     811             :                                , gd
     812             : #endif
     813             :                 );
     814             :         });
     815             :         if (!Gpu::inNoSyncRegion()) {
     816             :             Gpu::streamSynchronize();
     817             :         }
     818             :     } else
     819             : #endif
     820             :     {
     821             : #ifdef AMREX_USE_OMP
     822             : #pragma omp parallel if (Gpu::notInLaunchRegion())
     823             : #endif
     824             :         for (MFIter mfi(cc,TilingIfNotGPU()); mfi.isValid(); ++mfi)
     825             :         {
     826             :             const Box bx = mfi.growntilebox(ngrow);
     827             :             auto const& ccarr = cc.array(mfi);
     828             :             AMREX_D_TERM(auto const& fxarr = fc[0]->const_array(mfi);,
     829             :                          auto const& fyarr = fc[1]->const_array(mfi);,
     830             :                          auto const& fzarr = fc[2]->const_array(mfi););
     831             : 
     832             : #if (AMREX_SPACEDIM == 1)
     833             :             AMREX_HOST_DEVICE_PARALLEL_FOR_3D( bx, i, j, k,
     834             :             {
     835             :                 GeometryData gd;
     836             :                 gd.coord = 0;
     837             :                 amrex_avg_fc_to_cc(i,j,k, ccarr, fxarr, dcomp, gd);
     838             :             });
     839             : #else
     840             :             AMREX_HOST_DEVICE_PARALLEL_FOR_3D( bx, i, j, k,
     841             :             {
     842             :                 amrex_avg_fc_to_cc(i,j,k, ccarr, AMREX_D_DECL(fxarr,fyarr,fzarr), dcomp);
     843             :             });
     844             : #endif
     845             :         }
     846             :     }
     847             : }
     848             : 
     849             : template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int>>
     850             : void average_down_faces (const Vector<const MF*>& fine,
     851             :                          const Vector<MF*>& crse,
     852             :                          const IntVect& ratio, int ngcrse)
     853             : {
     854             :     AMREX_ASSERT(fine.size() == AMREX_SPACEDIM && crse.size() == AMREX_SPACEDIM);
     855             :     average_down_faces(Array<const MF*,AMREX_SPACEDIM>
     856             :                                {{AMREX_D_DECL(fine[0],fine[1],fine[2])}},
     857             :                        Array<MF*,AMREX_SPACEDIM>
     858             :                                {{AMREX_D_DECL(crse[0],crse[1],crse[2])}},
     859             :                        ratio, ngcrse);
     860             : }
     861             : 
     862             : template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int>>
     863             : void average_down_faces (const Vector<const MF*>& fine,
     864             :                          const Vector<MF*>& crse, int ratio, int ngcrse)
     865             : {
     866             :     average_down_faces(fine,crse,IntVect{ratio},ngcrse);
     867             : }
     868             : 
     869             : template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int>>
     870             : void average_down_faces (const Array<const MF*,AMREX_SPACEDIM>& fine,
     871             :                          const Array<MF*,AMREX_SPACEDIM>& crse,
     872             :                          int ratio, int ngcrse)
     873             : {
     874             :     average_down_faces(fine,crse,IntVect{ratio},ngcrse);
     875             : }
     876             : 
     877             : template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int>>
     878             : void average_down_faces (const Array<const MF*,AMREX_SPACEDIM>& fine,
     879             :                          const Array<MF*,AMREX_SPACEDIM>& crse,
     880             :                          const IntVect& ratio, int ngcrse)
     881             : {
     882             :     for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
     883             :         average_down_faces(*fine[idim], *crse[idim], ratio, ngcrse);
     884             :     }
     885             : }
     886             : 
     887             : template <typename FAB>
     888             : void average_down_faces (const FabArray<FAB>& fine, FabArray<FAB>& crse,
     889             :                          const IntVect& ratio, int ngcrse)
     890             : {
     891             :     BL_PROFILE("average_down_faces");
     892             : 
     893             :     AMREX_ASSERT(crse.nComp() == fine.nComp());
     894             :     AMREX_ASSERT(fine.ixType() == crse.ixType());
     895             :     const auto type = fine.ixType();
     896             :     int dir;
     897             :     for (dir = 0; dir < AMREX_SPACEDIM; ++dir) {
     898             :         if (type.nodeCentered(dir)) { break; }
     899             :     }
     900             :     auto tmptype = type;
     901             :     tmptype.unset(dir);
     902             :     if (dir >= AMREX_SPACEDIM || !tmptype.cellCentered()) {
     903             :         amrex::Abort("average_down_faces: not face index type");
     904             :     }
     905             :     const int ncomp = crse.nComp();
     906             :     if (isMFIterSafe(fine, crse))
     907             :     {
     908             : #ifdef AMREX_USE_GPU
     909             :         if (Gpu::inLaunchRegion() && crse.isFusingCandidate()) {
     910             :             auto const& crsema = crse.arrays();
     911             :             auto const& finema = fine.const_arrays();
     912             :             ParallelFor(crse, IntVect(ngcrse), ncomp,
     913             :             [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
     914             :             {
     915             :                 amrex_avgdown_faces(i,j,k,n, crsema[box_no], finema[box_no], 0, 0, ratio, dir);
     916             :             });
     917             :             if (!Gpu::inNoSyncRegion()) {
     918             :                 Gpu::streamSynchronize();
     919             :             }
     920             :         } else
     921             : #endif
     922             :         {
     923             : #ifdef AMREX_USE_OMP
     924             : #pragma omp parallel if (Gpu::notInLaunchRegion())
     925             : #endif
     926             :             for (MFIter mfi(crse,TilingIfNotGPU()); mfi.isValid(); ++mfi)
     927             :             {
     928             :                 const Box& bx = mfi.growntilebox(ngcrse);
     929             :                 auto const& crsearr = crse.array(mfi);
     930             :                 auto const& finearr = fine.const_array(mfi);
     931             :                 AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
     932             :                 {
     933             :                     amrex_avgdown_faces(i,j,k,n, crsearr, finearr, 0, 0, ratio, dir);
     934             :                 });
     935             :             }
     936             :         }
     937             :     }
     938             :     else
     939             :     {
     940             :         FabArray<FAB> ctmp(amrex::coarsen(fine.boxArray(),ratio), fine.DistributionMap(),
     941             :                       ncomp, ngcrse, MFInfo(), DefaultFabFactory<FAB>());
     942             :         average_down_faces(fine, ctmp, ratio, ngcrse);
     943             :         crse.ParallelCopy(ctmp,0,0,ncomp,ngcrse,ngcrse);
     944             :     }
     945             : }
     946             : 
     947             : template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int>>
     948             : void average_down_faces (const Array<const MF*,AMREX_SPACEDIM>& fine,
     949             :                          const Array<MF*,AMREX_SPACEDIM>& crse,
     950             :                          const IntVect& ratio, const Geometry& crse_geom)
     951             : {
     952             :     for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
     953             :         average_down_faces(*fine[idim], *crse[idim], ratio, crse_geom);
     954             :     }
     955             : }
     956             : 
     957             : template <typename FAB>
     958             : void average_down_faces (const FabArray<FAB>& fine, FabArray<FAB>& crse,
     959             :                          const IntVect& ratio, const Geometry& crse_geom)
     960             : {
     961             :     FabArray<FAB> ctmp(amrex::coarsen(fine.boxArray(),ratio), fine.DistributionMap(),
     962             :                   crse.nComp(), 0);
     963             :     average_down_faces(fine, ctmp, ratio, 0);
     964             :     crse.ParallelCopy(ctmp,0,0,crse.nComp(),0,0,crse_geom.periodicity());
     965             : }
     966             : 
     967             : template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> FOO>
     968             : Vector<typename MF::value_type> get_cell_data (MF const& mf, IntVect const& cell)
     969             : {
     970             :     using T = typename MF::value_type;
     971             :     const int ncomp = mf.nComp();
     972             :     Gpu::DeviceVector<T> dv(ncomp);
     973             :     auto* dp = dv.data();
     974             :     bool found = false;
     975             :     auto loc = cell.dim3();
     976             :     for (MFIter mfi(mf); mfi.isValid() && !found; ++mfi)
     977             :     {
     978             :         Box const& box = mfi.validbox();
     979             :         if (box.contains(cell)) {
     980             :             found = true;
     981             :             auto const& fab = mf.const_array(mfi);
     982             :             amrex::ParallelFor(1, [=] AMREX_GPU_DEVICE (int) noexcept
     983             :             {
     984             :                 for (int n = 0; n < ncomp; ++n) {
     985             :                     dp[n] = fab(loc.x,loc.y,loc.z,n);
     986             :                 }
     987             :             });
     988             :         }
     989             :     }
     990             :     Vector<T> hv;
     991             :     if (found) {
     992             :         hv.resize(ncomp);
     993             :         Gpu::copy(Gpu::deviceToHost, dv.begin(), dv.end(), hv.begin());
     994             :     }
     995             :     return hv;
     996             : }
     997             : 
     998             : template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> FOO>
     999             : MF get_line_data (MF const& mf, int dir, IntVect const& cell)
    1000             : {
    1001             :     BoxArray const& ba = mf.boxArray();
    1002             :     DistributionMapping const& dm = mf.DistributionMap();
    1003             :     const auto nboxes = static_cast<int>(ba.size());
    1004             : 
    1005             :     BoxList bl(ba.ixType());
    1006             :     Vector<int> procmap;
    1007             :     Vector<int> index_map;
    1008             :     for (int i = 0; i < nboxes; ++i) {
    1009             :         Box const& b = ba[i];
    1010             :         IntVect lo = cell;
    1011             :         lo[dir] = b.smallEnd(dir);
    1012             :         if (b.contains(lo)) {
    1013             :             IntVect hi = lo;
    1014             :             hi[dir] = b.bigEnd(dir);
    1015             :             Box b1d(lo,hi,b.ixType());
    1016             :             bl.push_back(b1d);
    1017             :             procmap.push_back(dm[i]);
    1018             :             index_map.push_back(i);
    1019             :         }
    1020             :     }
    1021             : 
    1022             :     if (bl.isEmpty()) {
    1023             :         return MF();
    1024             :     } else {
    1025             :         BoxArray rba(std::move(bl));
    1026             :         DistributionMapping rdm(std::move(procmap));
    1027             :         MF rmf(rba, rdm, mf.nComp(), IntVect(0),
    1028             :                MFInfo().SetArena(mf.arena()));
    1029             : #ifdef AMREX_USE_OMP
    1030             : #pragma omp parallel if (Gpu::notInLaunchRegion())
    1031             : #endif
    1032             :         for (MFIter mfi(rmf); mfi.isValid(); ++mfi) {
    1033             :             Box const& b = mfi.validbox();
    1034             :             auto const& dfab = rmf.array(mfi);
    1035             :             auto const& sfab = mf.const_array(index_map[mfi.index()]);
    1036             :             amrex::ParallelFor(b, mf.nComp(),
    1037             :             [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
    1038             :             {
    1039             :                 dfab(i,j,k,n) = sfab(i,j,k,n);
    1040             :             });
    1041             :         }
    1042             :         return rmf;
    1043             :     }
    1044             : }
    1045             : 
    1046             : template <typename Op, typename T, typename FAB, typename F,
    1047             :           std::enable_if_t<IsBaseFab<FAB>::value
    1048             : #ifndef AMREX_USE_CUDA
    1049             :                            && IsCallableR<T,F,int,int,int,int>::value
    1050             : #endif
    1051             :                            , int> FOO>
    1052             : BaseFab<T>
    1053             : ReduceToPlane (int direction, Box const& domain, FabArray<FAB> const& mf, F const& f)
    1054             : {
    1055             :     Box domain2d = domain;
    1056             :     domain2d.setRange(direction, 0);
    1057             : 
    1058             :     T initval;
    1059             :     Op().init(initval);
    1060             : 
    1061             :     BaseFab<T> r(domain2d);
    1062             :     r.template setVal<RunOn::Device>(initval);
    1063             :     auto const& ar = r.array();
    1064             : 
    1065             :     for (MFIter mfi(mf,MFItInfo().UseDefaultStream().DisableDeviceSync());
    1066             :          mfi.isValid(); ++mfi)
    1067             :     {
    1068             :         Box bx = mfi.validbox() & domain;
    1069             :         if (bx.ok()) {
    1070             :             int box_no = mfi.LocalIndex();
    1071             : #if defined(AMREX_USE_GPU)
    1072             :             Box b2d = bx;
    1073             :             b2d.setRange(direction,0);
    1074             :             const auto blo = amrex::lbound(bx);
    1075             :             const auto len = amrex::length(bx);
    1076             :             constexpr int nthreads = 128;
    1077             :             auto nblocks = static_cast<int>(b2d.numPts());
    1078             : #ifdef AMREX_USE_SYCL
    1079             :             constexpr std::size_t shared_mem_bytes = sizeof(T)*Gpu::Device::warp_size;
    1080             :             amrex::launch<nthreads>(nblocks, shared_mem_bytes, Gpu::gpuStream(),
    1081             :                                     [=] AMREX_GPU_DEVICE (Gpu::Handler const& h)
    1082             :             {
    1083             :                 int bid = h.blockIdx();
    1084             :                 int tid = h.threadIdx();
    1085             : #else
    1086             :             amrex::launch<nthreads>(nblocks, Gpu::gpuStream(),
    1087             :                                     [=] AMREX_GPU_DEVICE ()
    1088             :             {
    1089             :                 int bid = blockIdx.x;
    1090             :                 int tid = threadIdx.x;
    1091             : #endif
    1092             :                 T tmp;
    1093             :                 Op().init(tmp);
    1094             :                 T* p;
    1095             :                 if (direction == 0) {
    1096             :                     int k = bid /   len.y;
    1097             :                     int j = bid - k*len.y;
    1098             :                     k += blo.z;
    1099             :                     j += blo.y;
    1100             :                     for (int i = blo.x + tid; i < blo.x+len.x; i += nthreads) {
    1101             :                         Op().local_update(tmp, f(box_no,i,j,k));
    1102             :                     }
    1103             :                     p = ar.ptr(0,j,k);
    1104             :                 } else if (direction == 1) {
    1105             :                     int k = bid /   len.x;
    1106             :                     int i = bid - k*len.x;
    1107             :                     k += blo.z;
    1108             :                     i += blo.x;
    1109             :                     for (int j = blo.y + tid; j < blo.y+len.y; j += nthreads) {
    1110             :                         Op().local_update(tmp, f(box_no,i,j,k));
    1111             :                     }
    1112             :                     p = ar.ptr(i,0,k);
    1113             :                 } else {
    1114             :                     int j = bid /   len.x;
    1115             :                     int i = bid - j*len.x;
    1116             :                     j += blo.y;
    1117             :                     i += blo.x;
    1118             :                     for (int k = blo.z + tid; k < blo.z+len.z; k += nthreads) {
    1119             :                         Op().local_update(tmp, f(box_no,i,j,k));
    1120             :                     }
    1121             :                     p = ar.ptr(i,j,0);
    1122             :                 }
    1123             : #ifdef AMREX_USE_SYCL
    1124             :                 Op().template parallel_update<T>(*p, tmp, h);
    1125             : #else
    1126             :                 Op().template parallel_update<T,nthreads>(*p, tmp);
    1127             : #endif
    1128             :             });
    1129             : #else
    1130             :             // CPU
    1131             :             if (direction == 0) {
    1132             :                 AMREX_LOOP_3D(bx, i, j, k,
    1133             :                 {
    1134             :                     Op().local_update(ar(0,j,k), f(box_no,i,j,k));
    1135             :                 });
    1136             :             } else if (direction == 1) {
    1137             :                 AMREX_LOOP_3D(bx, i, j, k,
    1138             :                 {
    1139             :                     Op().local_update(ar(i,0,k), f(box_no,i,j,k));
    1140             :                 });
    1141             :             } else {
    1142             :                 AMREX_LOOP_3D(bx, i, j, k,
    1143             :                 {
    1144             :                     Op().local_update(ar(i,j,0), f(box_no,i,j,k));
    1145             :                 });
    1146             :             }
    1147             : #endif
    1148             :         }
    1149             :     }
    1150             :     Gpu::streamSynchronize();
    1151             : 
    1152             :     return r;
    1153             : }
    1154             : 
    1155             : }
    1156             : 
    1157             : #endif

Generated by: LCOV version 1.14