LCOV - code coverage report
Current view: top level - ext/amrex/2d-coverage-g++-24.08/include - AMReX_FabArray.H (source / functions) Hit Total Coverage
Test: coverage_merged.info Lines: 230 259 88.8 %
Date: 2024-11-18 05:28:54 Functions: 311 828 37.6 %

          Line data    Source code
       1             : 
       2             : #ifndef BL_FABARRAY_H
       3             : #define BL_FABARRAY_H
       4             : #include <AMReX_Config.H>
       5             : 
       6             : #include <AMReX_BLassert.H>
       7             : #include <AMReX_Array.H>
       8             : #include <AMReX_Vector.H>
       9             : #include <AMReX_Box.H>
      10             : #include <AMReX.H>
      11             : #include <AMReX_BoxArray.H>
      12             : #include <AMReX_BoxDomain.H>
      13             : #include <AMReX_FabFactory.H>
      14             : #include <AMReX_DistributionMapping.H>
      15             : #include <AMReX_Geometry.H>
      16             : #include <AMReX_ParallelDescriptor.H>
      17             : #include <AMReX_Utility.H>
      18             : #include <AMReX_ccse-mpi.H>
      19             : #include <AMReX_BLProfiler.H>
      20             : #include <AMReX_Periodicity.H>
      21             : #include <AMReX_Print.H>
      22             : #include <AMReX_FabArrayBase.H>
      23             : #include <AMReX_MFIter.H>
      24             : #include <AMReX_MakeType.H>
      25             : #include <AMReX_TypeTraits.H>
      26             : #include <AMReX_LayoutData.H>
      27             : #include <AMReX_BaseFab.H>
      28             : #include <AMReX_BaseFabUtility.H>
      29             : #include <AMReX_MFParallelFor.H>
      30             : #include <AMReX_TagParallelFor.H>
      31             : #include <AMReX_ParReduce.H>
      32             : 
      33             : #include <AMReX_Gpu.H>
      34             : 
      35             : #ifdef AMREX_USE_EB
      36             : #include <AMReX_EBFabFactory.H>
      37             : #endif
      38             : 
      39             : #ifdef AMREX_USE_OMP
      40             : #include <omp.h>
      41             : #endif
      42             : 
      43             : #include <algorithm>
      44             : #include <cstring>
      45             : #include <limits>
      46             : #include <map>
      47             : #include <memory>
      48             : #include <utility>
      49             : #include <set>
      50             : #include <string>
      51             : #include <vector>
      52             : 
      53             : 
      54             : namespace amrex {
      55             : 
      56             : template <typename T, std::enable_if_t<!IsBaseFab<T>::value,int> = 0>
      57             : Long nBytesOwned (T const&) noexcept { return 0; }
      58             : 
      59             : template <typename T>
      60     6848511 : Long nBytesOwned (BaseFab<T> const& fab) noexcept { return fab.nBytesOwned(); }
      61             : 
      62             : /**
      63             :  * \brief FabArray memory allocation information
      64             :  */
      65             : struct MFInfo {
      66             :     // alloc: allocate memory or not
      67             :     bool    alloc = true;
      68             :     bool    alloc_single_chunk = FabArrayBase::getAllocSingleChunk();
      69             :     Arena*  arena = nullptr;
      70             :     Vector<std::string> tags;
      71             : 
      72           0 :     MFInfo& SetAlloc (bool a) noexcept { alloc = a; return *this; }
      73             : 
      74             :     MFInfo& SetAllocSingleChunk (bool a) noexcept { alloc_single_chunk = a; return *this; }
      75             : 
      76             :     MFInfo& SetArena (Arena* ar) noexcept { arena = ar; return *this; }
      77             : 
      78             :     MFInfo& SetTag () noexcept { return *this; }
      79             : 
      80             :     MFInfo& SetTag (const char* t) noexcept {
      81             :         tags.emplace_back(t);
      82             :         return *this;
      83             :     }
      84             : 
      85             :     MFInfo& SetTag (const std::string& t) noexcept {
      86             :         tags.emplace_back(t);
      87             :         return *this;
      88             :     }
      89             : 
      90             :     template <typename T, typename... Ts>
      91             :     MFInfo& SetTag (T&& t, Ts&&... ts) noexcept {
      92             :         tags.emplace_back(std::forward<T>(t));
      93             :         return SetTag(std::forward<Ts>(ts)...);
      94             :     }
      95             : };
      96             : 
      97             : struct TheFaArenaDeleter {
      98             :     using pointer = char*;
      99             :     void operator()(pointer p) const noexcept {
     100             :         The_Comms_Arena()->free(p);
     101             :     }
     102             : };
     103             : using TheFaArenaPointer = std::unique_ptr<char, TheFaArenaDeleter>;
     104             : 
     105             : // Data used in non-blocking fill boundary.
     106             : template <class FAB>
     107             : struct FBData {
     108             : 
     109             :     const FabArrayBase::FB*  fb = nullptr;
     110             :     int                 scomp;
     111             :     int                 ncomp;
     112             : 
     113             :     //
     114             :     char*               the_recv_data = nullptr;
     115             :     char*               the_send_data = nullptr;
     116             :     Vector<int>         recv_from;
     117             :     Vector<char*>       recv_data;
     118             :     Vector<std::size_t> recv_size;
     119             :     Vector<MPI_Request> recv_reqs;
     120             :     Vector<MPI_Status>  recv_stat;
     121             :     //
     122             :     Vector<char*>       send_data;
     123             :     Vector<MPI_Request> send_reqs;
     124             :     int                 tag;
     125             : 
     126             : };
     127             : 
     128             : // Data used in non-blocking parallel copy.
     129             : template <class FAB>
     130             : struct PCData {
     131             : 
     132             :     const FabArrayBase::CPC*  cpc = nullptr;
     133             :     const FabArray<FAB>*      src = nullptr;
     134             :     FabArrayBase::CpOp  op;
     135             :     int                 tag = -1;
     136             :     int                 actual_n_rcvs = -1;
     137             :     int                 SC = -1, NC = -1, DC = -1;
     138             : 
     139             :     char*               the_recv_data = nullptr;
     140             :     char*               the_send_data = nullptr;
     141             :     Vector<int>         recv_from;
     142             :     Vector<char*>       recv_data;
     143             :     Vector<std::size_t> recv_size;
     144             :     Vector<MPI_Request> recv_reqs;
     145             :     Vector<MPI_Request> send_reqs;
     146             : 
     147             : };
     148             : 
     149             : template <typename T>
     150             : struct MultiArray4
     151             : {
     152             :     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
     153             :     Array4<T> const& operator[] (int li) const noexcept {
     154             :         AMREX_IF_ON_DEVICE((return dp[li];))
     155             :         AMREX_IF_ON_HOST((return hp[li];))
     156             :     }
     157             : 
     158             :     AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
     159             :     explicit operator bool() const noexcept {
     160             :         AMREX_IF_ON_DEVICE((return dp != nullptr;))
     161             :         AMREX_IF_ON_HOST((return hp != nullptr;))
     162             :     }
     163             : 
     164             : #ifdef AMREX_USE_GPU
     165             :     Array4<T> const* AMREX_RESTRICT dp = nullptr;
     166             : #endif
     167             :     Array4<T> const* AMREX_RESTRICT hp = nullptr;
     168             : };
     169             : 
     170             : template <class FAB> class FabArray;
     171             : 
     172             : template <class DFAB, class SFAB,
     173             :           std::enable_if_t<std::conjunction_v<
     174             :               IsBaseFab<DFAB>, IsBaseFab<SFAB>,
     175             :               std::is_convertible<typename SFAB::value_type,
     176             :                                   typename DFAB::value_type>>, int> BAR = 0>
     177             : void
     178             : Copy (FabArray<DFAB>& dst, FabArray<SFAB> const& src, int srccomp, int dstcomp, int numcomp, int nghost)
     179             : {
     180             :     Copy(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost));
     181             : }
     182             : 
     183             : template <class DFAB, class SFAB,
     184             :           std::enable_if_t<std::conjunction_v<
     185             :               IsBaseFab<DFAB>, IsBaseFab<SFAB>,
     186             :               std::is_convertible<typename SFAB::value_type,
     187             :                                   typename DFAB::value_type>>, int> BAR = 0>
     188             : void
     189      333923 : Copy (FabArray<DFAB>& dst, FabArray<SFAB> const& src, int srccomp, int dstcomp, int numcomp, const IntVect& nghost)
     190             : {
     191             :     BL_PROFILE("amrex::Copy()");
     192             : 
     193             :     using DT = typename DFAB::value_type;
     194             : 
     195      333923 :     if (dst.local_size() == 0) { return; }
     196             : 
     197             :     // avoid self copy
     198             :     if constexpr (std::is_same_v<typename SFAB::value_type, typename DFAB::value_type>) {
     199      333923 :         if (dst.atLocalIdx(0).dataPtr(dstcomp) == src.atLocalIdx(0).dataPtr(srccomp)) {
     200           0 :             return;
     201             :         }
     202             :     }
     203             : 
     204             : #ifdef AMREX_USE_GPU
     205             :     if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
     206             :         auto const& srcarr = src.const_arrays();
     207             :         auto const& dstarr = dst.arrays();
     208             :         ParallelFor(dst, nghost, numcomp,
     209             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
     210             :         {
     211             :             dstarr[box_no](i,j,k,dstcomp+n) = DT(srcarr[box_no](i,j,k,srccomp+n));
     212             :         });
     213             :         Gpu::streamSynchronize();
     214             :     } else
     215             : #endif
     216             :     {
     217             : #ifdef AMREX_USE_OMP
     218             : #pragma omp parallel if (Gpu::notInLaunchRegion())
     219             : #endif
     220      691949 :         for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
     221             :         {
     222      358026 :             const Box& bx = mfi.growntilebox(nghost);
     223      358026 :             if (bx.ok())
     224             :             {
     225      358026 :                 auto const& srcFab = src.const_array(mfi);
     226      358026 :                 auto const& dstFab = dst.array(mfi);
     227   123814000 :                 AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
     228             :                 {
     229             :                     dstFab(i,j,k,dstcomp+n) = DT(srcFab(i,j,k,srccomp+n));
     230             :                 });
     231             :             }
     232             :         }
     233             :     }
     234             : }
     235             : 
     236             : template <class FAB,
     237             :           class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
     238             : void
     239             : Add (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, int nghost)
     240             : {
     241             :     Add(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost));
     242             : }
     243             : 
     244             : template <class FAB,
     245             :           class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
     246             : void
     247        7894 : Add (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, const IntVect& nghost)
     248             : {
     249             :     BL_PROFILE("amrex::Add()");
     250             : 
     251             : #ifdef AMREX_USE_GPU
     252             :     if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
     253             :         auto const& dstfa = dst.arrays();
     254             :         auto const& srcfa = src.const_arrays();
     255             :         ParallelFor(dst, nghost, numcomp,
     256             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
     257             :         {
     258             :             dstfa[box_no](i,j,k,n+dstcomp) += srcfa[box_no](i,j,k,n+srccomp);
     259             :         });
     260             :         if (!Gpu::inNoSyncRegion()) {
     261             :             Gpu::streamSynchronize();
     262             :         }
     263             :     } else
     264             : #endif
     265             :     {
     266             : #ifdef AMREX_USE_OMP
     267             : #pragma omp parallel if (Gpu::notInLaunchRegion())
     268             : #endif
     269       20608 :         for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
     270             :         {
     271       12714 :             const Box& bx = mfi.growntilebox(nghost);
     272       12714 :             if (bx.ok())
     273             :             {
     274       12714 :                 auto const srcFab = src.array(mfi);
     275       12714 :                 auto       dstFab = dst.array(mfi);
     276    18568900 :                 AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
     277             :                 {
     278             :                     dstFab(i,j,k,n+dstcomp) += srcFab(i,j,k,n+srccomp);
     279             :                 });
     280             :             }
     281             :         }
     282             :     }
     283        7894 : }
     284             : 
     285             : /**
     286             :  * \brief An Array of FortranArrayBox(FAB)-like Objects
     287             :  *
     288             :  * The FabArray<FAB> class implements a collection (stored as an array) of
     289             :  * Fortran array box-like ( \p FAB ) objects.  The parameterized type \p FAB is intended to be
     290             :  * any class derived from BaseFab<T>.  For example, \p FAB may be a BaseFab of
     291             :  * integers, so we could write:
     292             :  *
     293             :  *   FabArray<BaseFab<int> > int_fabs;
     294             :  *
     295             :  * Then int_fabs is a FabArray that can hold a collection of BaseFab<int>
     296             :  * objects.
     297             :  *
     298             :  * FabArray is not just a general container class for Fortran arrays.  It is
     299             :  * intended to hold "grid" data for use in finite difference calculations in
     300             :  * which the data is defined on a union of (usually disjoint) rectangular
     301             :  * regions embedded in a uniform index space.  This region, called the valid
     302             :  * region, is represented by a BoxArray.  For the purposes of this discussion,
     303             :  * the Kth Box in the BoxArray represents the interior region of the Kth grid.
     304             :  *
     305             :  * Since the intent is to be used with finite difference calculations a
     306             :  * FabArray also includes the notion of a boundary region for each grid.  The
     307             :  * boundary region is specified by the ngrow parameter which tells the FabArray
     308             :  * to allocate each \p FAB to be ngrow cells larger in all directions than the
     309             :  * underlying Box.  The larger region covered by the union of all the \p FABs is
     310             :  * called the region of definition.  The underlying notion is that the valid
     311             :  * region contains the grid interior data and the region of definition includes
     312             :  * the interior region plus the boundary areas.
     313             :  *
     314             :  * Operations are available to copy data from the valid regions into these
     315             :  * boundary areas where the two overlap.  The number of components, that is,
     316             :  * the number of values that can be stored in each cell of a \p FAB, is either
     317             :  * given as an argument to the constructor or is inherent in the definition of
     318             :  * the underlying \p FAB.  Each \p FAB in the FabArray will have the same number of
     319             :  * components.
     320             :  *
     321             :  * In summary, a FabArray is an array of \p FABs.  The Kth element contains a \p FAB
     322             :  * that holds the data for the Kth grid, a Box that defines the valid region
     323             :  * of the Kth grid.
     324             :  *
     325             :  * A typical use for a FabArray would be to hold the solution vector or
     326             :  * right-hand-side when solving a linear system of equations on a union of
     327             :  * rectangular grids.  The copy operations would be used to copy data from the
     328             :  * valid regions of neighboring grids into the boundary regions after each
     329             :  * relaxation step of the iterative method.  If a multigrid method is used, a
     330             :  * FabArray could be used to hold the data at each level in the multigrid
     331             :  * hierarchy.
     332             :  *
     333             :  * This class is a concrete class not a polymorphic one.
     334             :  *
     335             :  * This class does NOT provide a copy constructor or assignment operator.
     336             :  *
     337             :  * \tparam FAB FortranArrayBox-like object. Typically a derived class of BaseFab. Not to be confused with FabArrayBase.
     338             :  */
     339             : template <class FAB>
     340             : class FabArray
     341             :     :
     342             :     public FabArrayBase
     343             : {
     344             : public:
     345             : 
     346             :     struct FABType {
     347             :         using value_type = FAB;
     348             :     };
     349             : 
     350             :     /*
     351             :     * if FAB is a BaseFab or its child, value_type = FAB::value_type
     352             :     * else                              value_type = FAB;
     353             :     */
     354             :     using value_type = typename std::conditional_t<IsBaseFab<FAB>::value, FAB, FABType>::value_type;
     355             : 
     356             :     using fab_type = FAB;
     357             : 
     358             :     //
     359             :     //! Constructs an empty FabArray<FAB>.
     360             :     FabArray () noexcept;
     361             : 
     362             :     /**
     363             :      * \brief Construct an empty FabArray<FAB> that has a default Arena.
     364             :      *
     365             :      * If `define` is called later with a nullptr as MFInfo's arena, the
     366             :      * default Arena `a` will be used.  If the arena in MFInfo is not a
     367             :      * nullptr, the MFInfo's arena will be used.
     368             :      */
     369             :     explicit FabArray (Arena* a) noexcept;
     370             : 
     371             :     /**
     372             :     * \brief Construct a FabArray<FAB> with a valid region defined by bxs
     373             :     * and a region of definition defined by the grow factor ngrow
     374             :     * and the number of components nvar.
     375             :     */
     376             :     FabArray (const BoxArray&            bxs,
     377             :               const DistributionMapping& dm,
     378             :               int                        nvar,
     379             :               int                        ngrow,
     380             : #ifdef AMREX_STRICT_MODE
     381             :               const MFInfo&              info,
     382             :               const FabFactory<FAB>&     factory);
     383             : #else
     384             :               const MFInfo&              info = MFInfo(),
     385             :               const FabFactory<FAB>&     factory = DefaultFabFactory<FAB>());
     386             : #endif
     387             : 
     388             :     FabArray (const BoxArray&            bxs,
     389             :               const DistributionMapping& dm,
     390             :               int                        nvar,
     391             :               const IntVect&             ngrow,
     392             : #ifdef AMREX_STRICT_MODE
     393             :               const MFInfo&              info,
     394             :               const FabFactory<FAB>&     factory);
     395             : #else
     396             :               const MFInfo&              info = MFInfo(),
     397             :               const FabFactory<FAB>&     factory = DefaultFabFactory<FAB>());
     398             : #endif
     399             : 
     400             :     FabArray (const FabArray<FAB>& rhs, MakeType maketype, int scomp, int ncomp);
     401             : 
     402             :     //! The destructor -- deletes all FABs in the array.
     403             :     ~FabArray ();
     404             : 
     405             :     FabArray (FabArray<FAB>&& rhs) noexcept;
     406             :     FabArray<FAB>& operator= (FabArray<FAB>&& rhs) noexcept;
     407             : 
     408             :     FabArray (const FabArray<FAB>& rhs) = delete;
     409             :     FabArray<FAB>& operator= (const FabArray<FAB>& rhs) = delete;
     410             : 
     411             :     /**
     412             :     * \brief Define this FabArray identically to that performed by
     413             :     * the constructor having an analogous function signature.
     414             :     * This is only valid if this FabArray was defined using
     415             :     * the default constructor.
     416             :     */
     417             :     void define (const BoxArray& bxs,
     418             :                  const DistributionMapping& dm,
     419             :                  int                        nvar,
     420             :                  int                        ngrow,
     421             : #ifdef AMREX_STRICT_MODE
     422             :                  const MFInfo&              info,
     423             :                  const FabFactory<FAB>&     factory);
     424             : #else
     425             :                  const MFInfo&              info = MFInfo(),
     426             :                  const FabFactory<FAB>&     factory = DefaultFabFactory<FAB>());
     427             : #endif
     428             : 
     429             :     void define (const BoxArray& bxs,
     430             :                  const DistributionMapping& dm,
     431             :                  int                        nvar,
     432             :                  const IntVect&             ngrow,
     433             : #ifdef AMREX_STRICT_MODE
     434             :                  const MFInfo&              info,
     435             :                  const FabFactory<FAB>&     factory);
     436             : #else
     437             :                  const MFInfo&              info = MFInfo(),
     438             :                  const FabFactory<FAB>&     factory = DefaultFabFactory<FAB>());
     439             : #endif
     440             : 
     441           0 :     const FabFactory<FAB>& Factory () const noexcept { return *m_factory; }
     442             : 
     443             :     // Provides access to the Arena this FabArray was build with.
     444             :     Arena* arena () const noexcept { return m_dallocator.arena(); }
     445             : 
     446             :     const Vector<std::string>& tags () const noexcept { return m_tags; }
     447             : 
     448             :     bool hasEBFabFactory () const noexcept {
     449             : #ifdef AMREX_USE_EB
     450             :         const auto *const f = dynamic_cast<EBFArrayBoxFactory const*>(m_factory.get());
     451             :         return (f != nullptr);
     452             : #else
     453             :         return false;
     454             : #endif
     455             :     }
     456             : 
     457             :     //! Return the data pointer to the single chunk memory if this object
     458             :     //! uses a single contiguous chunk of memory, nullptr otherwise.
     459             :     [[nodiscard]] value_type* singleChunkPtr () noexcept {
     460             :         return m_single_chunk_arena ? (value_type*)m_single_chunk_arena->data() : nullptr;
     461             :     }
     462             : 
     463             :     //! Return the data pointer to the single chunk memory if this object
     464             :     //! uses a single contiguous chunk of memory, nullptr otherwise.
     465             :     [[nodiscard]] value_type const* singleChunkPtr () const noexcept {
     466             :         return m_single_chunk_arena ? (value_type const*)m_single_chunk_arena->data() : nullptr;
     467             :     }
     468             : 
     469             :     //! Return the size of the single chunk memory if this object uses a
     470             :     //! single contiguous chunk of memory, 0 otherwise.
     471             :     [[nodiscard]] std::size_t singleChunkSize () const noexcept { return m_single_chunk_size; }
     472             : 
     473             :     bool isAllRegular () const noexcept {
     474             : #ifdef AMREX_USE_EB
     475             :         const auto *const f = dynamic_cast<EBFArrayBoxFactory const*>(m_factory.get());
     476             :         if (f) {
     477             :             return f->isAllRegular();
     478             :         } else {
     479             :             return true;
     480             :         }
     481             : #else
     482             :         return true;
     483             : #endif
     484             :     }
     485             : 
     486             :     /**
     487             :     * \brief Return true if the FabArray is well-defined.  That is,
     488             :     * the FabArray has a BoxArray and DistributionMapping, the
     489             :     * FABs are allocated for each Box in the BoxArray and the
     490             :     * sizes of the FABs and the number of components are consistent
     491             :     * with the definition of the FabArray.
     492             :     */
     493             :     bool ok () const;
     494             : 
     495             :     /** Has define() been called on this rank?
     496             :      *
     497             :      * \return true if `define` has been called on this `FabArray`.  Note that all constructors except `FabArray ()`
     498             :      * and `FabArray(Arena*a)` call `define`, even if the `MFInfo` argument has `alloc=false`.  One could
     499             :      * also use `FabArrayBase::empty()` to find whether `define` is called or not, although they are not exactly
     500             :      * the same.
     501             :      */
     502             :     bool isDefined () const;
     503             : 
     504             :     //! Return a constant reference to the FAB associated with mfi.
     505     1218901 :     const FAB& operator[] (const MFIter& mfi) const noexcept { return *(this->fabPtr(mfi)); }
     506             : 
     507             :     //! Return a constant reference to the FAB associated with mfi.
     508             :     const FAB& get (const MFIter& mfi) const noexcept { return *(this->fabPtr(mfi)); }
     509             : 
     510             :     //! Returns a reference to the FAB associated mfi.
     511     5583243 :     FAB& operator[] (const MFIter& mfi) noexcept { return *(this->fabPtr(mfi)); }
     512             : 
     513             :     //! Returns a reference to the FAB associated mfi.
     514      345562 :     FAB& get (const MFIter& mfi) noexcept { return *(this->fabPtr(mfi)); }
     515             : 
     516             :     //! Return a constant reference to the FAB associated with the Kth element.
     517     4920839 :     const FAB& operator[] (int K) const noexcept { return *(this->fabPtr(K)); }
     518             : 
     519             :     //! Return a constant reference to the FAB associated with the Kth element.
     520             :     const FAB& get (int K) const noexcept { return *(this->fabPtr(K)); }
     521             : 
     522             :     //! Return a reference to the FAB associated with the Kth element.
     523         795 :     FAB& operator[] (int K) noexcept { return *(this->fabPtr(K)); }
     524             : 
     525             :     //! Return a reference to the FAB associated with the Kth element.
     526    14750677 :     FAB& get (int K) noexcept { return *(this->fabPtr(K)); }
     527             : 
     528             :     //! Return a reference to the FAB associated with local index L
     529      333923 :     FAB& atLocalIdx (int L) noexcept { return *m_fabs_v[L]; }
     530      333923 :     const FAB& atLocalIdx (int L) const noexcept { return *m_fabs_v[L]; }
     531             : 
     532             :     //! Return pointer to FAB
     533             :     FAB      * fabPtr (const MFIter& mfi) noexcept;
     534             :     FAB const* fabPtr (const MFIter& mfi) const noexcept;
     535             :     FAB      * fabPtr (int K) noexcept;  // Here K is global index
     536             :     FAB const* fabPtr (int K) const noexcept;
     537             : 
     538             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     539             :     void prefetchToHost (const MFIter& mfi) const noexcept;
     540             : 
     541             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     542             :     void prefetchToDevice (const MFIter& mfi) const noexcept;
     543             : 
     544             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     545             :     Array4<typename FabArray<FAB>::value_type const> array (const MFIter& mfi) const noexcept;
     546             :     //
     547             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     548             :     Array4<typename FabArray<FAB>::value_type> array (const MFIter& mfi) noexcept;
     549             :     //
     550             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     551             :     Array4<typename FabArray<FAB>::value_type const> array (int K) const noexcept;
     552             :     //
     553             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     554             :     Array4<typename FabArray<FAB>::value_type> array (int K) noexcept;
     555             : 
     556             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     557             :     Array4<typename FabArray<FAB>::value_type const> const_array (const MFIter& mfi) const noexcept;
     558             :     //
     559             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     560             :     Array4<typename FabArray<FAB>::value_type const> const_array (int K) const noexcept;
     561             : 
     562             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     563             :     Array4<typename FabArray<FAB>::value_type const> array (const MFIter& mfi, int start_comp) const noexcept;
     564             :     //
     565             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     566             :     Array4<typename FabArray<FAB>::value_type> array (const MFIter& mfi, int start_comp) noexcept;
     567             :     //
     568             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     569             :     Array4<typename FabArray<FAB>::value_type const> array (int K, int start_comp) const noexcept;
     570             :     //
     571             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     572             :     Array4<typename FabArray<FAB>::value_type> array (int K, int start_comp) noexcept;
     573             : 
     574             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     575             :     Array4<typename FabArray<FAB>::value_type const> const_array (const MFIter& mfi, int start_comp) const noexcept;
     576             :     //
     577             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     578             :     Array4<typename FabArray<FAB>::value_type const> const_array (int K, int start_comp) const noexcept;
     579             : 
     580             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     581             :     MultiArray4<typename FabArray<FAB>::value_type> arrays () noexcept;
     582             : 
     583             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     584             :     MultiArray4<typename FabArray<FAB>::value_type const> arrays () const noexcept;
     585             : 
     586             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     587             :     MultiArray4<typename FabArray<FAB>::value_type const> const_arrays () const noexcept;
     588             : 
     589             :     //! Explicitly set the Kth FAB in the FabArray to point to elem.
     590             :     void setFab (int boxno, std::unique_ptr<FAB> elem);
     591             : 
     592             :     //! Explicitly set the Kth FAB in the FabArray to point to elem.
     593             :     template <class F=FAB, std::enable_if_t<std::is_move_constructible_v<F>,int> = 0>
     594             :     void setFab (int boxno, FAB&& elem);
     595             : 
     596             :     //! Explicitly set the FAB associated with mfi in the FabArray to point to elem.
     597             :     void setFab (const MFIter&mfi, std::unique_ptr<FAB> elem);
     598             : 
     599             :     //! Explicitly set the FAB associated with mfi in the FabArray to point to elem.
     600             :     template <class F=FAB, std::enable_if_t<std::is_move_constructible_v<F>,int> = 0>
     601             :     void setFab (const MFIter&mfi, FAB&& elem);
     602             : 
     603             :     //! Release ownership of the FAB. This function is not thread safe.
     604             :     AMREX_NODISCARD
     605             :     FAB* release (int K);
     606             : 
     607             :     //! Release ownership of the FAB. This function is not thread safe.
     608             :     AMREX_NODISCARD
     609             :     FAB* release (const MFIter& mfi);
     610             : 
     611             :     //! Releases FAB memory in the FabArray.
     612             :     void clear ();
     613             : 
     614             :     /**
     615             :      * \brief Perform local copy of FabArray data.
     616             :      *
     617             :      * The two FabArrays must have the same BoxArray and
     618             :      * DistributionMapping, although they could have different data types.
     619             :      * For example, this could be used to copy from FabArray<BaseFab<float>>
     620             :      * to FabArray<BaseFab<double>>.
     621             :      *
     622             :      * \param src    source FabArray
     623             :      * \param scomp  starting component of source
     624             :      * \param dcomp  starting component of this FabArray
     625             :      * \param ncomp  number of components
     626             :      * \param nghost number of ghost cells
     627             :      */
     628             :     template <typename SFAB, typename DFAB = FAB,
     629             :               std::enable_if_t<std::conjunction_v<
     630             :                   IsBaseFab<DFAB>, IsBaseFab<SFAB>,
     631             :                   std::is_convertible<typename SFAB::value_type,
     632             :                                       typename DFAB::value_type>>, int> = 0>
     633             :     void LocalCopy (FabArray<SFAB> const& src, int scomp, int dcomp, int ncomp,
     634             :                     IntVect const& nghost);
     635             : 
     636             :     /**
     637             :      * \brief Perform local addition of FabArray data.
     638             :      *
     639             :      * The two FabArrays must have the same BoxArray and
     640             :      * DistributionMapping.
     641             :      *
     642             :      * \param src    source FabArray
     643             :      * \param scomp  starting component of source
     644             :      * \param dcomp  starting component of this FabArray
     645             :      * \param ncomp  number of components
     646             :      * \param nghost number of ghost cells
     647             :      */
     648             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     649             :     void LocalAdd (FabArray<FAB> const& src, int scomp, int dcomp, int ncomp,
     650             :                    IntVect const& nghost);
     651             : 
     652             :     //! Set all components in the entire region of each FAB to val.
     653             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     654             :     void setVal (value_type val);
     655             : 
     656             :     //! Set all components in the entire region of each FAB to val.
     657             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     658             :     FabArray<FAB>& operator= (value_type val);
     659             : 
     660             :     /**
     661             :     * \brief Set the value of num_comp components in the valid region of
     662             :     * each FAB in the FabArray, starting at component comp to val.
     663             :     * Also set the value of nghost boundary cells.
     664             :     */
     665             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     666             :     void setVal (value_type val,
     667             :                  int        comp,
     668             :                  int        ncomp,
     669             :                  int        nghost = 0);
     670             : 
     671             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     672             :     void setVal (value_type val,
     673             :                  int        comp,
     674             :                  int        ncomp,
     675             :                  const IntVect& nghost);
     676             : 
     677             :     /**
     678             :     * \brief Set the value of num_comp components in the valid region of
     679             :     * each FAB in the FabArray, starting at component comp, as well
     680             :     * as nghost boundary cells, to val, provided they also intersect
     681             :     * with the Box region.
     682             :     */
     683             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     684             :     void setVal (value_type val,
     685             :                  const Box& region,
     686             :                  int        comp,
     687             :                  int        ncomp,
     688             :                  int        nghost = 0);
     689             : 
     690             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     691             :     void setVal (value_type val,
     692             :                  const Box& region,
     693             :                  int        comp,
     694             :                  int        ncomp,
     695             :                  const IntVect& nghost);
     696             :     /**
     697             :     * \brief Set all components in the valid region of each FAB in the
     698             :     * FabArray to val, including nghost boundary cells.
     699             :     */
     700             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     701             :     void setVal (value_type val, int nghost);
     702             : 
     703             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     704             :     void setVal (value_type val, const IntVect& nghost);
     705             : 
     706             :     /**
     707             :     * \brief Set all components in the valid region of each FAB in the
     708             :     * FabArray to val, including nghost boundary cells, that also
     709             :     * intersect the Box region.
     710             :     */
     711             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     712             :     void setVal (value_type val, const Box& region, int nghost);
     713             : 
     714             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     715             :     void setVal (value_type val, const Box& region, const IntVect& nghost);
     716             : 
     717             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     718             :     void abs (int comp, int ncomp, int nghost = 0);
     719             : 
     720             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     721             :     void abs (int comp, int ncomp, const IntVect& nghost);
     722             : 
     723             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     724             :     void plus (value_type val, int comp, int num_comp, int nghost = 0);
     725             : 
     726             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     727             :     void plus (value_type val, const Box& region, int comp, int num_comp, int nghost = 0);
     728             : 
     729             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     730             :     void mult (value_type val, int comp, int num_comp, int nghost = 0);
     731             : 
     732             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     733             :     void mult (value_type val, const Box& region, int comp, int num_comp, int nghost = 0);
     734             : 
     735             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     736             :     void invert (value_type numerator, int comp, int num_comp, int nghost = 0);
     737             : 
     738             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     739             :     void invert (value_type numerator, const Box& region, int comp, int num_comp, int nghost = 0);
     740             : 
     741             :     //! Set all values in the boundary region to val.
     742             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     743             :     void setBndry (value_type val);
     744             : 
     745             :     //! Set ncomp values in the boundary region, starting at start_comp to val.
     746             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     747             :     void setBndry (value_type val, int strt_comp, int ncomp);
     748             : 
     749             :    //! Set all values outside the Geometry domain to val.
     750             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     751             :     void setDomainBndry (value_type val, const Geometry& geom);
     752             : 
     753             :     //! Set ncomp values outside the Geometry domain to val, starting at start_comp.
     754             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     755             :     void setDomainBndry (value_type val, int strt_comp, int ncomp, const Geometry& geom);
     756             : 
     757             :     /**
     758             :     * \brief Returns the sum of component "comp"
     759             :     *
     760             :     * \param comp   component
     761             :     * \param nghost number of ghost cells
     762             :     * \param local  If true, MPI communication is skipped.
     763             :     */
     764             :     template <typename F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
     765             :     typename F::value_type
     766             :     sum (int comp, IntVect const& nghost, bool local = false) const;
     767             : 
     768             :     /**
     769             :     * \brief This function copies data from src to this FabArray.  Each FAB
     770             :     * in fa is intersected with all FABs in this FabArray and a copy
     771             :     * is performed on the region of intersection.  The intersection
     772             :     * is restricted to the valid regions.
     773             :     */
     774             :     void ParallelAdd (const FabArray<FAB>& src,
     775             :                       const Periodicity&   period = Periodicity::NonPeriodic())
     776             :        { ParallelCopy(src,period,FabArray::ADD); }
     777           0 :     void ParallelCopy (const FabArray<FAB>& src,
     778             :                        const Periodicity&   period = Periodicity::NonPeriodic(),
     779             :                        CpOp                 op = FabArrayBase::COPY)
     780           0 :        { ParallelCopy(src,0,0,nComp(),0,0,period,op); }
     781             : 
     782             :     [[deprecated("Use FabArray::ParallelCopy() instead.")]]
     783             :     void copy (const FabArray<FAB>& src,
     784             :                const Periodicity&   period = Periodicity::NonPeriodic(),
     785             :                CpOp                 op = FabArrayBase::COPY)
     786             :         { ParallelCopy(src,period,op); }
     787             : 
     788             :     void ParallelAdd_nowait (const FabArray<FAB>& src,
     789             :                              const Periodicity& period = Periodicity::NonPeriodic())
     790             :         { ParallelCopy_nowait(src,period,FabArray::ADD); }
     791             :     void ParallelCopy_nowait (const FabArray<FAB>& src,
     792             :                               const Periodicity&   period = Periodicity::NonPeriodic(),
     793             :                               CpOp                 op = FabArrayBase::COPY)
     794             :        { ParallelCopy_nowait(src,0,0,nComp(),0,0,period,op); }
     795             : 
     796             :     /**
     797             :     * \brief This function copies data from src to this FabArray.  Each FAB
     798             :     * in src is intersected with all FABs in this FabArray and a copy
     799             :     * is performed on the region of intersection.  The intersection
     800             :     * is restricted to the num_comp components starting at src_comp
     801             :     * in the FabArray src, with the destination components in this
     802             :     * FabArray starting at dest_comp.
     803             :     */
     804             :     void ParallelAdd (const FabArray<FAB>& src,
     805             :                       int                  src_comp,
     806             :                       int                  dest_comp,
     807             :                       int                  num_comp,
     808             :                       const Periodicity&   period = Periodicity::NonPeriodic())
     809             :        { ParallelCopy(src,src_comp,dest_comp,num_comp, period, FabArrayBase::ADD); }
     810       27148 :     void ParallelCopy (const FabArray<FAB>& src,
     811             :                        int                  src_comp,
     812             :                        int                  dest_comp,
     813             :                        int                  num_comp,
     814             :                        const Periodicity&   period = Periodicity::NonPeriodic(),
     815             :                        CpOp                 op = FabArrayBase::COPY)
     816       27148 :        { ParallelCopy(src,src_comp,dest_comp,num_comp,0,0,period,op); }
     817             : 
     818             :     [[deprecated("Use FabArray::ParallelCopy() instead.")]]
     819             :     void copy (const FabArray<FAB>& src,
     820             :                int                  src_comp,
     821             :                int                  dest_comp,
     822             :                int                  num_comp,
     823             :                const Periodicity&   period = Periodicity::NonPeriodic(),
     824             :                CpOp                 op = FabArrayBase::COPY)
     825             :         { ParallelCopy(src,src_comp,dest_comp,num_comp, period, op); }
     826             : 
     827             :     void ParallelAdd_nowait (const FabArray<FAB>& src,
     828             :                              int                  src_comp,
     829             :                              int                  dest_comp,
     830             :                              int                  num_comp,
     831             :                              const Periodicity&   period = Periodicity::NonPeriodic())
     832             :        { ParallelCopy_nowait(src,src_comp,dest_comp,num_comp, period, FabArrayBase::ADD); }
     833             :     void ParallelCopy_nowait (const FabArray<FAB>& src,
     834             :                               int                  src_comp,
     835             :                               int                  dest_comp,
     836             :                               int                  num_comp,
     837             :                               const Periodicity&   period = Periodicity::NonPeriodic(),
     838             :                               CpOp                 op = FabArrayBase::COPY)
     839             :        { ParallelCopy_nowait(src,src_comp,dest_comp,num_comp,0,0,period,op); }
     840             : 
     841             :     //! Similar to the above function, except that source and destination are grown by src_nghost and dst_nghost, respectively
     842             :     void ParallelAdd (const FabArray<FAB>& src,
     843             :                       int                  src_comp,
     844             :                       int                  dest_comp,
     845             :                       int                  num_comp,
     846             :                       int                  src_nghost,
     847             :                       int                  dst_nghost,
     848             :                       const Periodicity&   period = Periodicity::NonPeriodic())
     849             :        { ParallelCopy(src,src_comp,dest_comp,num_comp,IntVect(src_nghost),IntVect(dst_nghost),period,
     850             :                       FabArrayBase::ADD); }
     851             :     void ParallelAdd (const FabArray<FAB>& src,
     852             :                       int                  src_comp,
     853             :                       int                  dest_comp,
     854             :                       int                  num_comp,
     855             :                       const IntVect&       src_nghost,
     856             :                       const IntVect&       dst_nghost,
     857             :                       const Periodicity&   period = Periodicity::NonPeriodic())
     858             :        { ParallelCopy(src,src_comp,dest_comp,num_comp,src_nghost,dst_nghost,period,FabArrayBase::ADD); }
     859       32653 :     void ParallelCopy (const FabArray<FAB>& src,
     860             :                        int                  src_comp,
     861             :                        int                  dest_comp,
     862             :                        int                  num_comp,
     863             :                        int                  src_nghost,
     864             :                        int                  dst_nghost,
     865             :                        const Periodicity&   period = Periodicity::NonPeriodic(),
     866             :                        CpOp                 op = FabArrayBase::COPY)
     867       32653 :        { ParallelCopy(src,src_comp,dest_comp,num_comp,IntVect(src_nghost),IntVect(dst_nghost),period,op); }
     868             :     void ParallelCopy (const FabArray<FAB>& src,
     869             :                        int                  scomp,
     870             :                        int                  dcomp,
     871             :                        int                  ncomp,
     872             :                        const IntVect&       snghost,
     873             :                        const IntVect&       dnghost,
     874             :                        const Periodicity&   period = Periodicity::NonPeriodic(),
     875             :                        CpOp                 op = FabArrayBase::COPY,
     876             :                        const FabArrayBase::CPC* a_cpc = nullptr);
     877             : 
     878             :     void ParallelAdd_nowait (const FabArray<FAB>& src,
     879             :                              int                  src_comp,
     880             :                              int                  dest_comp,
     881             :                              int                  num_comp,
     882             :                              int                  src_nghost,
     883             :                              int                  dst_nghost,
     884             :                              const Periodicity&   period = Periodicity::NonPeriodic())
     885             :        { ParallelCopy_nowait(src,src_comp,dest_comp,num_comp,IntVect(src_nghost),
     886             :                                IntVect(dst_nghost),period,FabArrayBase::ADD); }
     887             : 
     888             :     void ParallelAdd_nowait (const FabArray<FAB>& src,
     889             :                              int                  src_comp,
     890             :                              int                  dest_comp,
     891             :                              int                  num_comp,
     892             :                              const IntVect&       src_nghost,
     893             :                              const IntVect&       dst_nghost,
     894             :                              const Periodicity&   period = Periodicity::NonPeriodic())
     895             :        { ParallelCopy_nowait(src,src_comp,dest_comp,num_comp,src_nghost,
     896             :                              dst_nghost,period,FabArrayBase::ADD); }
     897             : 
     898             :     void ParallelCopy_nowait (const FabArray<FAB>& src,
     899             :                               int                  src_comp,
     900             :                               int                  dest_comp,
     901             :                               int                  num_comp,
     902             :                               int                  src_nghost,
     903             :                               int                  dst_nghost,
     904             :                               const Periodicity&   period = Periodicity::NonPeriodic(),
     905             :                               CpOp                 op = FabArrayBase::COPY)
     906             :        { ParallelCopy_nowait(src,src_comp,dest_comp,num_comp,IntVect(src_nghost),
     907             :                              IntVect(dst_nghost),period,op); }
     908             : 
     909             :     void ParallelCopy_nowait (const FabArray<FAB>& src,
     910             :                               int                  scomp,
     911             :                               int                  dcomp,
     912             :                               int                  ncomp,
     913             :                               const IntVect&       snghost,
     914             :                               const IntVect&       dnghost,
     915             :                               const Periodicity&   period = Periodicity::NonPeriodic(),
     916             :                               CpOp                 op = FabArrayBase::COPY,
     917             :                               const FabArrayBase::CPC* a_cpc = nullptr,
     918             :                               bool                 to_ghost_cells_only = false);
     919             : 
     920             :     void ParallelCopy_finish ();
     921             : 
     922             :     void ParallelCopyToGhost (const FabArray<FAB>& src,
     923             :                               int                  scomp,
     924             :                               int                  dcomp,
     925             :                               int                  ncomp,
     926             :                               const IntVect&       snghost,
     927             :                               const IntVect&       dnghost,
     928             :                               const Periodicity&   period = Periodicity::NonPeriodic());
     929             : 
     930             :     void ParallelCopyToGhost_nowait (const FabArray<FAB>& src,
     931             :                                      int                  scomp,
     932             :                                      int                  dcomp,
     933             :                                      int                  ncomp,
     934             :                                      const IntVect&       snghost,
     935             :                                      const IntVect&       dnghost,
     936             :                                      const Periodicity&   period = Periodicity::NonPeriodic());
     937             : 
     938             :     void ParallelCopyToGhost_finish();
     939             : 
     940             :     [[deprecated("Use FabArray::ParallelCopy() instead.")]]
     941             :     void copy (const FabArray<FAB>& src,
     942             :                int                  src_comp,
     943             :                int                  dest_comp,
     944             :                int                  num_comp,
     945             :                int                  src_nghost,
     946             :                int                  dst_nghost,
     947             :                const Periodicity&   period = Periodicity::NonPeriodic(),
     948             :                CpOp                 op = FabArrayBase::COPY)
     949             :        { ParallelCopy(src,src_comp,dest_comp,num_comp,IntVect(src_nghost),IntVect(dst_nghost),period,op); }
     950             : 
     951             :     [[deprecated("Use FabArray::ParallelCopy() instead.")]]
     952             :     void copy (const FabArray<FAB>& src,
     953             :                int                  src_comp,
     954             :                int                  dest_comp,
     955             :                int                  num_comp,
     956             :                const IntVect&       src_nghost,
     957             :                const IntVect&       dst_nghost,
     958             :                const Periodicity&   period = Periodicity::NonPeriodic(),
     959             :                CpOp                 op = FabArrayBase::COPY)
     960             :         { ParallelCopy(src,src_comp,dest_comp,num_comp,src_nghost,dst_nghost,period,op); }
     961             : 
     962             :     //! Copy from src to this.  this and src have the same BoxArray, but different DistributionMapping
     963             :     void Redistribute (const FabArray<FAB>& src,
     964             :                        int                  scomp,
     965             :                        int                  dcomp,
     966             :                        int                  ncomp,
     967             :                        const IntVect&       nghost);
     968             : 
     969             :     /**
     970             :     * \brief Copy the values contained in the intersection of the
     971             :     * valid + nghost region of this FabArray with the FAB dest into dest.
     972             :     * Note that FAB dest is assumed to be identical on each process.
     973             :     */
     974             :     void copyTo (FAB& dest, int  nghost = 0) const;
     975             : 
     976             :     /**
     977             :     * \brief Copy the values contained in the intersection of the
     978             :     * num_comp component valid + nghost region of this FabArray, starting at
     979             :     * component src_comp, with the FAB dest into dest, starting at
     980             :     * component dest_comp in dest.
     981             :     * Note that FAB dest is assumed to be identical on each process.
     982             :     */
     983             :     void copyTo (FAB& dest, int  scomp, int  dcomp, int  ncomp, int  nghost = 0) const;
     984             : 
     985             :     //! Shift the boxarray by vector v
     986             :     void shift (const IntVect& v);
     987             : 
     988             :     bool defined (int K) const noexcept;
     989             :     bool defined (const MFIter& mfi) const noexcept;
     990             : 
     991             :     /**
     992             :     * \brief Copy on intersection within a FabArray.  Data is copied from
     993             :     * valid regions to intersecting regions of definition.  The
     994             :     * purpose is to fill in the boundary regions of each FAB in
     995             :     * the FabArray.  If cross=true, corner cells are not filled.
     996             :     * If the length of periodic is provided, periodic boundaries are
     997             :     * also filled.  Note that FabArray itself does not contains
     998             :     * any periodicity information.
     999             :     * FillBoundary expects that its cell-centered version of its BoxArray
    1000             :     * is non-overlapping.
    1001             :     */
    1002             :     template <typename BUF=value_type>
    1003             :     void FillBoundary (bool cross = false);
    1004             : 
    1005             :     template <typename BUF=value_type>
    1006             :     void FillBoundary (const Periodicity& period, bool cross = false);
    1007             : 
    1008             :     template <typename BUF=value_type>
    1009             :     void FillBoundary (const IntVect& nghost, const Periodicity& period, bool cross = false);
    1010             : 
    1011             :     //! Same as FillBoundary(), but only copies ncomp components starting at scomp.
    1012             :     template <typename BUF=value_type>
    1013             :     void FillBoundary (int scomp, int ncomp, bool cross = false);
    1014             : 
    1015             :     template <typename BUF=value_type>
    1016             :     void FillBoundary (int scomp, int ncomp, const Periodicity& period, bool cross = false);
    1017             : 
    1018             :     template <typename BUF=value_type>
    1019             :     void FillBoundary (int scomp, int ncomp, const IntVect& nghost, const Periodicity& period, bool cross = false);
    1020             : 
    1021             :     template <typename BUF=value_type>
    1022             :     void FillBoundary_nowait (bool cross = false);
    1023             : 
    1024             :     template <typename BUF=value_type>
    1025             :     void FillBoundary_nowait (const Periodicity& period, bool cross = false);
    1026             : 
    1027             :     template <typename BUF=value_type>
    1028             :     void FillBoundary_nowait (const IntVect& nghost, const Periodicity& period, bool cross = false);
    1029             : 
    1030             :     template <typename BUF=value_type>
    1031             :     void FillBoundary_nowait (int scomp, int ncomp, bool cross = false);
    1032             : 
    1033             :     template <typename BUF=value_type>
    1034             :     void FillBoundary_nowait (int scomp, int ncomp, const Periodicity& period, bool cross = false);
    1035             : 
    1036             :     template <typename BUF=value_type>
    1037             :     void FillBoundary_nowait (int scomp, int ncomp, const IntVect& nghost, const Periodicity& period, bool cross = false);
    1038             : 
    1039             :     template <typename BUF=value_type,
    1040             :               class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
    1041             :     void FillBoundary_finish ();
    1042             : 
    1043             :     void FillBoundary_test ();
    1044             : 
    1045             : 
    1046             :     /**
    1047             :      * \brief Fill ghost cells and synchronize nodal data. Ghost regions are
    1048             :      * filled with data from the intersecting valid regions. The
    1049             :      * synchronization will override valid regions by the intersecting valid
    1050             :      * regions with a higher precedence.  The smaller the global box index
    1051             :      * is, the higher precedence the box has.  With periodic boundaries, for
    1052             :      * cells in the same box, those near the lower corner have higher
    1053             :      * precedence than those near the upper corner.
    1054             :      *
    1055             :      * \param period periodic length if it's non-zero
    1056             :      */
    1057             :     void FillBoundaryAndSync (const Periodicity& period = Periodicity::NonPeriodic());
    1058             :     /**
    1059             :      * \brief Fill ghost cells and synchronize nodal data. Ghost regions are
    1060             :      * filled with data from the intersecting valid regions. The
    1061             :      * synchronization will override valid regions by the intersecting valid
    1062             :      * regions with a higher precedence.  The smaller the global box index
    1063             :      * is, the higher precedence the box has.  With periodic boundaries, for
    1064             :      * cells in the same box, those near the lower corner have higher
    1065             :      * precedence than those near the upper corner.
    1066             :      *
    1067             :      * \param scomp starting component
    1068             :      * \param ncomp number of components
    1069             :      * \param nghost number of ghost cells to fill
    1070             :      * \param period periodic length if it's non-zero
    1071             :      */
    1072             :     void FillBoundaryAndSync (int scomp, int ncomp, const IntVect& nghost,
    1073             :                               const Periodicity& period);
    1074             :     void FillBoundaryAndSync_nowait (const Periodicity& period = Periodicity::NonPeriodic());
    1075             :     void FillBoundaryAndSync_nowait (int scomp, int ncomp, const IntVect& nghost,
    1076             :                                      const Periodicity& period);
    1077             :     void FillBoundaryAndSync_finish ();
    1078             : 
    1079             :     /**
    1080             :      * \brief Synchronize nodal data.  The synchronization will override
    1081             :      * valid regions by the intersecting valid regions with a higher
    1082             :      * precedence.  The smaller the global box index is, the higher
    1083             :      * precedence the box has.  With periodic boundaries, for cells in the
    1084             :      * same box, those near the lower corner have higher precedence than
    1085             :      * those near the upper corner.
    1086             :      *
    1087             :      * \param period periodic length if it's non-zero
    1088             :      */
    1089             :     void OverrideSync (const Periodicity& period = Periodicity::NonPeriodic());
    1090             :     /**
    1091             :      * \brief Synchronize nodal data.  The synchronization will override
    1092             :      * valid regions by the intersecting valid regions with a higher
    1093             :      * precedence.  The smaller the global box index is, the higher
    1094             :      * precedence the box has.  With periodic boundaries, for cells in the
    1095             :      * same box, those near the lower corner have higher precedence than
    1096             :      * those near the upper corner.
    1097             :      *
    1098             :      * \param scomp starting component
    1099             :      * \param ncomp number of components
    1100             :      * \param period periodic length if it's non-zero
    1101             :      */
    1102             :     void OverrideSync (int scomp, int ncomp, const Periodicity& period);
    1103             :     void OverrideSync_nowait (const Periodicity& period = Periodicity::NonPeriodic());
    1104             :     void OverrideSync_nowait (int scomp, int ncomp, const Periodicity& period);
    1105             :     void OverrideSync_finish ();
    1106             : 
    1107             :     /**
    1108             :     * \brief Sum values in overlapped cells.  The destination is limited to valid cells.
    1109             :     */
    1110             :     void SumBoundary (const Periodicity& period = Periodicity::NonPeriodic());
    1111             :     void SumBoundary (int scomp, int ncomp, const Periodicity& period = Periodicity::NonPeriodic());
    1112             :     void SumBoundary_nowait (const Periodicity& period = Periodicity::NonPeriodic());
    1113             :     void SumBoundary_nowait (int scomp, int ncomp, const Periodicity& period = Periodicity::NonPeriodic());
    1114             : 
    1115             :     /**
    1116             :     * \brief Sum values in overlapped cells.  The destination is limited to valid + ngrow cells.
    1117             :     */
    1118             :     void SumBoundary (int scomp, int ncomp, IntVect const& nghost,
    1119             :                       const Periodicity& period = Periodicity::NonPeriodic());
    1120             :     void SumBoundary_nowait (int scomp, int ncomp, IntVect const& nghost,
    1121             :                              const Periodicity& period = Periodicity::NonPeriodic());
    1122             : 
    1123             :     /**
    1124             :     * \brief Sum values in overlapped cells.
    1125             :     *        For computing the overlap, the dst is grown by dst_ngrow, while the src uses src_ngrow.
    1126             :     */
    1127             :     void SumBoundary (int scomp, int ncomp, IntVect const& src_nghost, IntVect const& dst_nghost,
    1128             :                       const Periodicity& period = Periodicity::NonPeriodic());
    1129             :     void SumBoundary_nowait (int scomp, int ncomp, IntVect const& src_nghost, IntVect const& dst_nghost,
    1130             :                              const Periodicity& period = Periodicity::NonPeriodic());
    1131             :     void SumBoundary_finish ();
    1132             : 
    1133             :     /** \brief Fill ghost cells with values from their corresponding cells across periodic
    1134             :     * boundaries, regardless of whether the corresponding cells are valid. This differs from
    1135             :     * FillBoundary, which only fills from valid cells, and does not fill from ghost cells.
    1136             :     * The BoxArray is allowed to be overlapping.
    1137             :     */
    1138             :     void EnforcePeriodicity (const Periodicity& period);
    1139             :     void EnforcePeriodicity (int scomp, int ncomp, const Periodicity& period);
    1140             :     void EnforcePeriodicity (int scomp, int ncomp, const IntVect& nghost,
    1141             :                              const Periodicity& period);
    1142             : 
    1143             :     // covered   : ghost cells covered by valid cells of this FabArray
    1144             :     //             (including periodically shifted valid cells)
    1145             :     // notcovered: ghost cells not covered by valid cells
    1146             :     //             (including ghost cells outside periodic boundaries)
    1147             :     // physbnd   : boundary cells outside the domain (excluding periodic boundaries)
    1148             :     // interior  : interior cells (i.e., valid cells)
    1149             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
    1150             :     void BuildMask (const Box& phys_domain, const Periodicity& period,
    1151             :                     value_type covered, value_type notcovered,
    1152             :                     value_type physbnd, value_type interior);
    1153             : 
    1154             :     // The following are private functions.  But we have to make them public for cuda.
    1155             : 
    1156             :     template <typename BUF=value_type,
    1157             :               class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
    1158             :     void FBEP_nowait (int scomp, int ncomp, const IntVect& nghost,
    1159             :                       const Periodicity& period, bool cross,
    1160             :                       bool enforce_periodicity_only = false,
    1161             :                       bool override_sync = false);
    1162             : 
    1163             :     void FB_local_copy_cpu (const FB& TheFB, int scomp, int ncomp);
    1164             :     void PC_local_cpu (const CPC& thecpc, FabArray<FAB> const& src,
    1165             :                        int scomp, int dcomp, int ncomp, CpOp op);
    1166             : 
    1167             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
    1168             :     void setVal (value_type val, const CommMetaData& thecmd, int scomp, int ncomp);
    1169             : 
    1170             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
    1171             :     LayoutData<int> RecvLayoutMask (const CommMetaData& thecmd);
    1172             : 
    1173             : #ifdef AMREX_USE_GPU
    1174             : 
    1175             :     void FB_local_copy_gpu (const FB& TheFB, int scomp, int ncomp);
    1176             :     void PC_local_gpu (const CPC& thecpc, FabArray<FAB> const& src,
    1177             :                        int scomp, int dcomp, int ncomp, CpOp op);
    1178             : 
    1179             :     void CMD_local_setVal_gpu (value_type x, const CommMetaData& thecmd, int scomp, int ncomp);
    1180             :     void CMD_remote_setVal_gpu (value_type x, const CommMetaData& thecmd, int scomp, int ncomp);
    1181             : 
    1182             : #if defined(__CUDACC__)
    1183             : 
    1184             :     void FB_local_copy_cuda_graph_1 (const FB& TheFB, int scomp, int ncomp);
    1185             :     void FB_local_copy_cuda_graph_n (const FB& TheFB, int scomp, int ncomp);
    1186             : 
    1187             : #endif
    1188             : #endif
    1189             : 
    1190             : #ifdef AMREX_USE_MPI
    1191             : 
    1192             : #ifdef AMREX_USE_GPU
    1193             : #if defined(__CUDACC__)
    1194             : 
    1195             :     void FB_pack_send_buffer_cuda_graph (const FB& TheFB, int scomp, int ncomp,
    1196             :                                          Vector<char*>& send_data,
    1197             :                                          Vector<std::size_t> const& send_size,
    1198             :                                          Vector<const CopyComTagsContainer*> const& send_cctc);
    1199             : 
    1200             :     void FB_unpack_recv_buffer_cuda_graph (const FB& TheFB, int dcomp, int ncomp,
    1201             :                                            Vector<char*> const& recv_data,
    1202             :                                            Vector<std::size_t> const& recv_size,
    1203             :                                            Vector<const CopyComTagsContainer*> const& recv_cctc,
    1204             :                                            bool is_thread_safe);
    1205             : 
    1206             : #endif
    1207             : 
    1208             :     template <typename BUF = value_type>
    1209             :     static void pack_send_buffer_gpu (FabArray<FAB> const& src, int scomp, int ncomp,
    1210             :                                       Vector<char*> const& send_data,
    1211             :                                       Vector<std::size_t> const& send_size,
    1212             :                                       Vector<const CopyComTagsContainer*> const& send_cctc);
    1213             : 
    1214             :     template <typename BUF = value_type>
    1215             :     static void unpack_recv_buffer_gpu (FabArray<FAB>& dst, int dcomp, int ncomp,
    1216             :                                         Vector<char*> const& recv_data,
    1217             :                                         Vector<std::size_t> const& recv_size,
    1218             :                                         Vector<const CopyComTagsContainer*> const& recv_cctc,
    1219             :                                         CpOp op, bool is_thread_safe);
    1220             : 
    1221             : #endif
    1222             : 
    1223             :     template <typename BUF = value_type>
    1224             :     static void pack_send_buffer_cpu (FabArray<FAB> const& src, int scomp, int ncomp,
    1225             :                                       Vector<char*> const& send_data,
    1226             :                                       Vector<std::size_t> const& send_size,
    1227             :                                       Vector<const CopyComTagsContainer*> const& send_cctc);
    1228             : 
    1229             :     template <typename BUF = value_type>
    1230             :     static void unpack_recv_buffer_cpu (FabArray<FAB>& dst, int dcomp, int ncomp,
    1231             :                                         Vector<char*> const& recv_data,
    1232             :                                         Vector<std::size_t> const& recv_size,
    1233             :                                         Vector<const CopyComTagsContainer*> const& recv_cctc,
    1234             :                                         CpOp op, bool is_thread_safe);
    1235             : 
    1236             : #endif
    1237             : 
    1238             :     /**
    1239             :      * \brief Return infinity norm
    1240             :      *
    1241             :      * \param comp           starting component
    1242             :      * \param ncomp          number of components
    1243             :      * \param nghost         number of ghost cells
    1244             :      * \param local          If true, MPI communication is skipped.
    1245             :      * \param ignore_covered ignore covered cells. Only relevant for cell-centered EB data.
    1246             :      */
    1247             :     template <typename F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
    1248             :     typename F::value_type
    1249             :     norminf (int comp, int ncomp, IntVect const& nghost, bool local = false,
    1250             :              [[maybe_unused]] bool ignore_covered = false) const;
    1251             : 
    1252             :     /**
    1253             :      * \brief Return infinity norm in masked region
    1254             :      *
    1255             :      * \param mask   only mask=true region is included
    1256             :      * \param comp   starting component
    1257             :      * \param ncomp  number of components
    1258             :      * \param nghost number of ghost cells
    1259             :      * \param local  If true, MPI communication is skipped.
    1260             :      */
    1261             :     template <typename IFAB, typename F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
    1262             :     typename F::value_type
    1263             :     norminf (FabArray<IFAB> const& mask, int comp, int ncomp, IntVect const& nghost,
    1264             :              bool local = false) const;
    1265             : 
    1266             : protected:
    1267             : 
    1268             :     std::unique_ptr<FabFactory<FAB> > m_factory;
    1269             :     DataAllocator m_dallocator;
    1270             :     std::unique_ptr<detail::SingleChunkArena> m_single_chunk_arena;
    1271             :     Long m_single_chunk_size = 0;
    1272             : 
    1273             :     //! has define() been called?
    1274             :     bool define_function_called = false;
    1275             : 
    1276             :     //
    1277             :     //! The data.
    1278             :     std::vector<FAB*> m_fabs_v;
    1279             : 
    1280             : #ifdef AMREX_USE_GPU
    1281             :     mutable void* m_dp_arrays = nullptr;
    1282             : #endif
    1283             :     mutable void* m_hp_arrays = nullptr;
    1284             :     mutable MultiArray4<value_type> m_arrays;
    1285             :     mutable MultiArray4<value_type const> m_const_arrays;
    1286             : 
    1287             :     Vector<std::string> m_tags;
    1288             : 
    1289             :     //! for shared memory
    1290             :     struct ShMem {
    1291             : 
    1292      886192 :         ShMem () noexcept = default;
    1293             : 
    1294     1025381 :         ~ShMem () { // NOLINT
    1295             : #if defined(BL_USE_MPI3)
    1296             :             if (win != MPI_WIN_NULL) { MPI_Win_free(&win); }
    1297             : #endif
    1298             : #ifdef BL_USE_TEAM
    1299             :             if (alloc) {
    1300             :                 amrex::update_fab_stats(-n_points, -n_values, sizeof(value_type));
    1301             :             }
    1302             : #endif
    1303     1025381 :         }
    1304           5 :         ShMem (ShMem&& rhs) noexcept
    1305           5 :                  : alloc(rhs.alloc), n_values(rhs.n_values), n_points(rhs.n_points)
    1306             : #if defined(BL_USE_MPI3)
    1307             :                  , win(rhs.win)
    1308             : #endif
    1309             :         {
    1310           5 :             rhs.alloc = false;
    1311             : #if defined(BL_USE_MPI3)
    1312             :             rhs.win = MPI_WIN_NULL;
    1313             : #endif
    1314           5 :         }
    1315       41641 :         ShMem& operator= (ShMem&& rhs) noexcept {
    1316       41641 :             if (&rhs != this) {
    1317       41641 :                 alloc = rhs.alloc;
    1318       41641 :                 n_values = rhs.n_values;
    1319       41641 :                 n_points = rhs.n_points;
    1320       41641 :                 rhs.alloc = false;
    1321             : #if defined(BL_USE_MPI3)
    1322             :                 win = rhs.win;
    1323             :                 rhs.win = MPI_WIN_NULL;
    1324             : #endif
    1325             :             }
    1326       41641 :             return *this;
    1327             :         }
    1328             :         ShMem (const ShMem&) = delete;
    1329             :         ShMem& operator= (const ShMem&) = delete;
    1330             :         bool  alloc{false};
    1331             :         Long  n_values{0};
    1332             :         Long  n_points{0};
    1333             : #if defined(BL_USE_MPI3)
    1334             :         MPI_Win win = MPI_WIN_NULL;
    1335             : #endif
    1336             :     };
    1337             :     ShMem shmem;
    1338             : 
    1339             :     bool SharedMemory () const noexcept { return shmem.alloc; }
    1340             : 
    1341             : private:
    1342             :     using Iterator = typename std::vector<FAB*>::iterator;
    1343             : 
    1344             :     void AllocFabs (const FabFactory<FAB>& factory, Arena* ar,
    1345             :                     const Vector<std::string>& tags,
    1346             :                     bool alloc_single_chunk);
    1347             : 
    1348             :     void setFab_assert (int K, FAB const& fab) const;
    1349             : 
    1350             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
    1351             :     void build_arrays () const;
    1352             : 
    1353             :     void clear_arrays ();
    1354             : 
    1355             : public:
    1356             : 
    1357             : #ifdef BL_USE_MPI
    1358             : 
    1359             :     //! Prepost nonblocking receives
    1360             :     template <typename BUF=value_type>
    1361             :     static void PostRcvs (const MapOfCopyComTagContainers&       RcvTags,
    1362             :                    char*&                                 the_recv_data,
    1363             :                    Vector<char*>&                         recv_data,
    1364             :                    Vector<std::size_t>&                   recv_size,
    1365             :                    Vector<int>&                           recv_from,
    1366             :                    Vector<MPI_Request>&                   recv_reqs,
    1367             :                    int                                    ncomp,
    1368             :                    int                                    SeqNum);
    1369             : 
    1370             :     template <typename BUF=value_type>
    1371             :     AMREX_NODISCARD
    1372             :     static TheFaArenaPointer PostRcvs (const MapOfCopyComTagContainers&       RcvTags,
    1373             :                    Vector<char*>&                         recv_data,
    1374             :                    Vector<std::size_t>&                   recv_size,
    1375             :                    Vector<int>&                           recv_from,
    1376             :                    Vector<MPI_Request>&                   recv_reqs,
    1377             :                    int                                    ncomp,
    1378             :                    int                                    SeqNum);
    1379             : 
    1380             :     template <typename BUF=value_type>
    1381             :     static void PrepareSendBuffers (const MapOfCopyComTagContainers&     SndTags,
    1382             :                              char*&                               the_send_data,
    1383             :                              Vector<char*>&                       send_data,
    1384             :                              Vector<std::size_t>&                 send_size,
    1385             :                              Vector<int>&                         send_rank,
    1386             :                              Vector<MPI_Request>&                 send_reqs,
    1387             :                              Vector<const CopyComTagsContainer*>& send_cctc,
    1388             :                              int                                  ncomp);
    1389             : 
    1390             :     template <typename BUF=value_type>
    1391             :     AMREX_NODISCARD
    1392             :     static TheFaArenaPointer PrepareSendBuffers (const MapOfCopyComTagContainers&     SndTags,
    1393             :                              Vector<char*>&                       send_data,
    1394             :                              Vector<std::size_t>&                 send_size,
    1395             :                              Vector<int>&                         send_rank,
    1396             :                              Vector<MPI_Request>&                 send_reqs,
    1397             :                              Vector<const CopyComTagsContainer*>& send_cctc,
    1398             :                              int                                  ncomp);
    1399             : 
    1400             :     static void PostSnds (Vector<char*> const&       send_data,
    1401             :                           Vector<std::size_t> const& send_size,
    1402             :                           Vector<int> const&         send_rank,
    1403             :                           Vector<MPI_Request>&       send_reqs,
    1404             :                           int                        SeqNum);
    1405             : #endif
    1406             : 
    1407             :     std::unique_ptr<FBData<FAB>> fbd;
    1408             :     std::unique_ptr<PCData<FAB>> pcd;
    1409             : 
    1410             :     // Pointer to temporary fab used in non-blocking amrex::OverrideSync
    1411             :     std::unique_ptr< FabArray<FAB> > os_temp;
    1412             : 
    1413             : 
    1414             : 
    1415             :     /**
    1416             :      * \brief y += a*x
    1417             :      *
    1418             :      * \param y      FabArray y
    1419             :      * \param a      scalar a
    1420             :      * \param x      FabArray x
    1421             :      * \param xcomp  starting component of x
    1422             :      * \param ycomp  starting component of y
    1423             :      * \param ncomp  number of components
    1424             :      * \param nghost number of ghost cells
    1425             :      */
    1426             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
    1427             :     static void Saxpy (FabArray<FAB>& y, value_type a, FabArray<FAB> const& x,
    1428             :                        int xcomp, int ycomp, int ncomp, IntVect const& nghost);
    1429             : 
    1430             :     /**
    1431             :      * \brief y = x + a*y
    1432             :      *
    1433             :      * \param y      FabArray y
    1434             :      * \param a      scalar a
    1435             :      * \param x      FabArray x
    1436             :      * \param xcomp  starting component of x
    1437             :      * \param ycomp  starting component of y
    1438             :      * \param ncomp  number of components
    1439             :      * \param nghost number of ghost cells
    1440             :      */
    1441             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
    1442             :     static void Xpay (FabArray<FAB>& y, value_type a, FabArray<FAB> const& x,
    1443             :                       int xcomp, int ycomp, int ncomp, IntVect const& nghost);
    1444             : 
    1445             :     /**
    1446             :      * \brief dst = a*x + b*y
    1447             :      *
    1448             :      * \param dst     destination FabArray
    1449             :      * \param a       scalar a
    1450             :      * \param x       FabArray x
    1451             :      * \param xcomp   starting component of x
    1452             :      * \param b       scalar b
    1453             :      * \param y       FabArray y
    1454             :      * \param ycomp   starting component of y
    1455             :      * \param dstcomp starting component of destination
    1456             :      * \param numcomp number of components
    1457             :      * \param nghost  number of ghost cells
    1458             :      */
    1459             :     template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
    1460             :     static void LinComb (FabArray<FAB>& dst,
    1461             :                          value_type a, const FabArray<FAB>& x, int xcomp,
    1462             :                          value_type b, const FabArray<FAB>& y, int ycomp,
    1463             :                          int dstcomp, int numcomp, const IntVect& nghost);
    1464             : };
    1465             : 
    1466             : 
    1467             : #include <AMReX_FabArrayCommI.H>
    1468             : 
    1469             : template <class FAB>
    1470             : bool
    1471             : FabArray<FAB>::defined (int K) const noexcept
    1472             : {
    1473             :     int li = localindex(K);
    1474             :     if (li >= 0 && li < static_cast<int>(m_fabs_v.size()) && m_fabs_v[li] != 0) {
    1475             :         return true;
    1476             :     }
    1477             :     else {
    1478             :         return false;
    1479             :     }
    1480             : }
    1481             : 
    1482             : template <class FAB>
    1483             : bool
    1484             : FabArray<FAB>::defined (const MFIter& mfi) const noexcept
    1485             : {
    1486             :     int li = mfi.LocalIndex();
    1487             :     if (li < static_cast<int>(m_fabs_v.size()) && m_fabs_v[li] != nullptr) {
    1488             :         return true;
    1489             :     }
    1490             :     else {
    1491             :         return false;
    1492             :     }
    1493             : }
    1494             : 
    1495             : template <class FAB>
    1496             : FAB*
    1497     9062617 : FabArray<FAB>::fabPtr (const MFIter& mfi) noexcept
    1498             : {
    1499             :     AMREX_ASSERT(mfi.LocalIndex() < indexArray.size());
    1500             :     AMREX_ASSERT(DistributionMap() == mfi.DistributionMap());
    1501     9062617 :     int li = mfi.LocalIndex();
    1502     9062617 :     return m_fabs_v[li];
    1503             : }
    1504             : 
    1505             : template <class FAB>
    1506             : FAB const*
    1507     2795960 : FabArray<FAB>::fabPtr (const MFIter& mfi) const noexcept
    1508             : {
    1509             :     AMREX_ASSERT(mfi.LocalIndex() < indexArray.size());
    1510             :     AMREX_ASSERT(DistributionMap() == mfi.DistributionMap());
    1511     2795960 :     int li = mfi.LocalIndex();
    1512     2795960 :     return m_fabs_v[li];
    1513             : }
    1514             : 
    1515             : template <class FAB>
    1516             : FAB*
    1517    14751472 : FabArray<FAB>::fabPtr (int K) noexcept
    1518             : {
    1519    14751472 :     int li = localindex(K);
    1520             :     AMREX_ASSERT(li >=0 && li < indexArray.size());
    1521    14751472 :     return m_fabs_v[li];
    1522             : }
    1523             : 
    1524             : template <class FAB>
    1525             : FAB const*
    1526     4920839 : FabArray<FAB>::fabPtr (int K) const noexcept
    1527             : {
    1528     4920839 :     int li = localindex(K);
    1529             :     AMREX_ASSERT(li >=0 && li < indexArray.size());
    1530     4920839 :     return m_fabs_v[li];
    1531             : }
    1532             : 
    1533             : template <class FAB>
    1534             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1535             : void
    1536             : FabArray<FAB>::prefetchToHost (const MFIter& mfi) const noexcept
    1537             : {
    1538             : #ifdef AMREX_USE_CUDA
    1539             :     this->fabPtr(mfi)->prefetchToHost();
    1540             : #else
    1541             :     amrex::ignore_unused(mfi);
    1542             : #endif
    1543             : }
    1544             : 
    1545             : template <class FAB>
    1546             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1547             : void
    1548             : FabArray<FAB>::prefetchToDevice (const MFIter& mfi) const noexcept
    1549             : {
    1550             : #ifdef AMREX_USE_CUDA
    1551             :     this->fabPtr(mfi)->prefetchToDevice();
    1552             : #else
    1553             :     amrex::ignore_unused(mfi);
    1554             : #endif
    1555             : }
    1556             : 
    1557             : template <class FAB>
    1558             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1559             : Array4<typename FabArray<FAB>::value_type const>
    1560      815623 : FabArray<FAB>::array (const MFIter& mfi) const noexcept
    1561             : {
    1562      815623 :     return fabPtr(mfi)->const_array();
    1563             : }
    1564             : 
    1565             : template <class FAB>
    1566             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1567             : Array4<typename FabArray<FAB>::value_type>
    1568     3133814 : FabArray<FAB>::array (const MFIter& mfi) noexcept
    1569             : {
    1570     3133814 :     return fabPtr(mfi)->array();
    1571             : }
    1572             : 
    1573             : template <class FAB>
    1574             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1575             : Array4<typename FabArray<FAB>::value_type const>
    1576           0 : FabArray<FAB>::array (int K) const noexcept
    1577             : {
    1578           0 :     return fabPtr(K)->const_array();
    1579             : }
    1580             : 
    1581             : template <class FAB>
    1582             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1583             : Array4<typename FabArray<FAB>::value_type>
    1584             : FabArray<FAB>::array (int K) noexcept
    1585             : {
    1586             :     return fabPtr(K)->array();
    1587             : }
    1588             : 
    1589             : template <class FAB>
    1590             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1591             : Array4<typename FabArray<FAB>::value_type const>
    1592      761435 : FabArray<FAB>::const_array (const MFIter& mfi) const noexcept
    1593             : {
    1594      761435 :     return fabPtr(mfi)->const_array();
    1595             : }
    1596             : 
    1597             : template <class FAB>
    1598             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1599             : Array4<typename FabArray<FAB>::value_type const>
    1600             : FabArray<FAB>::const_array (int K) const noexcept
    1601             : {
    1602             :     return fabPtr(K)->const_array();
    1603             : }
    1604             : 
    1605             : template <class FAB>
    1606             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1607             : Array4<typename FabArray<FAB>::value_type const>
    1608             : FabArray<FAB>::array (const MFIter& mfi, int start_comp) const noexcept
    1609             : {
    1610             :     return fabPtr(mfi)->const_array(start_comp);
    1611             : }
    1612             : 
    1613             : template <class FAB>
    1614             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1615             : Array4<typename FabArray<FAB>::value_type>
    1616             : FabArray<FAB>::array (const MFIter& mfi, int start_comp) noexcept
    1617             : {
    1618             :     return fabPtr(mfi)->array(start_comp);
    1619             : }
    1620             : 
    1621             : template <class FAB>
    1622             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1623             : Array4<typename FabArray<FAB>::value_type const>
    1624             : FabArray<FAB>::array (int K, int start_comp) const noexcept
    1625             : {
    1626             :     return fabPtr(K)->const_array(start_comp);
    1627             : }
    1628             : 
    1629             : template <class FAB>
    1630             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1631             : Array4<typename FabArray<FAB>::value_type>
    1632             : FabArray<FAB>::array (int K, int start_comp) noexcept
    1633             : {
    1634             :     return fabPtr(K)->array(start_comp);
    1635             : }
    1636             : 
    1637             : template <class FAB>
    1638             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1639             : Array4<typename FabArray<FAB>::value_type const>
    1640             : FabArray<FAB>::const_array (const MFIter& mfi, int start_comp) const noexcept
    1641             : {
    1642             :     return fabPtr(mfi)->const_array(start_comp);
    1643             : }
    1644             : 
    1645             : template <class FAB>
    1646             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1647             : Array4<typename FabArray<FAB>::value_type const>
    1648             : FabArray<FAB>::const_array (int K, int start_comp) const noexcept
    1649             : {
    1650             :     return fabPtr(K)->const_array(start_comp);
    1651             : }
    1652             : 
    1653             : template <class FAB>
    1654             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1655             : MultiArray4<typename FabArray<FAB>::value_type>
    1656             : FabArray<FAB>::arrays () noexcept
    1657             : {
    1658             :     build_arrays();
    1659             :     return m_arrays;
    1660             : }
    1661             : 
    1662             : template <class FAB>
    1663             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1664             : MultiArray4<typename FabArray<FAB>::value_type const>
    1665             : FabArray<FAB>::arrays () const noexcept
    1666             : {
    1667             :     build_arrays();
    1668             :     return m_const_arrays;
    1669             : }
    1670             : 
    1671             : template <class FAB>
    1672             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1673             : MultiArray4<typename FabArray<FAB>::value_type const>
    1674             : FabArray<FAB>::const_arrays () const noexcept
    1675             : {
    1676             :     build_arrays();
    1677             :     return m_const_arrays;
    1678             : }
    1679             : 
    1680             : template <class FAB>
    1681             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1682             : void
    1683             : FabArray<FAB>::build_arrays () const
    1684             : {
    1685             :     using A = Array4<value_type>;
    1686             :     using AC = Array4<value_type const>;
    1687             :     static_assert(sizeof(A) == sizeof(AC), "sizeof(Array4<T>) != sizeof(Array4<T const>)");
    1688             :     if (!m_hp_arrays && local_size() > 0) {
    1689             :         const int n = local_size();
    1690             : #ifdef AMREX_USE_GPU
    1691             :         m_hp_arrays = (void*)The_Pinned_Arena()->alloc(n*2*sizeof(A));
    1692             :         m_dp_arrays = (void*)The_Arena()->alloc(n*2*sizeof(A));
    1693             : #else
    1694             :         m_hp_arrays = (void*)std::malloc(n*2*sizeof(A));
    1695             : #endif
    1696             :         for (int li = 0; li < n; ++li) {
    1697             :             if (m_fabs_v[li]) {
    1698             :                 new ((A*)m_hp_arrays+li) A(m_fabs_v[li]->array());
    1699             :                 new ((AC*)m_hp_arrays+li+n) AC(m_fabs_v[li]->const_array());
    1700             :             } else {
    1701             :                 new ((A*)m_hp_arrays+li) A{};
    1702             :                 new ((AC*)m_hp_arrays+li+n) AC{};
    1703             :             }
    1704             :         }
    1705             :         m_arrays.hp = (A*)m_hp_arrays;
    1706             :         m_const_arrays.hp = (AC*)m_hp_arrays + n;
    1707             : #ifdef AMREX_USE_GPU
    1708             :         m_arrays.dp = (A*)m_dp_arrays;
    1709             :         m_const_arrays.dp = (AC*)m_dp_arrays + n;
    1710             :         Gpu::htod_memcpy(m_dp_arrays, m_hp_arrays, n*2*sizeof(A));
    1711             : #endif
    1712             :     }
    1713             : }
    1714             : 
    1715             : template <class FAB>
    1716             : void
    1717     2096199 : FabArray<FAB>::clear_arrays ()
    1718             : {
    1719             : #ifdef AMREX_USE_GPU
    1720             :     The_Pinned_Arena()->free(m_hp_arrays);
    1721             :     The_Arena()->free(m_dp_arrays);
    1722             :     m_dp_arrays = nullptr;
    1723             : #else
    1724     2096199 :     std::free(m_hp_arrays);
    1725             : #endif
    1726     2096199 :     m_hp_arrays = nullptr;
    1727     2096199 :     m_arrays.hp = nullptr;
    1728     2096199 :     m_const_arrays.hp = nullptr;
    1729     2096199 : }
    1730             : 
    1731             : template <class FAB>
    1732             : AMREX_NODISCARD
    1733             : FAB*
    1734             : FabArray<FAB>::release (int K)
    1735             : {
    1736             :     const int li = localindex(K);
    1737             :     if (li >= 0 && li < static_cast<int>(m_fabs_v.size()) && m_fabs_v[li] != nullptr) {
    1738             :         AMREX_ASSERT(m_single_chunk_arena == nullptr);
    1739             :         Long nbytes = amrex::nBytesOwned(*m_fabs_v[li]);
    1740             :         if (nbytes > 0) {
    1741             :             for (auto const& t : m_tags) {
    1742             :                 updateMemUsage(t, -nbytes, nullptr);
    1743             :             }
    1744             :         }
    1745             :         return std::exchange(m_fabs_v[li], nullptr);
    1746             :     } else {
    1747             :         return nullptr;
    1748             :     }
    1749             : }
    1750             : 
    1751             : template <class FAB>
    1752             : AMREX_NODISCARD
    1753             : FAB*
    1754             : FabArray<FAB>::release (const MFIter& mfi)
    1755             : {
    1756             :     const int li = mfi.LocalIndex();
    1757             :     if (li >= 0 && li < static_cast<int>(m_fabs_v.size()) && m_fabs_v[li] != nullptr) {
    1758             :         AMREX_ASSERT(m_single_chunk_arena == nullptr);
    1759             :         Long nbytes = amrex::nBytesOwned(*m_fabs_v[li]);
    1760             :         if (nbytes > 0) {
    1761             :             for (auto const& t : m_tags) {
    1762             :                 updateMemUsage(t, -nbytes, nullptr);
    1763             :             }
    1764             :         }
    1765             :         return std::exchange(m_fabs_v[li], nullptr);
    1766             :     } else {
    1767             :         return nullptr;
    1768             :     }
    1769             : }
    1770             : 
    1771             : template <class FAB>
    1772             : void
    1773     2096199 : FabArray<FAB>::clear ()
    1774             : {
    1775     2096199 :     if (define_function_called)
    1776             :     {
    1777      972846 :         define_function_called = false;
    1778      972846 :         clearThisBD();  //!< addThisBD is called in define
    1779             :     }
    1780             : 
    1781     2096199 :     Long nbytes = 0L;
    1782     5529700 :     for (auto *x : m_fabs_v) {
    1783     3433501 :         if (x) {
    1784     3433501 :             nbytes += amrex::nBytesOwned(*x);
    1785     3433501 :             m_factory->destroy(x);
    1786             :         }
    1787             :     }
    1788     2096199 :     m_fabs_v.clear();
    1789     2096199 :     clear_arrays();
    1790     2096199 :     m_factory.reset();
    1791     2096199 :     m_dallocator.m_arena = nullptr;
    1792             :     // no need to clear the non-blocking fillboundary stuff
    1793             : 
    1794     2096199 :     if (nbytes > 0) {
    1795     1942676 :         for (auto const& t : m_tags) {
    1796      971336 :             updateMemUsage(t, -nbytes, nullptr);
    1797             :         }
    1798             :     }
    1799             : 
    1800     2096199 :     if (m_single_chunk_arena) {
    1801           0 :         m_single_chunk_arena.reset();
    1802             :     }
    1803     2096199 :     m_single_chunk_size = 0;
    1804             : 
    1805     2096199 :     m_tags.clear();
    1806             : 
    1807     2096199 :     FabArrayBase::clear();
    1808     2096199 : }
    1809             : 
    1810             : template <class FAB>
    1811             : template <typename SFAB, typename DFAB,
    1812             :           std::enable_if_t<std::conjunction_v<
    1813             :               IsBaseFab<DFAB>, IsBaseFab<SFAB>,
    1814             :               std::is_convertible<typename SFAB::value_type,
    1815             :                                   typename DFAB::value_type>>, int>>
    1816             : void
    1817             : FabArray<FAB>::LocalCopy (FabArray<SFAB> const& src, int scomp, int dcomp, int ncomp,
    1818             :                           IntVect const& nghost)
    1819             : {
    1820             :     amrex::Copy(*this, src, scomp, dcomp, ncomp, nghost);
    1821             : }
    1822             : 
    1823             : template <class FAB>
    1824             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1825             : void
    1826             : FabArray<FAB>::LocalAdd (FabArray<FAB> const& src, int scomp, int dcomp, int ncomp,
    1827             :                          IntVect const& nghost)
    1828             : {
    1829             :     amrex::Add(*this, src, scomp, dcomp, ncomp, nghost);
    1830             : }
    1831             : 
    1832             : template <class FAB>
    1833             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1834             : void
    1835             : FabArray<FAB>::setVal (value_type val, int nghost)
    1836             : {
    1837             :     setVal(val,0,n_comp,IntVect(nghost));
    1838             : }
    1839             : 
    1840             : template <class FAB>
    1841             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1842             : void
    1843             : FabArray<FAB>::setVal (value_type val, const IntVect& nghost)
    1844             : {
    1845             :     setVal(val,0,n_comp,nghost);
    1846             : }
    1847             : 
    1848             : template <class FAB>
    1849             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1850             : void
    1851             : FabArray<FAB>::setVal (value_type val, const Box& region, int nghost)
    1852             : {
    1853             :     setVal(val,region,0,n_comp,IntVect(nghost));
    1854             : }
    1855             : 
    1856             : template <class FAB>
    1857             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    1858             : void
    1859             : FabArray<FAB>::setVal (value_type val, const Box& region, const IntVect& nghost)
    1860             : {
    1861             :     setVal(val,region,0,n_comp,nghost);
    1862             : }
    1863             : 
    1864             : template <class FAB>
    1865         218 : FabArray<FAB>::FabArray () noexcept
    1866         218 :     : shmem()
    1867             : {
    1868         218 :     m_FA_stats.recordBuild();
    1869         218 : }
    1870             : 
    1871             : template <class FAB>
    1872             : FabArray<FAB>::FabArray (Arena* a) noexcept
    1873             :     : m_dallocator(a),
    1874             :       shmem()
    1875             : {
    1876             :     m_FA_stats.recordBuild();
    1877             : }
    1878             : 
    1879             : template <class FAB>
    1880       29351 : FabArray<FAB>::FabArray (const BoxArray&            bxs,
    1881             :                          const DistributionMapping& dm,
    1882             :                          int                        nvar,
    1883             :                          int                        ngrow,
    1884             :                          const MFInfo&              info,
    1885             :                          const FabFactory<FAB>&     factory)
    1886       29351 :     : FabArray<FAB>(bxs,dm,nvar,IntVect(ngrow),info,factory)
    1887       29351 : {}
    1888             : 
    1889             : template <class FAB>
    1890      885974 : FabArray<FAB>::FabArray (const BoxArray&            bxs,
    1891             :                          const DistributionMapping& dm,
    1892             :                          int                        nvar,
    1893             :                          const IntVect&             ngrow,
    1894             :                          const MFInfo&              info,
    1895             :                          const FabFactory<FAB>&     factory)
    1896             :     : m_factory(factory.clone()),
    1897      885974 :       shmem()
    1898             : {
    1899      885974 :     m_FA_stats.recordBuild();
    1900      885974 :     define(bxs,dm,nvar,ngrow,info,*m_factory);
    1901      885974 : }
    1902             : 
    1903             : template <class FAB>
    1904             : FabArray<FAB>::FabArray (const FabArray<FAB>& rhs, MakeType maketype, int scomp, int ncomp)
    1905             :     : m_factory(rhs.Factory().clone()),
    1906             :       shmem()
    1907             : {
    1908             :     m_FA_stats.recordBuild();
    1909             :     define(rhs.boxArray(), rhs.DistributionMap(), ncomp, rhs.nGrowVect(),
    1910             :            MFInfo().SetAlloc(false), *m_factory);
    1911             : 
    1912             :     if (maketype == amrex::make_alias)
    1913             :     {
    1914             :         for (int i = 0, n = indexArray.size(); i < n; ++i) {
    1915             :             auto const& rhsfab = *(rhs.m_fabs_v[i]);
    1916             :             m_fabs_v.push_back(m_factory->create_alias(rhsfab, scomp, ncomp));
    1917             :         }
    1918             :     }
    1919             :     else
    1920             :     {
    1921             :         amrex::Abort("FabArray: unknown MakeType");
    1922             :     }
    1923             : }
    1924             : 
    1925             : template <class FAB>
    1926           5 : FabArray<FAB>::FabArray (FabArray<FAB>&& rhs) noexcept
    1927             :     : FabArrayBase (static_cast<FabArrayBase&&>(rhs))
    1928           5 :     , m_factory    (std::move(rhs.m_factory))
    1929           5 :     , m_dallocator (std::move(rhs.m_dallocator))
    1930           5 :     , m_single_chunk_arena(std::move(rhs.m_single_chunk_arena))
    1931           5 :     , m_single_chunk_size(std::exchange(rhs.m_single_chunk_size,0))
    1932           5 :     , define_function_called(rhs.define_function_called)
    1933           5 :     , m_fabs_v     (std::move(rhs.m_fabs_v))
    1934             : #ifdef AMREX_USE_GPU
    1935             :     , m_dp_arrays  (std::exchange(rhs.m_dp_arrays, nullptr))
    1936             : #endif
    1937           5 :     , m_hp_arrays  (std::exchange(rhs.m_hp_arrays, nullptr))
    1938             :     , m_arrays     (rhs.m_arrays)
    1939             :     , m_const_arrays(rhs.m_const_arrays)
    1940           5 :     , m_tags       (std::move(rhs.m_tags))
    1941          20 :     , shmem        (std::move(rhs.shmem))
    1942             :     // no need to worry about the data used in non-blocking FillBoundary.
    1943             : {
    1944           5 :     m_FA_stats.recordBuild();
    1945           5 :     rhs.define_function_called = false; // the responsibility of clear BD has been transferred.
    1946           5 :     rhs.m_fabs_v.clear(); // clear the data pointers so that rhs.clear does delete them.
    1947           5 :     rhs.clear();
    1948           5 : }
    1949             : 
    1950             : template <class FAB>
    1951             : FabArray<FAB>&
    1952       41641 : FabArray<FAB>::operator= (FabArray<FAB>&& rhs) noexcept
    1953             : {
    1954       41641 :     if (&rhs != this)
    1955             :     {
    1956       41641 :         clear();
    1957             : 
    1958       41641 :         FabArrayBase::operator=(static_cast<FabArrayBase&&>(rhs));
    1959       41641 :         m_factory = std::move(rhs.m_factory);
    1960       41641 :         m_dallocator = std::move(rhs.m_dallocator);
    1961       41641 :         m_single_chunk_arena = std::move(rhs.m_single_chunk_arena);
    1962       41641 :         std::swap(m_single_chunk_size, rhs.m_single_chunk_size);
    1963       41641 :         define_function_called = rhs.define_function_called;
    1964       41641 :         std::swap(m_fabs_v, rhs.m_fabs_v);
    1965             : #ifdef AMREX_USE_GPU
    1966             :         std::swap(m_dp_arrays, rhs.m_dp_arrays);
    1967             : #endif
    1968       41641 :         std::swap(m_hp_arrays, rhs.m_hp_arrays);
    1969       41641 :         m_arrays = rhs.m_arrays;
    1970       41641 :         m_const_arrays = rhs.m_const_arrays;
    1971       41641 :         std::swap(m_tags, rhs.m_tags);
    1972       41641 :         shmem = std::move(rhs.shmem);
    1973             : 
    1974       41641 :         rhs.define_function_called = false;
    1975       41641 :         rhs.m_fabs_v.clear();
    1976       41641 :         rhs.m_tags.clear();
    1977       41641 :         rhs.clear();
    1978             :     }
    1979       41641 :     return *this;
    1980             : }
    1981             : 
    1982             : template <class FAB>
    1983     1025381 : FabArray<FAB>::~FabArray ()
    1984             : {
    1985     1025381 :     m_FA_stats.recordDelete();
    1986     1025381 :     clear();
    1987     1025381 : }
    1988             : 
    1989             : template <class FAB>
    1990             : bool
    1991             : FabArray<FAB>::ok () const
    1992             : {
    1993             :     if (!define_function_called) { return false; }
    1994             : 
    1995             :     int isok = 1;
    1996             : 
    1997             :     for (MFIter fai(*this); fai.isValid() && isok; ++fai)
    1998             :     {
    1999             :         if (defined(fai))
    2000             :         {
    2001             :             if (get(fai).box() != fabbox(fai.index()))
    2002             :             {
    2003             :                 isok = 0;
    2004             :             }
    2005             :         }
    2006             :         else
    2007             :         {
    2008             :             isok = 0;
    2009             :         }
    2010             :     }
    2011             : 
    2012             :     ParallelAllReduce::Min(isok, ParallelContext::CommunicatorSub());
    2013             : 
    2014             :     return isok == 1;
    2015             : }
    2016             : 
    2017             : template <class FAB>
    2018             : bool
    2019             : FabArray<FAB>::isDefined () const
    2020             : {
    2021             :     return define_function_called;
    2022             : }
    2023             : 
    2024             : template <class FAB>
    2025             : void
    2026         218 : FabArray<FAB>::define (const BoxArray&            bxs,
    2027             :                        const DistributionMapping& dm,
    2028             :                        int                        nvar,
    2029             :                        int                        ngrow,
    2030             :                        const MFInfo&              info,
    2031             :                        const FabFactory<FAB>&     a_factory)
    2032             : {
    2033         218 :     define(bxs,dm,nvar,IntVect(ngrow),info,a_factory);
    2034         218 : }
    2035             : 
    2036             : template <class FAB>
    2037             : void
    2038      886192 : FabArray<FAB>::define (const BoxArray&            bxs,
    2039             :                        const DistributionMapping& dm,
    2040             :                        int                        nvar,
    2041             :                        const IntVect&             ngrow,
    2042             :                        const MFInfo&              info,
    2043             :                        const FabFactory<FAB>&     a_factory)
    2044             : {
    2045     1772380 :     std::unique_ptr<FabFactory<FAB> > factory(a_factory.clone());
    2046             : 
    2047      886192 :     auto *default_arena = m_dallocator.m_arena;
    2048      886192 :     clear();
    2049             : 
    2050      886192 :     m_factory = std::move(factory);
    2051      886192 :     m_dallocator.m_arena = info.arena ? info.arena : default_arena;
    2052             : 
    2053      886192 :     define_function_called = true;
    2054             : 
    2055             :     AMREX_ASSERT(ngrow.allGE(0));
    2056             :     AMREX_ASSERT(boxarray.empty());
    2057      886192 :     FabArrayBase::define(bxs, dm, nvar, ngrow);
    2058             : 
    2059      886192 :     addThisBD();
    2060             : 
    2061      886192 :     if(info.alloc) {
    2062      885940 :         AllocFabs(*m_factory, m_dallocator.m_arena, info.tags, info.alloc_single_chunk);
    2063             : #ifdef BL_USE_TEAM
    2064             :         ParallelDescriptor::MyTeam().MemoryBarrier();
    2065             : #endif
    2066             :     }
    2067      886192 : }
    2068             : 
    2069             : template <class FAB>
    2070             : void
    2071      966153 : FabArray<FAB>::AllocFabs (const FabFactory<FAB>& factory, Arena* ar,
    2072             :                           const Vector<std::string>& tags, bool alloc_single_chunk)
    2073             : {
    2074      966153 :     if (shmem.alloc) { alloc_single_chunk = false; }
    2075             :     if constexpr (!IsBaseFab_v<FAB>) { alloc_single_chunk = false; }
    2076             : 
    2077      966153 :     const int n = indexArray.size();
    2078      966153 :     const int nworkers = ParallelDescriptor::TeamSize();
    2079      966153 :     shmem.alloc = (nworkers > 1);
    2080             : 
    2081      966153 :     bool alloc = !shmem.alloc;
    2082             : 
    2083      966153 :     FabInfo fab_info;
    2084      966153 :     fab_info.SetAlloc(alloc).SetShared(shmem.alloc).SetArena(ar);
    2085             : 
    2086      966153 :     if (alloc_single_chunk) {
    2087           0 :         m_single_chunk_size = 0L;
    2088           0 :         for (int i = 0; i < n; ++i) {
    2089           0 :             int K = indexArray[i];
    2090           0 :             const Box& tmpbox = fabbox(K);
    2091           0 :             m_single_chunk_size += factory.nBytes(tmpbox, n_comp, K);
    2092             :         }
    2093             :         AMREX_ASSERT(m_single_chunk_size >= 0); // 0 is okay.
    2094           0 :         m_single_chunk_arena = std::make_unique<detail::SingleChunkArena>(ar, m_single_chunk_size);
    2095           0 :         fab_info.SetArena(m_single_chunk_arena.get());
    2096             :     }
    2097             : 
    2098      966153 :     m_fabs_v.reserve(n);
    2099             : 
    2100      966153 :     Long nbytes = 0L;
    2101     4381165 :     for (int i = 0; i < n; ++i)
    2102             :     {
    2103     3415010 :         int K = indexArray[i];
    2104     3415010 :         const Box& tmpbox = fabbox(K);
    2105     3415010 :         m_fabs_v.push_back(factory.create(tmpbox, n_comp, fab_info, K));
    2106     3415010 :         nbytes += amrex::nBytesOwned(*m_fabs_v.back());
    2107             :     }
    2108             : 
    2109      966153 :     m_tags.clear();
    2110      966153 :     m_tags.emplace_back("All");
    2111      966153 :     for (auto const& t : m_region_tag) {
    2112           0 :         m_tags.push_back(t);
    2113             :     }
    2114      966153 :     for (auto const& t : tags) {
    2115           0 :         m_tags.push_back(t);
    2116             :     }
    2117     1932310 :     for (auto const& t: m_tags) {
    2118      966153 :         updateMemUsage(t, nbytes, ar);
    2119             :     }
    2120             : 
    2121             : #ifdef BL_USE_TEAM
    2122             :     if (shmem.alloc)
    2123             :     {
    2124             :         const int teamlead = ParallelDescriptor::MyTeamLead();
    2125             : 
    2126             :         shmem.n_values = 0;
    2127             :         shmem.n_points = 0;
    2128             :         Vector<Long> offset(n,0);
    2129             :         Vector<Long> nextoffset(nworkers,-1);
    2130             :         for (int i = 0; i < n; ++i) {
    2131             :             int K = indexArray[i];
    2132             :             int owner = distributionMap[K] - teamlead;
    2133             :             Long s = m_fabs_v[i]->size();
    2134             :             if (ownership[i]) {
    2135             :                 shmem.n_values += s;
    2136             :                 shmem.n_points += m_fabs_v[i]->numPts();
    2137             :             }
    2138             :             if (nextoffset[owner] < 0) {
    2139             :                 offset[i] = 0;
    2140             :                 nextoffset[owner] = s;
    2141             :             } else {
    2142             :                 offset[i] = nextoffset[owner];
    2143             :                 nextoffset[owner] += s;
    2144             :             }
    2145             :         }
    2146             : 
    2147             :         size_t bytes = shmem.n_values*sizeof(value_type);
    2148             : 
    2149             :         value_type *mfp;
    2150             :         Vector<value_type*> dps;
    2151             : 
    2152             : #if defined (BL_USE_MPI3)
    2153             : 
    2154             :         static MPI_Info info = MPI_INFO_NULL;
    2155             :         if (info == MPI_INFO_NULL) {
    2156             :             MPI_Info_create(&info);
    2157             :             MPI_Info_set(info, "alloc_shared_noncontig", "true");
    2158             :         }
    2159             : 
    2160             :         const MPI_Comm& team_comm = ParallelDescriptor::MyTeam().get();
    2161             : 
    2162             :         BL_MPI_REQUIRE( MPI_Win_allocate_shared(bytes, sizeof(value_type),
    2163             :                                                 info, team_comm, &mfp, &shmem.win) );
    2164             : 
    2165             :         for (int w = 0; w < nworkers; ++w) {
    2166             :             MPI_Aint sz;
    2167             :             int disp;
    2168             :             value_type *dptr = 0;
    2169             :             BL_MPI_REQUIRE( MPI_Win_shared_query(shmem.win, w, &sz, &disp, &dptr) );
    2170             :             // AMREX_ASSERT(disp == sizeof(value_type));
    2171             :             dps.push_back(dptr);
    2172             :         }
    2173             : 
    2174             : #else
    2175             : 
    2176             :         amrex::Abort("BaseFab::define: to allocate shared memory, USE_MPI3 must be true");
    2177             : 
    2178             : #endif
    2179             : 
    2180             :         for (int i = 0; i < n; ++i) {
    2181             :             int K = indexArray[i];
    2182             :             int owner = distributionMap[K] - teamlead;
    2183             :             value_type *p = dps[owner] + offset[i];
    2184             :             m_fabs_v[i]->setPtr(p, m_fabs_v[i]->size());
    2185             :         }
    2186             : 
    2187             :         for (Long i = 0; i < shmem.n_values; i++, mfp++) {
    2188             :             new (mfp) value_type;
    2189             :         }
    2190             : 
    2191             :         amrex::update_fab_stats(shmem.n_points, shmem.n_values, sizeof(value_type));
    2192             :     }
    2193             : #endif
    2194      966153 : }
    2195             : 
    2196             : template <class FAB>
    2197             : void
    2198             : FabArray<FAB>::setFab_assert (int K, FAB const& fab) const
    2199             : {
    2200             :     amrex::ignore_unused(K,fab);
    2201             :     AMREX_ASSERT(n_comp == fab.nComp());
    2202             :     AMREX_ASSERT(!boxarray.empty());
    2203             :     AMREX_ASSERT(fab.box() == fabbox(K));
    2204             :     AMREX_ASSERT(distributionMap[K] == ParallelDescriptor::MyProc());
    2205             :     AMREX_ASSERT(m_single_chunk_arena == nullptr);
    2206             : }
    2207             : 
    2208             : template <class FAB>
    2209             : void
    2210             : FabArray<FAB>::setFab (int boxno, std::unique_ptr<FAB> elem)
    2211             : {
    2212             :     if (n_comp == 0) {
    2213             :         n_comp = elem->nComp();
    2214             :     }
    2215             : 
    2216             :     setFab_assert(boxno, *elem);
    2217             : 
    2218             :     if (m_fabs_v.empty()) {
    2219             :         m_fabs_v.resize(indexArray.size(),nullptr);
    2220             :     }
    2221             : 
    2222             :     const int li = localindex(boxno);
    2223             :     if (m_fabs_v[li]) {
    2224             :         m_factory->destroy(m_fabs_v[li]);
    2225             :     }
    2226             :     m_fabs_v[li] = elem.release();
    2227             : }
    2228             : 
    2229             : template <class FAB>
    2230             : template <class F, std::enable_if_t<std::is_move_constructible_v<F>,int> >
    2231             : void
    2232             : FabArray<FAB>::setFab (int boxno, FAB&& elem)
    2233             : {
    2234             :     if (n_comp == 0) {
    2235             :         n_comp = elem.nComp();
    2236             :     }
    2237             : 
    2238             :     setFab_assert(boxno, elem);
    2239             : 
    2240             :     if (m_fabs_v.empty()) {
    2241             :         m_fabs_v.resize(indexArray.size(),nullptr);
    2242             :     }
    2243             : 
    2244             :     const int li = localindex(boxno);
    2245             :     if (m_fabs_v[li]) {
    2246             :         m_factory->destroy(m_fabs_v[li]);
    2247             :     }
    2248             :     m_fabs_v[li] = new FAB(std::move(elem));
    2249             : }
    2250             : 
    2251             : template <class FAB>
    2252             : void
    2253             : FabArray<FAB>::setFab (const MFIter& mfi, std::unique_ptr<FAB> elem)
    2254             : {
    2255             :     if (n_comp == 0) {
    2256             :         n_comp = elem->nComp();
    2257             :     }
    2258             : 
    2259             :     setFab_assert(mfi.index(), *elem);
    2260             : 
    2261             :     if (m_fabs_v.empty()) {
    2262             :         m_fabs_v.resize(indexArray.size(),nullptr);
    2263             :     }
    2264             : 
    2265             :     const int li = mfi.LocalIndex();
    2266             :     if (m_fabs_v[li]) {
    2267             :         m_factory->destroy(m_fabs_v[li]);
    2268             :     }
    2269             :     m_fabs_v[li] = elem.release();
    2270             : }
    2271             : 
    2272             : template <class FAB>
    2273             : template <class F, std::enable_if_t<std::is_move_constructible_v<F>,int> >
    2274             : void
    2275             : FabArray<FAB>::setFab (const MFIter& mfi, FAB&& elem)
    2276             : {
    2277             :     if (n_comp == 0) {
    2278             :         n_comp = elem.nComp();
    2279             :     }
    2280             : 
    2281             :     setFab_assert(mfi.index(), elem);
    2282             : 
    2283             :     if (m_fabs_v.empty()) {
    2284             :         m_fabs_v.resize(indexArray.size(),nullptr);
    2285             :     }
    2286             : 
    2287             :     const int li = mfi.LocalIndex();
    2288             :     if (m_fabs_v[li]) {
    2289             :         m_factory->destroy(m_fabs_v[li]);
    2290             :     }
    2291             :     m_fabs_v[li] = new FAB(std::move(elem));
    2292             : }
    2293             : 
    2294             : template <class FAB>
    2295             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    2296             : void
    2297             : FabArray<FAB>::setBndry (value_type val)
    2298             : {
    2299             :     setBndry(val, 0, n_comp);
    2300             : }
    2301             : 
    2302             : template <class FAB>
    2303             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
    2304             : void
    2305           0 : FabArray<FAB>::setBndry (value_type val,
    2306             :                          int        strt_comp,
    2307             :                          int        ncomp)
    2308             : {
    2309           0 :     if (n_grow.max() > 0)
    2310             :     {
    2311             : #ifdef AMREX_USE_GPU
    2312             :         if (Gpu::inLaunchRegion()) {
    2313             :             bool use_mfparfor = true;
    2314             :             const int nboxes = local_size();
    2315             :             if (nboxes == 1) {
    2316             :                 if (boxarray[indexArray[0]].numPts() > Long(65*65*65)) {
    2317             :                     use_mfparfor = false;
    2318             :                 }
    2319             :             } else {
    2320             :                 for (int i = 0; i < nboxes; ++i) {
    2321             :                     const Long npts = boxarray[indexArray[i]].numPts();
    2322             :                     if (npts >= Long(64*64*64)) {
    2323             :                         use_mfparfor = false;
    2324             :                         break;
    2325             :                     } else if (npts <= Long(17*17*17)) {
    2326             :                         break;
    2327             :                     }
    2328             :                 }
    2329             :             }
    2330             :             const IntVect nghost = n_grow;
    2331             :             if (use_mfparfor) {
    2332             :                 auto const& ma = this->arrays();
    2333             :                 ParallelFor(*this, nghost,
    2334             :                 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
    2335             :                 {
    2336             :                     auto const& a = ma[box_no];
    2337             :                     Box vbx(a);
    2338             :                     vbx.grow(-nghost);
    2339             :                     if (!vbx.contains(i,j,k)) {
    2340             :                         for (int n = 0; n < ncomp; ++n) {
    2341             :                             a(i,j,k,strt_comp+n) = val;
    2342             :                         }
    2343             :                     }
    2344             :                 });
    2345             :                 Gpu::streamSynchronize();
    2346             :             } else {
    2347             :                 using Tag = Array4BoxTag<value_type>;
    2348             :                 Vector<Tag> tags;
    2349             :                 for (MFIter mfi(*this); mfi.isValid(); ++mfi) {
    2350             :                     Box const& vbx = mfi.validbox();
    2351             :                     auto const& a = this->array(mfi);
    2352             : 
    2353             :                     Box b;
    2354             : #if (AMREX_SPACEDIM == 3)
    2355             :                     if (nghost[2] > 0) {
    2356             :                         b = vbx;
    2357             :                         b.setRange(2, vbx.smallEnd(2)-nghost[2], nghost[2]);
    2358             :                         b.grow(IntVect(nghost[0],nghost[1],0));
    2359             :                         tags.emplace_back(Tag{a, b});
    2360             :                         b.shift(2, vbx.length(2)+nghost[2]);
    2361             :                         tags.emplace_back(Tag{a, b});
    2362             :                     }
    2363             : #endif
    2364             : #if (AMREX_SPACEDIM >= 2)
    2365             :                     if (nghost[1] > 0) {
    2366             :                         b = vbx;
    2367             :                         b.setRange(1, vbx.smallEnd(1)-nghost[1], nghost[1]);
    2368             :                         b.grow(0, nghost[0]);
    2369             :                         tags.emplace_back(Tag{a, b});
    2370             :                         b.shift(1, vbx.length(1)+nghost[1]);
    2371             :                         tags.emplace_back(Tag{a, b});
    2372             :                     }
    2373             : #endif
    2374             :                     if (nghost[0] > 0) {
    2375             :                         b = vbx;
    2376             :                         b.setRange(0, vbx.smallEnd(0)-nghost[0], nghost[0]);
    2377             :                         tags.emplace_back(Tag{a, b});
    2378             :                         b.shift(0, vbx.length(0)+nghost[0]);
    2379             :                         tags.emplace_back(Tag{a, b});
    2380             :                     }
    2381             :                 }
    2382             : 
    2383             :                 ParallelFor(tags, ncomp,
    2384             :                 [=] AMREX_GPU_DEVICE (int i, int j, int k, int n, Tag const& tag) noexcept
    2385             :                 {
    2386             :                     tag.dfab(i,j,k,strt_comp+n) = val;
    2387             :                 });
    2388             :             }
    2389             :         } else
    2390             : #endif
    2391             :         {
    2392             : #ifdef AMREX_USE_OMP
    2393             : #pragma omp parallel
    2394             : #endif
    2395           0 :             for (MFIter fai(*this); fai.isValid(); ++fai)
    2396             :             {
    2397           0 :                 get(fai).template setComplement<RunOn::Host>(val, fai.validbox(), strt_comp, ncomp);
    2398             :             }
    2399             :         }
    2400             :     }
    2401           0 : }
    2402             : 
    2403             : template <class FAB>
    2404             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    2405             : void
    2406      213210 : FabArray<FAB>::setDomainBndry (value_type val, const Geometry& geom)
    2407             : {
    2408      213210 :     setDomainBndry(val, 0, n_comp, geom);
    2409      213210 : }
    2410             : 
    2411             : template <class FAB>
    2412             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    2413             : void
    2414      213210 : FabArray<FAB>::setDomainBndry (value_type val,
    2415             :                                int        strt_comp,
    2416             :                                int        ncomp,
    2417             :                                const Geometry& geom)
    2418             : {
    2419             :     BL_PROFILE("FabArray::setDomainBndry()");
    2420             : 
    2421      213210 :     Box domain_box = amrex::convert(geom.Domain(), boxArray().ixType());
    2422      639630 :     for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
    2423      426420 :         if (geom.isPeriodic(idim)) {
    2424      184585 :             int n = domain_box.length(idim);
    2425      184585 :             domain_box.grow(idim, n);
    2426             :         }
    2427             :     }
    2428             : 
    2429             : #ifdef AMREX_USE_OMP
    2430             : #pragma omp parallel if (Gpu::notInLaunchRegion())
    2431             : #endif
    2432     1376310 :     for (MFIter fai(*this); fai.isValid(); ++fai)
    2433             :     {
    2434     1163100 :         const Box& gbx = fai.fabbox();
    2435     1163100 :         if (! domain_box.contains(gbx))
    2436             :         {
    2437      345562 :             get(fai).template setComplement<RunOn::Device>(val, domain_box, strt_comp, ncomp);
    2438             :         }
    2439             :     }
    2440      213210 : }
    2441             : 
    2442             : template <class FAB>
    2443             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int> FOO>
    2444             : typename F::value_type
    2445             : FabArray<FAB>::sum (int comp, IntVect const& nghost, bool local) const
    2446             : {
    2447             :     BL_PROFILE("FabArray::sum()");
    2448             : 
    2449             :     using T = typename FAB::value_type;
    2450             :     auto sm = T(0.0);
    2451             : #ifdef AMREX_USE_GPU
    2452             :     if (Gpu::inLaunchRegion()) {
    2453             :         auto const& ma = this->const_arrays();
    2454             :         sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<T>{}, *this, nghost,
    2455             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
    2456             :                        -> GpuTuple<T>
    2457             :         {
    2458             :             return ma[box_no](i,j,k,comp);
    2459             :         });
    2460             :     } else
    2461             : #endif
    2462             :     {
    2463             : #ifdef AMREX_USE_OMP
    2464             : #pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
    2465             : #endif
    2466             :         for (MFIter mfi(*this,true); mfi.isValid(); ++mfi)
    2467             :         {
    2468             :             Box const& bx = mfi.growntilebox(nghost);
    2469             :             auto const& a = this->const_array(mfi);
    2470             :             auto tmp = T(0.0);
    2471             :             AMREX_LOOP_3D(bx, i, j, k,
    2472             :             {
    2473             :                 tmp += a(i,j,k,comp);
    2474             :             });
    2475             :             sm += tmp; // Do it this way so that it does not break regression tests.
    2476             :         }
    2477             :     }
    2478             : 
    2479             :     if (!local) {
    2480             :         ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
    2481             :     }
    2482             : 
    2483             :     return sm;
    2484             : }
    2485             : 
    2486             : template <class FAB>
    2487             : void
    2488             : FabArray<FAB>::copyTo (FAB& dest, int  nghost) const
    2489             : {
    2490             :     copyTo(dest, 0, 0, dest.nComp(), nghost);
    2491             : }
    2492             : 
    2493             : template <class FAB>
    2494             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    2495             : void
    2496       23978 : FabArray<FAB>::setVal (value_type val)
    2497             : {
    2498       23978 :     setVal(val,0,n_comp,n_grow);
    2499       23978 : }
    2500             : 
    2501             : template <class FAB>
    2502             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    2503             : FabArray<FAB>&
    2504             : FabArray<FAB>::operator= (value_type val)
    2505             : {
    2506             :     setVal(val);
    2507             :     return *this;
    2508             : }
    2509             : 
    2510             : template <class FAB>
    2511             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    2512             : void
    2513           0 : FabArray<FAB>::setVal (value_type val,
    2514             :                        int        comp,
    2515             :                        int        ncomp,
    2516             :                        int        nghost)
    2517             : {
    2518           0 :     setVal(val,comp,ncomp,IntVect(nghost));
    2519           0 : }
    2520             : 
    2521             : template <class FAB>
    2522             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
    2523             : void
    2524       23978 : FabArray<FAB>::setVal (value_type val,
    2525             :                        int        comp,
    2526             :                        int        ncomp,
    2527             :                        const IntVect& nghost)
    2528             : {
    2529             :     AMREX_ASSERT(nghost.allGE(0) && nghost.allLE(n_grow));
    2530       23978 :     AMREX_ALWAYS_ASSERT(comp+ncomp <= n_comp);
    2531             : 
    2532             :     BL_PROFILE("FabArray::setVal()");
    2533             : 
    2534             : #ifdef AMREX_USE_GPU
    2535             :     if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
    2536             :         auto const& fa = this->arrays();
    2537             :         ParallelFor(*this, nghost, ncomp,
    2538             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
    2539             :         {
    2540             :             fa[box_no](i,j,k,n+comp) = val;
    2541             :         });
    2542             :         if (!Gpu::inNoSyncRegion()) {
    2543             :             Gpu::streamSynchronize();
    2544             :         }
    2545             :     } else
    2546             : #endif
    2547             :     {
    2548             : #ifdef AMREX_USE_OMP
    2549             : #pragma omp parallel if (Gpu::notInLaunchRegion())
    2550             : #endif
    2551       81076 :         for (MFIter fai(*this,TilingIfNotGPU()); fai.isValid(); ++fai)
    2552             :         {
    2553       57098 :             const Box& bx = fai.growntilebox(nghost);
    2554       57098 :             auto fab = this->array(fai);
    2555    27835037 :             AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n,
    2556             :             {
    2557             :                 fab(i,j,k,n+comp) = val;
    2558             :             });
    2559             :         }
    2560             :     }
    2561       23978 : }
    2562             : 
    2563             : template <class FAB>
    2564             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    2565             : void
    2566             : FabArray<FAB>::setVal (value_type val,
    2567             :                        const Box& region,
    2568             :                        int        comp,
    2569             :                        int        ncomp,
    2570             :                        int        nghost)
    2571             : {
    2572             :     setVal(val,region,comp,ncomp,IntVect(nghost));
    2573             : }
    2574             : 
    2575             : template <class FAB>
    2576             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
    2577             : void
    2578             : FabArray<FAB>::setVal (value_type val,
    2579             :                        const Box& region,
    2580             :                        int        comp,
    2581             :                        int        ncomp,
    2582             :                        const IntVect& nghost)
    2583             : {
    2584             :     AMREX_ASSERT(nghost.allGE(0) && nghost.allLE(n_grow));
    2585             :     AMREX_ALWAYS_ASSERT(comp+ncomp <= n_comp);
    2586             : 
    2587             :     BL_PROFILE("FabArray::setVal(val,region,comp,ncomp,nghost)");
    2588             : 
    2589             : #ifdef AMREX_USE_GPU
    2590             :     if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
    2591             :         auto const& fa = this->arrays();
    2592             :         ParallelFor(*this, nghost, ncomp,
    2593             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
    2594             :         {
    2595             :             if (region.contains(i,j,k)) {
    2596             :                 fa[box_no](i,j,k,n+comp) = val;
    2597             :             }
    2598             :         });
    2599             :         if (!Gpu::inNoSyncRegion()) {
    2600             :             Gpu::streamSynchronize();
    2601             :         }
    2602             :     } else
    2603             : #endif
    2604             :     {
    2605             : #ifdef AMREX_USE_OMP
    2606             :         AMREX_ALWAYS_ASSERT(!omp_in_parallel());
    2607             : #pragma omp parallel if (Gpu::notInLaunchRegion())
    2608             : #endif
    2609             :         for (MFIter fai(*this,TilingIfNotGPU()); fai.isValid(); ++fai)
    2610             :         {
    2611             :             Box b = fai.growntilebox(nghost) & region;
    2612             : 
    2613             :             if (b.ok()) {
    2614             :                 auto fab = this->array(fai);
    2615             :                 AMREX_HOST_DEVICE_PARALLEL_FOR_4D( b, ncomp, i, j, k, n,
    2616             :                 {
    2617             :                     fab(i,j,k,n+comp) = val;
    2618             :                 });
    2619             :             }
    2620             :         }
    2621             :     }
    2622             : }
    2623             : 
    2624             : template <class FAB>
    2625             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    2626             : void
    2627             : FabArray<FAB>::abs (int comp, int ncomp, int nghost)
    2628             : {
    2629             :     abs(comp, ncomp, IntVect(nghost));
    2630             : }
    2631             : 
    2632             : template <class FAB>
    2633             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
    2634             : void
    2635             : FabArray<FAB>::abs (int comp, int ncomp, const IntVect& nghost)
    2636             : {
    2637             :     AMREX_ASSERT(nghost.allGE(0) && nghost.allLE(n_grow));
    2638             :     AMREX_ALWAYS_ASSERT(comp+ncomp <= n_comp);
    2639             :     BL_PROFILE("FabArray::abs()");
    2640             : 
    2641             : #ifdef AMREX_USE_GPU
    2642             :     if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
    2643             :         auto const& fa = this->arrays();
    2644             :         ParallelFor(*this, nghost, ncomp,
    2645             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
    2646             :         {
    2647             :             fa[box_no](i,j,k,n+comp) = std::abs(fa[box_no](i,j,k,n+comp));
    2648             :         });
    2649             :         if (!Gpu::inNoSyncRegion()) {
    2650             :             Gpu::streamSynchronize();
    2651             :         }
    2652             :     } else
    2653             : #endif
    2654             :     {
    2655             : #ifdef AMREX_USE_OMP
    2656             : #pragma omp parallel if (Gpu::notInLaunchRegion())
    2657             : #endif
    2658             :         for (MFIter mfi(*this,TilingIfNotGPU()); mfi.isValid(); ++mfi)
    2659             :         {
    2660             :             const Box& bx = mfi.growntilebox(nghost);
    2661             :             auto fab = this->array(mfi);
    2662             :             AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n,
    2663             :             {
    2664             :                 fab(i,j,k,n+comp) = std::abs(fab(i,j,k,n+comp));
    2665             :             });
    2666             :         }
    2667             :     }
    2668             : }
    2669             : 
    2670             : template <class FAB>
    2671             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
    2672             : void
    2673             : FabArray<FAB>::plus (value_type val, int comp, int num_comp, int nghost)
    2674             : {
    2675             :     BL_PROFILE("FabArray::plus()");
    2676             : 
    2677             : #ifdef AMREX_USE_GPU
    2678             :     if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
    2679             :         auto const& fa = this->arrays();
    2680             :         ParallelFor(*this, IntVect(nghost), num_comp,
    2681             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
    2682             :         {
    2683             :             fa[box_no](i,j,k,n+comp) += val;
    2684             :         });
    2685             :         if (!Gpu::inNoSyncRegion()) {
    2686             :             Gpu::streamSynchronize();
    2687             :         }
    2688             :     } else
    2689             : #endif
    2690             :     {
    2691             : #ifdef AMREX_USE_OMP
    2692             : #pragma omp parallel if (Gpu::notInLaunchRegion())
    2693             : #endif
    2694             :         for (MFIter mfi(*this,TilingIfNotGPU()); mfi.isValid(); ++mfi)
    2695             :         {
    2696             :             const Box& bx = mfi.growntilebox(nghost);
    2697             :             auto fab = this->array(mfi);
    2698             :             AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, num_comp, i, j, k, n,
    2699             :             {
    2700             :                 fab(i,j,k,n+comp) += val;
    2701             :             });
    2702             :         }
    2703             :     }
    2704             : }
    2705             : 
    2706             : template <class FAB>
    2707             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
    2708             : void
    2709             : FabArray<FAB>::plus (value_type val, const Box& region, int comp, int num_comp, int nghost)
    2710             : {
    2711             :     BL_PROFILE("FabArray::plus(val, region, comp, num_comp, nghost)");
    2712             : 
    2713             : #ifdef AMREX_USE_GPU
    2714             :     if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
    2715             :         auto const& fa = this->arrays();
    2716             :         ParallelFor(*this, IntVect(nghost), num_comp,
    2717             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
    2718             :         {
    2719             :             if (region.contains(i,j,k)) {
    2720             :                 fa[box_no](i,j,k,n+comp) += val;
    2721             :             }
    2722             :         });
    2723             :         if (!Gpu::inNoSyncRegion()) {
    2724             :             Gpu::streamSynchronize();
    2725             :         }
    2726             :     } else
    2727             : #endif
    2728             :     {
    2729             : #ifdef AMREX_USE_OMP
    2730             : #pragma omp parallel if (Gpu::notInLaunchRegion())
    2731             : #endif
    2732             :         for (MFIter mfi(*this,TilingIfNotGPU()); mfi.isValid(); ++mfi)
    2733             :         {
    2734             :             const Box& bx = mfi.growntilebox(nghost) & region;
    2735             :             if (bx.ok()) {
    2736             :                 auto fab = this->array(mfi);
    2737             :                 AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, num_comp, i, j, k, n,
    2738             :                 {
    2739             :                     fab(i,j,k,n+comp) += val;
    2740             :                 });
    2741             :             }
    2742             :         }
    2743             :     }
    2744             : }
    2745             : 
    2746             : template <class FAB>
    2747             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
    2748             : void
    2749             : FabArray<FAB>::mult (value_type val, int comp, int num_comp, int nghost)
    2750             : {
    2751             :     BL_PROFILE("FabArray::mult()");
    2752             : 
    2753             : #ifdef AMREX_USE_GPU
    2754             :     if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
    2755             :         auto const& fa = this->arrays();
    2756             :         ParallelFor(*this, IntVect(nghost), num_comp,
    2757             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
    2758             :         {
    2759             :             fa[box_no](i,j,k,n+comp) *= val;
    2760             :         });
    2761             :         if (!Gpu::inNoSyncRegion()) {
    2762             :             Gpu::streamSynchronize();
    2763             :         }
    2764             :     } else
    2765             : #endif
    2766             :     {
    2767             : #ifdef AMREX_USE_OMP
    2768             : #pragma omp parallel if (Gpu::notInLaunchRegion())
    2769             : #endif
    2770             :         for (MFIter mfi(*this,TilingIfNotGPU()); mfi.isValid(); ++mfi)
    2771             :         {
    2772             :             const Box& bx = mfi.growntilebox(nghost);
    2773             :             auto fab = this->array(mfi);
    2774             :             AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, num_comp, i, j, k, n,
    2775             :             {
    2776             :                 fab(i,j,k,n+comp) *= val;
    2777             :             });
    2778             :         }
    2779             :     }
    2780             : }
    2781             : 
    2782             : template <class FAB>
    2783             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
    2784             : void
    2785             : FabArray<FAB>::mult (value_type val, const Box& region, int comp, int num_comp, int nghost)
    2786             : {
    2787             :     BL_PROFILE("FabArray::mult(val, region, comp, num_comp, nghost)");
    2788             : 
    2789             : #ifdef AMREX_USE_GPU
    2790             :     if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
    2791             :         auto const& fa = this->arrays();
    2792             :         ParallelFor(*this, IntVect(nghost), num_comp,
    2793             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
    2794             :         {
    2795             :             if (region.contains(i,j,k)) {
    2796             :                 fa[box_no](i,j,k,n+comp) *= val;
    2797             :             }
    2798             :         });
    2799             :         if (!Gpu::inNoSyncRegion()) {
    2800             :             Gpu::streamSynchronize();
    2801             :         }
    2802             :     } else
    2803             : #endif
    2804             :     {
    2805             : #ifdef AMREX_USE_OMP
    2806             : #pragma omp parallel if (Gpu::notInLaunchRegion())
    2807             : #endif
    2808             :         for (MFIter mfi(*this,TilingIfNotGPU()); mfi.isValid(); ++mfi)
    2809             :         {
    2810             :             const Box& bx = mfi.growntilebox(nghost) & region;
    2811             :             if (bx.ok()) {
    2812             :                 auto fab = this->array(mfi);
    2813             :                 AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, num_comp, i, j, k, n,
    2814             :                 {
    2815             :                     fab(i,j,k,n+comp) *= val;
    2816             :                 });
    2817             :             }
    2818             :         }
    2819             :     }
    2820             : }
    2821             : 
    2822             : template <class FAB>
    2823             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
    2824             : void
    2825             : FabArray<FAB>::invert (value_type numerator, int comp, int num_comp, int nghost)
    2826             : {
    2827             :     BL_PROFILE("FabArray::invert()");
    2828             : 
    2829             : #ifdef AMREX_USE_GPU
    2830             :     if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
    2831             :         auto const& fa = this->arrays();
    2832             :         ParallelFor(*this, IntVect(nghost), num_comp,
    2833             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
    2834             :         {
    2835             :             fa[box_no](i,j,k,n+comp) = numerator / fa[box_no](i,j,k,n+comp);
    2836             :         });
    2837             :         if (!Gpu::inNoSyncRegion()) {
    2838             :             Gpu::streamSynchronize();
    2839             :         }
    2840             :     } else
    2841             : #endif
    2842             :     {
    2843             : #ifdef AMREX_USE_OMP
    2844             : #pragma omp parallel if (Gpu::notInLaunchRegion())
    2845             : #endif
    2846             :         for (MFIter mfi(*this,TilingIfNotGPU()); mfi.isValid(); ++mfi)
    2847             :         {
    2848             :             const Box& bx = mfi.growntilebox(nghost);
    2849             :             auto fab = this->array(mfi);
    2850             :             AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, num_comp, i, j, k, n,
    2851             :             {
    2852             :                 fab(i,j,k,n+comp) = numerator / fab(i,j,k,n+comp);
    2853             :             });
    2854             :         }
    2855             :     }
    2856             : }
    2857             : 
    2858             : template <class FAB>
    2859             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
    2860             : void
    2861             : FabArray<FAB>::invert (value_type numerator, const Box& region, int comp, int num_comp, int nghost)
    2862             : {
    2863             :     BL_PROFILE("FabArray::invert(numerator, region, comp, num_comp, nghost)");
    2864             : 
    2865             : #ifdef AMREX_USE_GPU
    2866             :     if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
    2867             :         auto const& fa = this->arrays();
    2868             :         ParallelFor(*this, IntVect(nghost), num_comp,
    2869             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
    2870             :         {
    2871             :             if (region.contains(i,j,k)) {
    2872             :                 fa[box_no](i,j,k,n+comp) = numerator / fa[box_no](i,j,k,n+comp);
    2873             :             }
    2874             :         });
    2875             :         if (!Gpu::inNoSyncRegion()) {
    2876             :             Gpu::streamSynchronize();
    2877             :         }
    2878             :     } else
    2879             : #endif
    2880             :     {
    2881             : #ifdef AMREX_USE_OMP
    2882             : #pragma omp parallel if (Gpu::notInLaunchRegion())
    2883             : #endif
    2884             :         for (MFIter mfi(*this,TilingIfNotGPU()); mfi.isValid(); ++mfi)
    2885             :         {
    2886             :             const Box& bx = mfi.growntilebox(nghost) & region;
    2887             :             if (bx.ok()) {
    2888             :                 auto fab = this->array(mfi);
    2889             :                 AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, num_comp, i, j, k, n,
    2890             :                 {
    2891             :                     fab(i,j,k,n+comp) = numerator / fab(i,j,k,n+comp);
    2892             :                 });
    2893             :             }
    2894             :         }
    2895             :     }
    2896             : }
    2897             : 
    2898             : template <class FAB>
    2899             : void
    2900             : FabArray<FAB>::shift (const IntVect& v)
    2901             : {
    2902             :     clearThisBD();  // The new boxarray will have a different ID.
    2903             :     boxarray.shift(v);
    2904             :     addThisBD();
    2905             : #ifdef AMREX_USE_OMP
    2906             : #pragma omp parallel
    2907             : #endif
    2908             :     for (MFIter fai(*this); fai.isValid(); ++fai)
    2909             :     {
    2910             :         get(fai).shift(v);
    2911             :     }
    2912             :     clear_arrays();
    2913             : }
    2914             : 
    2915             : template <class FAB>
    2916             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int> FOO>
    2917             : void FabArray<FAB>::Saxpy (FabArray<FAB>& y, value_type a, FabArray<FAB> const& x,
    2918             :                            int xcomp, int ycomp, int ncomp, IntVect const& nghost)
    2919             : {
    2920             :     AMREX_ASSERT(y.boxArray() == x.boxArray());
    2921             :     AMREX_ASSERT(y.distributionMap == x.distributionMap);
    2922             :     AMREX_ASSERT(y.nGrowVect().allGE(nghost) && x.nGrowVect().allGE(nghost));
    2923             : 
    2924             :     BL_PROFILE("FabArray::Saxpy()");
    2925             : 
    2926             : #ifdef AMREX_USE_GPU
    2927             :     if (Gpu::inLaunchRegion() && y.isFusingCandidate()) {
    2928             :         auto const& yma = y.arrays();
    2929             :         auto const& xma = x.const_arrays();
    2930             :         ParallelFor(y, nghost, ncomp,
    2931             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
    2932             :         {
    2933             :             yma[box_no](i,j,k,ycomp+n) += a * xma[box_no](i,j,k,xcomp+n);
    2934             :         });
    2935             :         if (!Gpu::inNoSyncRegion()) {
    2936             :             Gpu::streamSynchronize();
    2937             :         }
    2938             :     } else
    2939             : #endif
    2940             :     {
    2941             : #ifdef AMREX_USE_OMP
    2942             : #pragma omp parallel if (Gpu::notInLaunchRegion())
    2943             : #endif
    2944             :         for (MFIter mfi(y,TilingIfNotGPU()); mfi.isValid(); ++mfi)
    2945             :         {
    2946             :             const Box& bx = mfi.growntilebox(nghost);
    2947             : 
    2948             :             if (bx.ok()) {
    2949             :                 auto const& xfab = x.const_array(mfi);
    2950             :                 auto const& yfab = y.array(mfi);
    2951             :                 AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n,
    2952             :                 {
    2953             :                     yfab(i,j,k,ycomp+n) += a * xfab(i,j,k,xcomp+n);
    2954             :                 });
    2955             :             }
    2956             :         }
    2957             :     }
    2958             : }
    2959             : 
    2960             : template <class FAB>
    2961             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int> FOO>
    2962             : void
    2963             : FabArray<FAB>::Xpay (FabArray<FAB>& y, value_type a, FabArray<FAB> const& x,
    2964             :                      int xcomp, int ycomp, int ncomp, IntVect const& nghost)
    2965             : {
    2966             :     AMREX_ASSERT(y.boxArray() == x.boxArray());
    2967             :     AMREX_ASSERT(y.distributionMap == x.distributionMap);
    2968             :     AMREX_ASSERT(y.nGrowVect().allGE(nghost) && x.nGrowVect().allGE(nghost));
    2969             : 
    2970             :     BL_PROFILE("FabArray::Xpay()");
    2971             : 
    2972             : #ifdef AMREX_USE_GPU
    2973             :     if (Gpu::inLaunchRegion() && y.isFusingCandidate()) {
    2974             :         auto const& yfa = y.arrays();
    2975             :         auto const& xfa = x.const_arrays();
    2976             :         ParallelFor(y, nghost, ncomp,
    2977             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
    2978             :         {
    2979             :             yfa[box_no](i,j,k,n+ycomp) = xfa[box_no](i,j,k,n+xcomp)
    2980             :                 +                    a * yfa[box_no](i,j,k,n+ycomp);
    2981             :         });
    2982             :         if (!Gpu::inNoSyncRegion()) {
    2983             :             Gpu::streamSynchronize();
    2984             :         }
    2985             :     } else
    2986             : #endif
    2987             :     {
    2988             : #ifdef AMREX_USE_OMP
    2989             : #pragma omp parallel if (Gpu::notInLaunchRegion())
    2990             : #endif
    2991             :         for (MFIter mfi(y,TilingIfNotGPU()); mfi.isValid(); ++mfi)
    2992             :         {
    2993             :             const Box& bx = mfi.growntilebox(nghost);
    2994             :             auto const& xFab = x.const_array(mfi);
    2995             :             auto const& yFab = y.array(mfi);
    2996             :             AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n,
    2997             :             {
    2998             :                 yFab(i,j,k,n+ycomp) = xFab(i,j,k,n+xcomp)
    2999             :                     +             a * yFab(i,j,k,n+ycomp);
    3000             :             });
    3001             :         }
    3002             :     }
    3003             : }
    3004             : 
    3005             : template <class FAB>
    3006             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int> FOO>
    3007             : void
    3008             : FabArray<FAB>::LinComb (FabArray<FAB>& dst,
    3009             :                         value_type a, const FabArray<FAB>& x, int xcomp,
    3010             :                         value_type b, const FabArray<FAB>& y, int ycomp,
    3011             :                         int dstcomp, int numcomp, const IntVect& nghost)
    3012             : {
    3013             :     AMREX_ASSERT(dst.boxArray() == x.boxArray());
    3014             :     AMREX_ASSERT(dst.distributionMap == x.distributionMap);
    3015             :     AMREX_ASSERT(dst.boxArray() == y.boxArray());
    3016             :     AMREX_ASSERT(dst.distributionMap == y.distributionMap);
    3017             :     AMREX_ASSERT(dst.nGrowVect().allGE(nghost) && x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost));
    3018             : 
    3019             :     BL_PROFILE("FabArray::LinComb()");
    3020             : 
    3021             : #ifdef AMREX_USE_GPU
    3022             :     if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
    3023             :         auto const& dstma = dst.arrays();
    3024             :         auto const& xma = x.const_arrays();
    3025             :         auto const& yma = y.const_arrays();
    3026             :         ParallelFor(dst, nghost, numcomp,
    3027             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
    3028             :         {
    3029             :             dstma[box_no](i,j,k,dstcomp+n) = a*xma[box_no](i,j,k,xcomp+n)
    3030             :                 +                            b*yma[box_no](i,j,k,ycomp+n);
    3031             :         });
    3032             :         if (!Gpu::inNoSyncRegion()) {
    3033             :             Gpu::streamSynchronize();
    3034             :         }
    3035             :     } else
    3036             : #endif
    3037             :     {
    3038             : #ifdef AMREX_USE_OMP
    3039             : #pragma omp parallel if (Gpu::notInLaunchRegion())
    3040             : #endif
    3041             :         for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
    3042             :         {
    3043             :             const Box& bx = mfi.growntilebox(nghost);
    3044             :             auto const& xfab = x.const_array(mfi);
    3045             :             auto const& yfab = y.const_array(mfi);
    3046             :             auto const& dfab = dst.array(mfi);
    3047             :             AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
    3048             :             {
    3049             :                 dfab(i,j,k,dstcomp+n) = a*xfab(i,j,k,xcomp+n) + b*yfab(i,j,k,ycomp+n);
    3050             :             });
    3051             :         }
    3052             :     }
    3053             : }
    3054             : 
    3055             : template <class FAB>
    3056             : template <typename BUF>
    3057             : void
    3058      216055 : FabArray<FAB>::FillBoundary (bool cross)
    3059             : {
    3060             :     BL_PROFILE("FabArray::FillBoundary()");
    3061      432110 :     if ( n_grow.max() > 0 ) {
    3062      216055 :         FillBoundary_nowait<BUF>(0, nComp(), n_grow, Periodicity::NonPeriodic(), cross);
    3063      216055 :         FillBoundary_finish<BUF>();
    3064             :     }
    3065      216055 : }
    3066             : 
    3067             : template <class FAB>
    3068             : template <typename BUF>
    3069             : void
    3070      433151 : FabArray<FAB>::FillBoundary (const Periodicity& period, bool cross)
    3071             : {
    3072             :     BL_PROFILE("FabArray::FillBoundary()");
    3073      866302 :     if ( n_grow.max() > 0 ) {
    3074      219941 :         FillBoundary_nowait<BUF>(0, nComp(), n_grow, period, cross);
    3075      219941 :         FillBoundary_finish<BUF>();
    3076             :     }
    3077      433151 : }
    3078             : 
    3079             : template <class FAB>
    3080             : template <typename BUF>
    3081             : void
    3082             : FabArray<FAB>::FillBoundary (const IntVect& nghost, const Periodicity& period, bool cross)
    3083             : {
    3084             :     BL_PROFILE("FabArray::FillBoundary()");
    3085             :     AMREX_ALWAYS_ASSERT_WITH_MESSAGE(nghost.allLE(nGrowVect()),
    3086             :                                      "FillBoundary: asked to fill more ghost cells than we have");
    3087             :     if ( nghost.max() > 0 ) {
    3088             :         FillBoundary_nowait<BUF>(0, nComp(), nghost, period, cross);
    3089             :         FillBoundary_finish<BUF>();
    3090             :     }
    3091             : }
    3092             : 
    3093             : template <class FAB>
    3094             : template <typename BUF>
    3095             : void
    3096             : FabArray<FAB>::FillBoundary (int scomp, int ncomp, bool cross)
    3097             : {
    3098             :     BL_PROFILE("FabArray::FillBoundary()");
    3099             :     if ( n_grow.max() > 0 ) {
    3100             :         FillBoundary_nowait<BUF>(scomp, ncomp, n_grow, Periodicity::NonPeriodic(), cross);
    3101             :         FillBoundary_finish<BUF>();
    3102             :     }
    3103             : }
    3104             : 
    3105             : template <class FAB>
    3106             : template <typename BUF>
    3107             : void
    3108             : FabArray<FAB>::FillBoundary (int scomp, int ncomp, const Periodicity& period, bool cross)
    3109             : {
    3110             :     BL_PROFILE("FabArray::FillBoundary()");
    3111             :     if ( n_grow.max() > 0 ) {
    3112             :         FillBoundary_nowait<BUF>(scomp, ncomp, n_grow, period, cross);
    3113             :         FillBoundary_finish<BUF>();
    3114             :     }
    3115             : }
    3116             : 
    3117             : template <class FAB>
    3118             : template <typename BUF>
    3119             : void
    3120      216914 : FabArray<FAB>::FillBoundary (int scomp, int ncomp, const IntVect& nghost,
    3121             :                              const Periodicity& period, bool cross)
    3122             : {
    3123             :     BL_PROFILE("FabArray::FillBoundary()");
    3124      433828 :     AMREX_ALWAYS_ASSERT_WITH_MESSAGE(nghost.allLE(nGrowVect()),
    3125             :                                      "FillBoundary: asked to fill more ghost cells than we have");
    3126      216914 :     if ( nghost.max() > 0 ) {
    3127      216914 :         FillBoundary_nowait<BUF>(scomp, ncomp, nghost, period, cross);
    3128      216914 :         FillBoundary_finish<BUF>();
    3129             :     }
    3130      216914 : }
    3131             : 
    3132             : template <class FAB>
    3133             : template <typename BUF>
    3134             : void
    3135             : FabArray<FAB>::FillBoundary_nowait (bool cross)
    3136             : {
    3137             :     FillBoundary_nowait<BUF>(0, nComp(), nGrowVect(), Periodicity::NonPeriodic(), cross);
    3138             : }
    3139             : 
    3140             : template <class FAB>
    3141             : template <typename BUF>
    3142             : void
    3143             : FabArray<FAB>::FillBoundary_nowait (const Periodicity& period, bool cross)
    3144             : {
    3145             :     FillBoundary_nowait<BUF>(0, nComp(), nGrowVect(), period, cross);
    3146             : }
    3147             : 
    3148             : template <class FAB>
    3149             : template <typename BUF>
    3150             : void
    3151             : FabArray<FAB>::FillBoundary_nowait (const IntVect& nghost, const Periodicity& period, bool cross)
    3152             : {
    3153             :     FillBoundary_nowait<BUF>(0, nComp(), nghost, period, cross);
    3154             : }
    3155             : 
    3156             : template <class FAB>
    3157             : template <typename BUF>
    3158             : void
    3159             : FabArray<FAB>::FillBoundary_nowait (int scomp, int ncomp, bool cross)
    3160             : {
    3161             :     FillBoundary_nowait<BUF>(scomp, ncomp, nGrowVect(), Periodicity::NonPeriodic(), cross);
    3162             : }
    3163             : 
    3164             : template <class FAB>
    3165             : void
    3166             : FabArray<FAB>::FillBoundaryAndSync (const Periodicity& period)
    3167             : {
    3168             :     BL_PROFILE("FabArray::FillBoundaryAndSync()");
    3169             :     if (n_grow.max() > 0 || !is_cell_centered()) {
    3170             :         FillBoundaryAndSync_nowait(0, nComp(), n_grow, period);
    3171             :         FillBoundaryAndSync_finish();
    3172             :     }
    3173             : }
    3174             : 
    3175             : template <class FAB>
    3176             : void
    3177             : FabArray<FAB>::FillBoundaryAndSync (int scomp, int ncomp, const IntVect& nghost,
    3178             :                                     const Periodicity& period)
    3179             : {
    3180             :     BL_PROFILE("FabArray::FillBoundaryAndSync()");
    3181             :     if (nghost.max() > 0 || !is_cell_centered()) {
    3182             :         FillBoundaryAndSync_nowait(scomp, ncomp, nghost, period);
    3183             :         FillBoundaryAndSync_finish();
    3184             :     }
    3185             : }
    3186             : 
    3187             : template <class FAB>
    3188             : void
    3189             : FabArray<FAB>::FillBoundaryAndSync_nowait (const Periodicity& period)
    3190             : {
    3191             :     FillBoundaryAndSync_nowait(0, nComp(), nGrowVect(), period);
    3192             : }
    3193             : 
    3194             : template <class FAB>
    3195             : void
    3196             : FabArray<FAB>::FillBoundaryAndSync_nowait (int scomp, int ncomp, const IntVect& nghost,
    3197             :                                            const Periodicity& period)
    3198             : {
    3199             :     BL_PROFILE("FillBoundaryAndSync_nowait()");
    3200             :     FBEP_nowait(scomp, ncomp, nghost, period, false, false, true);
    3201             : }
    3202             : 
    3203             : template <class FAB>
    3204             : void
    3205             : FabArray<FAB>::FillBoundaryAndSync_finish ()
    3206             : {
    3207             :     BL_PROFILE("FillBoundaryAndSync_finish()");
    3208             :     FillBoundary_finish();
    3209             : }
    3210             : 
    3211             : template <class FAB>
    3212             : void
    3213             : FabArray<FAB>::OverrideSync (const Periodicity& period)
    3214             : {
    3215             :     BL_PROFILE("FAbArray::OverrideSync()");
    3216             :     if (!is_cell_centered()) {
    3217             :         OverrideSync_nowait(0, nComp(), period);
    3218             :         OverrideSync_finish();
    3219             :     }
    3220             : }
    3221             : 
    3222             : template <class FAB>
    3223             : void
    3224             : FabArray<FAB>::OverrideSync (int scomp, int ncomp, const Periodicity& period)
    3225             : {
    3226             :     BL_PROFILE("FAbArray::OverrideSync()");
    3227             :     if (!is_cell_centered()) {
    3228             :         OverrideSync_nowait(scomp, ncomp, period);
    3229             :         OverrideSync_finish();
    3230             :     }
    3231             : }
    3232             : 
    3233             : template <class FAB>
    3234             : void
    3235             : FabArray<FAB>::OverrideSync_nowait (const Periodicity& period)
    3236             : {
    3237             :     OverrideSync_nowait(0, nComp(), period);
    3238             : }
    3239             : 
    3240             : template <class FAB>
    3241             : void
    3242             : FabArray<FAB>::OverrideSync_nowait (int scomp, int ncomp, const Periodicity& period)
    3243             : {
    3244             :     BL_PROFILE("OverrideSync_nowait()");
    3245             :     FBEP_nowait(scomp, ncomp, IntVect(0), period, false, false, true);
    3246             : }
    3247             : 
    3248             : template <class FAB>
    3249             : void
    3250             : FabArray<FAB>::OverrideSync_finish ()
    3251             : {
    3252             :     BL_PROFILE("OverrideSync_finish()");
    3253             :     FillBoundary_finish();
    3254             : }
    3255             : 
    3256             : template <class FAB>
    3257             : void
    3258             : FabArray<FAB>::SumBoundary (const Periodicity& period)
    3259             : {
    3260             :     SumBoundary(0, n_comp, IntVect(0), period);
    3261             : }
    3262             : 
    3263             : template <class FAB>
    3264             : void
    3265             : FabArray<FAB>::SumBoundary (int scomp, int ncomp, const Periodicity& period)
    3266             : {
    3267             :     SumBoundary(scomp, ncomp, IntVect(0), period);
    3268             : }
    3269             : 
    3270             : template <class FAB>
    3271             : void
    3272             : FabArray<FAB>::SumBoundary (int scomp, int ncomp, IntVect const& nghost, const Periodicity& period)
    3273             : {
    3274             :     SumBoundary(scomp, ncomp, this->nGrowVect(), nghost, period);
    3275             : }
    3276             : 
    3277             : template <class FAB>
    3278             : void
    3279             : FabArray<FAB>::SumBoundary (int scomp, int ncomp, IntVect const& src_nghost, IntVect const& dst_nghost, const Periodicity& period)
    3280             : {
    3281             :     BL_PROFILE("FabArray<FAB>::SumBoundary()");
    3282             : 
    3283             :     SumBoundary_nowait(scomp, ncomp, src_nghost, dst_nghost, period);
    3284             :     SumBoundary_finish();
    3285             : }
    3286             : 
    3287             : template <class FAB>
    3288             : void
    3289             : FabArray<FAB>::SumBoundary_nowait (const Periodicity& period)
    3290             : {
    3291             :     SumBoundary_nowait(0, n_comp, IntVect(0), period);
    3292             : }
    3293             : 
    3294             : template <class FAB>
    3295             : void
    3296             : FabArray<FAB>::SumBoundary_nowait (int scomp, int ncomp, const Periodicity& period)
    3297             : {
    3298             :     SumBoundary_nowait(scomp, ncomp, IntVect(0), period);
    3299             : }
    3300             : 
    3301             : template <class FAB>
    3302             : void
    3303             : FabArray<FAB>::SumBoundary_nowait (int scomp, int ncomp, IntVect const& nghost, const Periodicity& period)
    3304             : {
    3305             :     SumBoundary_nowait(scomp, ncomp, this->nGrowVect(), nghost, period);
    3306             : }
    3307             : 
    3308             : template <class FAB>
    3309             : void
    3310             : FabArray<FAB>::SumBoundary_nowait (int scomp, int ncomp, IntVect const& src_nghost, IntVect const& dst_nghost, const Periodicity& period)
    3311             : {
    3312             :     BL_PROFILE("FabArray<FAB>::SumBoundary_nowait()");
    3313             : 
    3314             :     if ( n_grow == IntVect::TheZeroVector() && boxArray().ixType().cellCentered()) { return; }
    3315             : 
    3316             :     AMREX_ALWAYS_ASSERT(src_nghost.allLE(n_grow));
    3317             : 
    3318             :     auto* tmp = new FabArray<FAB>( boxArray(), DistributionMap(), ncomp, src_nghost, MFInfo(), Factory() );
    3319             :     amrex::Copy(*tmp, *this, scomp, 0, ncomp, src_nghost);
    3320             :     this->setVal(typename FAB::value_type(0), scomp, ncomp, dst_nghost);
    3321             :     this->ParallelCopy_nowait(*tmp,0,scomp,ncomp,src_nghost,dst_nghost,period,FabArrayBase::ADD);
    3322             : 
    3323             :     // All local. Operation complete.
    3324             :     if (!this->pcd) { delete tmp; }
    3325             : }
    3326             : 
    3327             : template <class FAB>
    3328             : void
    3329             : FabArray<FAB>::SumBoundary_finish ()
    3330             : {
    3331             :     BL_PROFILE("FabArray<FAB>::SumBoundary_finish()");
    3332             : 
    3333             :     // If pcd doesn't exist, ParallelCopy was all local and operation was fully completed in "SumBoundary_nowait".
    3334             :     if ( (n_grow == IntVect::TheZeroVector() && boxArray().ixType().cellCentered()) || !(this->pcd) ) { return; }
    3335             : 
    3336             :     auto* tmp = const_cast<FabArray<FAB>*> (this->pcd->src);
    3337             :     this->ParallelCopy_finish();
    3338             :     delete tmp;
    3339             : }
    3340             : 
    3341             : template <class FAB>
    3342             : void
    3343             : FabArray<FAB>::EnforcePeriodicity (const Periodicity& period)
    3344             : {
    3345             :     BL_PROFILE("FabArray::EnforcePeriodicity");
    3346             :     if (period.isAnyPeriodic()) {
    3347             :         FBEP_nowait(0, nComp(), nGrowVect(), period, false, true);
    3348             :         FillBoundary_finish(); // unsafe unless isAnyPeriodic()
    3349             :     }
    3350             : }
    3351             : 
    3352             : template <class FAB>
    3353             : void
    3354             : FabArray<FAB>::EnforcePeriodicity (int scomp, int ncomp, const Periodicity& period)
    3355             : {
    3356             :     BL_PROFILE("FabArray::EnforcePeriodicity");
    3357             :     if (period.isAnyPeriodic()) {
    3358             :         FBEP_nowait(scomp, ncomp, nGrowVect(), period, false, true);
    3359             :         FillBoundary_finish(); // unsafe unless isAnyPeriodic()
    3360             :     }
    3361             : }
    3362             : 
    3363             : template <class FAB>
    3364             : void
    3365             : FabArray<FAB>::EnforcePeriodicity (int scomp, int ncomp, const IntVect& nghost,
    3366             :                                    const Periodicity& period)
    3367             : {
    3368             :     BL_PROFILE("FabArray::EnforcePeriodicity");
    3369             :     if (period.isAnyPeriodic()) {
    3370             :         FBEP_nowait(scomp, ncomp, nghost, period, false, true);
    3371             :         FillBoundary_finish(); // unsafe unless isAnyPeriodic()
    3372             :     }
    3373             : }
    3374             : 
    3375             : template <class FAB>
    3376             : template <typename BUF>
    3377             : void
    3378             : FabArray<FAB>::FillBoundary_nowait (int scomp, int ncomp, const Periodicity& period, bool cross)
    3379             : {
    3380             :     FBEP_nowait<BUF>(scomp, ncomp, nGrowVect(), period, cross);
    3381             : }
    3382             : 
    3383             : template <class FAB>
    3384             : template <typename BUF>
    3385             : void
    3386      652910 : FabArray<FAB>::FillBoundary_nowait (int scomp, int ncomp, const IntVect& nghost,
    3387             :                                     const Periodicity& period, bool cross)
    3388             : {
    3389      652910 :     FBEP_nowait<BUF>(scomp, ncomp, nghost, period, cross);
    3390      652910 : }
    3391             : 
    3392             : template <class FAB>
    3393             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
    3394             : void
    3395             : FabArray<FAB>::BuildMask (const Box& phys_domain, const Periodicity& period,
    3396             :                           value_type covered, value_type notcovered,
    3397             :                           value_type physbnd, value_type interior)
    3398             : {
    3399             :     BL_PROFILE("FabArray::BuildMask()");
    3400             : 
    3401             :     int ncomp = this->nComp();
    3402             :     const IntVect& ngrow = this->nGrowVect();
    3403             : 
    3404             :     Box domain = amrex::convert(phys_domain, boxArray().ixType());
    3405             :     for (int i = 0; i < AMREX_SPACEDIM; ++i) {
    3406             :         if (period.isPeriodic(i)) {
    3407             :             domain.grow(i, ngrow[i]);
    3408             :         }
    3409             :     }
    3410             : 
    3411             : #ifdef AMREX_USE_GPU
    3412             :     if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
    3413             :         auto const& fa = this->arrays();
    3414             :         ParallelFor(*this, ngrow, ncomp,
    3415             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
    3416             :         {
    3417             :             auto const& fab = fa[box_no];
    3418             :             Box vbx(fab);
    3419             :             vbx.grow(-ngrow);
    3420             :             if (vbx.contains(i,j,k)) {
    3421             :                 fab(i,j,k,n) = interior;
    3422             :             } else if (domain.contains(i,j,k)) {
    3423             :                 fab(i,j,k,n) = notcovered;
    3424             :             } else {
    3425             :                 fab(i,j,k,n) = physbnd;
    3426             :             }
    3427             :         });
    3428             :         if (!Gpu::inNoSyncRegion()) {
    3429             :             Gpu::streamSynchronize();
    3430             :         }
    3431             :     } else
    3432             : #endif
    3433             :     {
    3434             : #ifdef AMREX_USE_OMP
    3435             : #pragma omp parallel if (Gpu::notInLaunchRegion())
    3436             : #endif
    3437             :         for (MFIter mfi(*this,TilingIfNotGPU()); mfi.isValid(); ++mfi)
    3438             :         {
    3439             :             auto const& fab = this->array(mfi);
    3440             :             Box const& fbx = mfi.growntilebox();
    3441             :             Box const& gbx = fbx & domain;
    3442             :             Box const& vbx = mfi.validbox();
    3443             :             AMREX_HOST_DEVICE_FOR_4D(fbx, ncomp, i, j, k, n,
    3444             :             {
    3445             :                 if (vbx.contains(i,j,k)) {
    3446             :                     fab(i,j,k,n) = interior;
    3447             :                 } else if (gbx.contains(i,j,k)) {
    3448             :                     fab(i,j,k,n) = notcovered;
    3449             :                 } else {
    3450             :                     fab(i,j,k,n) = physbnd;
    3451             :                 }
    3452             :             });
    3453             :         }
    3454             :     }
    3455             : 
    3456             :     const FabArrayBase::FB& TheFB = this->getFB(ngrow,period);
    3457             :     setVal(covered, TheFB, 0, ncomp);
    3458             : }
    3459             : 
    3460             : template <class FAB>
    3461             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    3462             : void
    3463         409 : FabArray<FAB>::setVal (value_type val, const CommMetaData& thecmd, int scomp, int ncomp)
    3464             : {
    3465             :     BL_PROFILE("FabArray::setVal(val, thecmd, scomp, ncomp)");
    3466             : 
    3467             : #ifdef AMREX_USE_GPU
    3468             :     if (Gpu::inLaunchRegion())
    3469             :     {
    3470             :         CMD_local_setVal_gpu(val, thecmd, scomp, ncomp);
    3471             :         CMD_remote_setVal_gpu(val, thecmd, scomp, ncomp);
    3472             :     }
    3473             :     else
    3474             : #endif
    3475             :     {
    3476             :         AMREX_ASSERT(thecmd.m_LocTags && thecmd.m_RcvTags);
    3477         409 :         const CopyComTagsContainer&      LocTags = *(thecmd.m_LocTags);
    3478         409 :         const MapOfCopyComTagContainers& RcvTags = *(thecmd.m_RcvTags);
    3479         409 :         auto N_locs = static_cast<int>(LocTags.size());
    3480             : #ifdef AMREX_USE_OMP
    3481             : #pragma omp parallel for if (thecmd.m_threadsafe_loc)
    3482             : #endif
    3483        1204 :         for (int i = 0; i < N_locs; ++i) {
    3484         795 :             const CopyComTag& tag = LocTags[i];
    3485         795 :             (*this)[tag.dstIndex].template setVal<RunOn::Host>(val, tag.dbox, scomp, ncomp);
    3486             :         }
    3487             : 
    3488         409 :         for (const auto & RcvTag : RcvTags) {
    3489           0 :             auto N = static_cast<int>(RcvTag.second.size());
    3490             : #ifdef AMREX_USE_OMP
    3491             : #pragma omp parallel for if (thecmd.m_threadsafe_rcv)
    3492             : #endif
    3493           0 :             for (int i = 0; i < N; ++i) {
    3494           0 :                 const CopyComTag& tag = RcvTag.second[i];
    3495           0 :                 (*this)[tag.dstIndex].template setVal<RunOn::Host>(val, tag.dbox, scomp, ncomp);
    3496             :             }
    3497             :         }
    3498             :     }
    3499         409 : }
    3500             : 
    3501             : template <class FAB>
    3502             : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
    3503             : LayoutData<int>
    3504             : FabArray<FAB>::RecvLayoutMask (const CommMetaData& thecmd)
    3505             : {
    3506             :     BL_PROFILE("FabArray::RecvLayoutMask()");
    3507             : 
    3508             :     LayoutData<int> r(this->boxArray(), this->DistributionMap());
    3509             : #ifdef AMREX_USE_OMP
    3510             : #pragma omp parallel if (thecmd.m_threadsafe_rcv)
    3511             : #endif
    3512             :     for (MFIter mfi(r); mfi.isValid(); ++mfi) {
    3513             :         r[mfi] = 0;
    3514             :     }
    3515             : 
    3516             :     const CopyComTagsContainer&      LocTags = *(thecmd.m_LocTags);
    3517             :     const MapOfCopyComTagContainers& RcvTags = *(thecmd.m_RcvTags);
    3518             : 
    3519             :     auto N_locs = static_cast<int>(LocTags.size());
    3520             :     for (int i = 0; i < N_locs; ++i) {
    3521             :         const CopyComTag& tag = LocTags[i];
    3522             :         r[tag.dstIndex] = 1;
    3523             :     }
    3524             : 
    3525             :     for (const auto & RcvTag : RcvTags) {
    3526             :         auto N = static_cast<int>(RcvTag.second.size());
    3527             :         for (int i = 0; i < N; ++i) {
    3528             :             const CopyComTag& tag = RcvTag.second[i];
    3529             :             r[tag.dstIndex] = 1;
    3530             :         }
    3531             :     }
    3532             :     return r;
    3533             : }
    3534             : 
    3535             : template <class FAB>
    3536             : template <typename F, std::enable_if_t<IsBaseFab<F>::value,int> FOO>
    3537             : typename F::value_type
    3538             : FabArray<FAB>::norminf (int comp, int ncomp, IntVect const& nghost, bool local,
    3539             :                         [[maybe_unused]] bool ignore_covered) const
    3540             : {
    3541             :     BL_PROFILE("FabArray::norminf()");
    3542             : 
    3543             :     using RT = typename F::value_type;
    3544             : 
    3545             :     auto nm0 = RT(0.0);
    3546             : 
    3547             : #ifdef AMREX_USE_EB
    3548             :     if ( this->is_cell_centered() && this->hasEBFabFactory() && ignore_covered )
    3549             :     {
    3550             :         const auto& ebfactory = dynamic_cast<EBFArrayBoxFactory const&>(this->Factory());
    3551             :         auto const& flags = ebfactory.getMultiEBCellFlagFab();
    3552             : #ifdef AMREX_USE_GPU
    3553             :         if (Gpu::inLaunchRegion()) {
    3554             :             auto const& flagsma = flags.const_arrays();
    3555             :             auto const& ma = this->const_arrays();
    3556             :             nm0 = ParReduce(TypeList<ReduceOpMax>{}, TypeList<RT>{}, *this, nghost,
    3557             :             [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<RT>
    3558             :             {
    3559             :                 if (flagsma[box_no](i,j,k).isCovered()) {
    3560             :                     return RT(0.0);
    3561             :                 } else {
    3562             :                     auto tmp = RT(0.0);
    3563             :                     auto const& a = ma[box_no];
    3564             :                     for (int n = 0; n < ncomp; ++n) {
    3565             :                         tmp = amrex::max(tmp, std::abs(a(i,j,k,comp+n)));
    3566             :                     }
    3567             :                     return tmp;
    3568             :                 }
    3569             :             });
    3570             :         } else
    3571             : #endif
    3572             :         {
    3573             : #ifdef AMREX_USE_OMP
    3574             : #pragma omp parallel reduction(max:nm0)
    3575             : #endif
    3576             :             for (MFIter mfi(*this,true); mfi.isValid(); ++mfi) {
    3577             :                 Box const& bx = mfi.growntilebox(nghost);
    3578             :                 if (flags[mfi].getType(bx) != FabType::covered) {
    3579             :                     auto const& flag = flags.const_array(mfi);
    3580             :                     auto const& a = this->const_array(mfi);
    3581             :                     AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
    3582             :                     {
    3583             :                         if (!flag(i,j,k).isCovered()) {
    3584             :                             nm0 = std::max(nm0, std::abs(a(i,j,k,comp+n)));
    3585             :                         }
    3586             :                     });
    3587             :                 }
    3588             :             }
    3589             :         }
    3590             :     }
    3591             :     else
    3592             : #endif
    3593             :     {
    3594             : #ifdef AMREX_USE_GPU
    3595             :         if (Gpu::inLaunchRegion()) {
    3596             :             auto const& ma = this->const_arrays();
    3597             :             nm0 = ParReduce(TypeList<ReduceOpMax>{}, TypeList<RT>{}, *this, nghost, ncomp,
    3598             :             [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept -> GpuTuple<RT>
    3599             :             {
    3600             :                 return std::abs(ma[box_no](i,j,k,comp+n));
    3601             :             });
    3602             :         } else
    3603             : #endif
    3604             :         {
    3605             : #ifdef AMREX_USE_OMP
    3606             : #pragma omp parallel reduction(max:nm0)
    3607             : #endif
    3608             :             for (MFIter mfi(*this,true); mfi.isValid(); ++mfi) {
    3609             :                 Box const& bx = mfi.growntilebox(nghost);
    3610             :                 auto const& a = this->const_array(mfi);
    3611             :                 AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
    3612             :                 {
    3613             :                     nm0 = std::max(nm0, std::abs(a(i,j,k,comp+n)));
    3614             :                 });
    3615             :             }
    3616             :         }
    3617             :     }
    3618             : 
    3619             :     if (!local) {
    3620             :         ParallelAllReduce::Max(nm0, ParallelContext::CommunicatorSub());
    3621             :     }
    3622             : 
    3623             :     return nm0;
    3624             : }
    3625             : 
    3626             : template <class FAB>
    3627             : template <typename IFAB, typename F, std::enable_if_t<IsBaseFab<F>::value,int> FOO>
    3628             : typename F::value_type
    3629             : FabArray<FAB>::norminf (FabArray<IFAB> const& mask, int comp, int ncomp,
    3630             :                         IntVect const& nghost, bool local) const
    3631             : {
    3632             :     BL_PROFILE("FabArray::norminf(mask)");
    3633             : 
    3634             :     using RT = typename F::value_type;
    3635             : 
    3636             :     auto nm0 = RT(0.0);
    3637             : 
    3638             : #ifdef AMREX_USE_GPU
    3639             :     if (Gpu::inLaunchRegion()) {
    3640             :         auto const& ma = this->const_arrays();
    3641             :         auto const& maskma = mask.const_arrays();
    3642             :         nm0 = ParReduce(TypeList<ReduceOpMax>{}, TypeList<RT>{}, *this, IntVect(nghost),
    3643             :         [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<RT>
    3644             :         {
    3645             :             if (maskma[box_no](i,j,k)) {
    3646             :                 auto tmp = RT(0.0);
    3647             :                 auto const& a = ma[box_no];
    3648             :                 for (int n = 0; n < ncomp; ++n) {
    3649             :                     tmp = amrex::max(tmp, std::abs(a(i,j,k,comp+n)));
    3650             :                 }
    3651             :                 return tmp;
    3652             :             } else {
    3653             :                 return RT(0.0);
    3654             :             }
    3655             :         });
    3656             :     } else
    3657             : #endif
    3658             :     {
    3659             : #ifdef AMREX_USE_OMP
    3660             : #pragma omp parallel reduction(max:nm0)
    3661             : #endif
    3662             :         for (MFIter mfi(*this,true); mfi.isValid(); ++mfi) {
    3663             :             Box const& bx = mfi.growntilebox(nghost);
    3664             :             auto const& a = this->const_array(mfi);
    3665             :             auto const& mskfab = mask.const_array(mfi);
    3666             :             AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
    3667             :             {
    3668             :                 if (mskfab(i,j,k)) {
    3669             :                     nm0 = std::max(nm0, std::abs(a(i,j,k,comp+n)));
    3670             :                 }
    3671             :             });
    3672             :         }
    3673             :     }
    3674             : 
    3675             :     if (!local) {
    3676             :         ParallelAllReduce::Max(nm0, ParallelContext::CommunicatorSub());
    3677             :     }
    3678             : 
    3679             :     return nm0;
    3680             : }
    3681             : 
    3682             : }
    3683             : 
    3684             : #endif /*BL_FABARRAY_H*/

Generated by: LCOV version 1.14