LCOV - code coverage report
Current view: top level - ext/amrex/3d-coverage-g++-24.08/include - AMReX_BaseFab.H (source / functions) Hit Total Coverage
Test: coverage_merged.info Lines: 79 128 61.7 %
Date: 2025-01-16 18:33:59 Functions: 89 464 19.2 %

          Line data    Source code
       1             : #ifndef AMREX_BASEFAB_H_
       2             : #define AMREX_BASEFAB_H_
       3             : #include <AMReX_Config.H>
       4             : 
       5             : #include <AMReX_Algorithm.H>
       6             : #include <AMReX_Extension.H>
       7             : #include <AMReX_BLassert.H>
       8             : #include <AMReX_Array.H>
       9             : #include <AMReX_Box.H>
      10             : #include <AMReX_Loop.H>
      11             : #include <AMReX_BoxList.H>
      12             : #include <AMReX_BArena.H>
      13             : #include <AMReX_CArena.H>
      14             : #include <AMReX_DataAllocator.H>
      15             : #include <AMReX_REAL.H>
      16             : #include <AMReX_BLProfiler.H>
      17             : #include <AMReX_BoxIterator.H>
      18             : #include <AMReX_MakeType.H>
      19             : #include <AMReX_Utility.H>
      20             : #include <AMReX_Reduce.H>
      21             : #include <AMReX_Gpu.H>
      22             : #include <AMReX_Scan.H>
      23             : #include <AMReX_Math.H>
      24             : #include <AMReX_OpenMP.H>
      25             : #include <AMReX_MemPool.H>
      26             : 
      27             : #include <cmath>
      28             : #include <cstdlib>
      29             : #include <algorithm>
      30             : #include <limits>
      31             : #include <climits>
      32             : #include <array>
      33             : #include <type_traits>
      34             : #include <memory>
      35             : #include <atomic>
      36             : #include <utility>
      37             : 
      38             : namespace amrex
      39             : {
      40             : 
      41             : extern std::atomic<Long> atomic_total_bytes_allocated_in_fabs;
      42             : extern std::atomic<Long> atomic_total_bytes_allocated_in_fabs_hwm;
      43             : extern std::atomic<Long> atomic_total_cells_allocated_in_fabs;
      44             : extern std::atomic<Long> atomic_total_cells_allocated_in_fabs_hwm;
      45             : extern Long private_total_bytes_allocated_in_fabs;     //!< total bytes at any given time
      46             : extern Long private_total_bytes_allocated_in_fabs_hwm; //!< high-water-mark over a given interval
      47             : extern Long private_total_cells_allocated_in_fabs;     //!< total cells at any given time
      48             : extern Long private_total_cells_allocated_in_fabs_hwm; //!< high-water-mark over a given interval
      49             : #ifdef AMREX_USE_OMP
      50             : #pragma omp threadprivate(private_total_bytes_allocated_in_fabs)
      51             : #pragma omp threadprivate(private_total_bytes_allocated_in_fabs_hwm)
      52             : #pragma omp threadprivate(private_total_cells_allocated_in_fabs)
      53             : #pragma omp threadprivate(private_total_cells_allocated_in_fabs_hwm)
      54             : #endif
      55             : 
      56             : Long TotalBytesAllocatedInFabs () noexcept;
      57             : Long TotalBytesAllocatedInFabsHWM () noexcept;
      58             : Long TotalCellsAllocatedInFabs () noexcept;
      59             : Long TotalCellsAllocatedInFabsHWM () noexcept;
      60             : void ResetTotalBytesAllocatedInFabsHWM () noexcept;
      61             : void update_fab_stats (Long n, Long s, std::size_t szt) noexcept;
      62             : 
      63             : void BaseFab_Initialize ();
      64             : void BaseFab_Finalize ();
      65             : 
      66             : struct SrcComp {
      67             :     AMREX_GPU_HOST_DEVICE
      68           0 :     explicit SrcComp (int ai) noexcept : i(ai) {}
      69             :     int i;
      70             : };
      71             : 
      72             : struct DestComp {
      73             :     AMREX_GPU_HOST_DEVICE
      74        4986 :     explicit DestComp (int ai) noexcept : i(ai) {}
      75             :     int i;
      76             : };
      77             : 
      78             : struct NumComps {
      79             :     AMREX_GPU_HOST_DEVICE
      80        4986 :     explicit NumComps (int an) noexcept : n(an) {}
      81             :     int n;
      82             : };
      83             : 
      84             : template <typename T>
      85             : AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
      86             : Array4<T>
      87             : makeArray4 (T* p, Box const& bx, int ncomp) noexcept
      88             : {
      89     9288024 :     return Array4<T>{p, amrex::begin(bx), amrex::end(bx), ncomp};
      90             : }
      91             : 
      92             : template <typename T>
      93             : std::enable_if_t<std::is_arithmetic_v<T>>
      94             : placementNew (T* const /*ptr*/, Long /*n*/)
      95             : {}
      96             : 
      97             : template <typename T>
      98             : std::enable_if_t<std::is_trivially_default_constructible_v<T>
      99             :                  && !std::is_arithmetic_v<T>>
     100             : placementNew (T* const ptr, Long n)
     101             : {
     102             :     for (Long i = 0; i < n; ++i) {
     103             :         new (ptr+i) T;
     104             :     }
     105             : }
     106             : 
     107             : template <typename T>
     108             : std::enable_if_t<!std::is_trivially_default_constructible_v<T>>
     109        3315 : placementNew (T* const ptr, Long n)
     110             : {
     111     1788150 :     AMREX_HOST_DEVICE_FOR_1D ( n, i,
     112             :     {
     113             :         new (ptr+i) T;
     114             :     });
     115        3315 : }
     116             : 
     117             : template <typename T>
     118             : std::enable_if_t<std::is_trivially_destructible_v<T>>
     119      409453 : placementDelete (T* const /*ptr*/, Long /*n*/)
     120      409453 : {}
     121             : 
     122             : template <typename T>
     123             : std::enable_if_t<!std::is_trivially_destructible_v<T>>
     124           3 : placementDelete (T* const ptr, Long n)
     125             : {
     126        2190 :     AMREX_HOST_DEVICE_FOR_1D (n, i,
     127             :     {
     128             :         (ptr+i)->~T();
     129             :     });
     130           3 : }
     131             : 
     132             : /**
     133             :  * \brief A FortranArrayBox(FAB)-like object
     134             :  *
     135             :  * BaseFab emulates the Fortran array concept.
     136             :  * Useful operations can be performed upon
     137             :  * BaseFabs in C++, and they provide a convenient interface to
     138             :  * Fortran when it is necessary to retreat into that language.
     139             :  *
     140             :  * BaseFab is a template class.  Through use of the
     141             :  * template, a BaseFab may be based upon any class.  So far at least,
     142             :  * most applications have been based upon simple types like integers,
     143             :  * real*4s, or real*8s.  Most applications do not use BaseFabs
     144             :  * directly, but utilize specialized classes derived from BaseFab.
     145             :  *
     146             :  * Classes derived from BaseFab include FArrayBox, IArrayBox, TagBox,
     147             :  * Mask, EBFArrayBox, EBCellFlag and CutFab.
     148             :  *
     149             :  * BaseFab objects depend on the dimensionality of space
     150             :  * (indirectly through the DOMAIN Box member).  It is
     151             :  * typical to define the macro SPACEDIM to be 1, 2, or 3 to indicate
     152             :  * the dimension of space.  See the discussion of class Box for more
     153             :  * information.  A BaseFab contains a Box DOMAIN, which indicates the
     154             :  * integer indexing space over which the array is defined.  A BaseFab
     155             :  * also has NVAR components.  By components, we mean that for each
     156             :  * point in the rectangular indexing space, there are NVAR values
     157             :  * associated with that point.  A Fortran array corresponding to a
     158             :  * BaseFab would have (SPACEDIM+1) dimensions.
     159             :  *
     160             :  * By design, the array layout in a BaseFab mirrors that of a
     161             :  * Fortran array.  The first index (x direction for example) varies
     162             :  * most rapidly, the next index (y direction), if any, varies next
     163             :  * fastest. The component index varies last, after all the spatial
     164             :  * indices.
     165             :  *
     166             :  * It is sometimes convenient to be able to treat a sub-array within an
     167             :  * existing BaseFab as a BaseFab in its own right.  This is often
     168             :  * referred to as aliasing the BaseFab.  Note that when aliasing is
     169             :  * used, the BaseFabs domain will not, in general, be the same as the
     170             :  * parent BaseFabs domain, nor will the number of components.
     171             :  * BaseFab is a dimension dependent class, so SPACEDIM must be
     172             :  * defined as either 1, 2, or 3 when compiling.
     173             :  *
     174             :  * This is NOT a polymorphic class.
     175             :  *
     176             :  * It does NOT provide a copy constructor or assignment operator.
     177             :  *
     178             :  * \tparam T MUST have a default constructor and an assignment operator.
     179             :  */
     180             : template <class T>
     181             : class BaseFab
     182             :     : public DataAllocator
     183             : {
     184             : public:
     185             : 
     186             :     template <class U> friend class BaseFab;
     187             : 
     188             :     using value_type = T;
     189             : 
     190             :     //! Construct an empty BaseFab, which must be resized (see BaseFab::resize) before use.
     191             :     BaseFab () noexcept = default;
     192             : 
     193             :     explicit BaseFab (Arena* ar) noexcept;
     194             : 
     195             :     BaseFab (const Box& bx, int n, Arena* ar);
     196             : 
     197             :     //!  Make BaseFab with desired domain (box) and number of components.
     198             :     explicit BaseFab (const Box& bx, int n = 1, bool alloc = true,
     199             :                       bool shared = false, Arena* ar = nullptr);
     200             : 
     201             :     BaseFab (const BaseFab<T>& rhs, MakeType make_type, int scomp, int ncomp);
     202             : 
     203             :     /**
     204             :      * \brief Create an NON-OWNING BaseFab.  Thus BaseFab is not
     205             :      * responsible for memory management.  And it's caller's responsibility that
     206             :      * p points to a chunk of memory large enough.
     207             :      */
     208             :     BaseFab (const Box& bx, int ncomp, T* p);
     209             :     BaseFab (const Box& bx, int ncomp, T const* p);
     210             : 
     211             :     explicit BaseFab (Array4<T> const& a) noexcept;
     212             : 
     213             :     explicit BaseFab (Array4<T> const& a, IndexType t) noexcept;
     214             : 
     215             :     explicit BaseFab (Array4<T const> const& a) noexcept;
     216             : 
     217             :     explicit BaseFab (Array4<T const> const& a, IndexType t) noexcept;
     218             : 
     219             :     //! The destructor deletes the array memory.
     220             :     virtual ~BaseFab () noexcept;
     221             : 
     222             :     BaseFab (const BaseFab<T>& rhs) = delete;
     223             :     BaseFab<T>& operator= (const BaseFab<T>& rhs) = delete;
     224             : 
     225             :     BaseFab (BaseFab<T>&& rhs) noexcept;
     226             :     BaseFab<T>& operator= (BaseFab<T>&& rhs) noexcept;
     227             : 
     228             : #if defined(AMREX_USE_GPU)
     229             :     template <RunOn run_on>
     230             : #else
     231             :     template <RunOn run_on=RunOn::Host>
     232             : #endif
     233             :     BaseFab& operator= (T const&) noexcept;
     234             : 
     235             :     static void Initialize();
     236             :     static void Finalize();
     237             : 
     238             :     /**
     239             :     * \brief This function resizes a BaseFab so it covers the Box b
     240             :     * with N components.
     241             : 
     242             :     * The default action is that under resizing, the memory allocated for the
     243             :     * BaseFab only grows and never shrinks.  This function is
     244             :     * particularly useful when a BaseFab is used as a temporary
     245             :     * space which must be a different size whenever it is used.
     246             :     * Resizing is typically faster than re-allocating a
     247             :     * BaseFab because memory allocation can often be avoided.
     248             :     * If a nullptr is provided as Arena*, the Arena already in BaseFab will
     249             :     * be used. Otherwise, the Arena argument will be used.
     250             :     */
     251             :     void resize (const Box& b, int N = 1, Arena* ar = nullptr);
     252             : 
     253             :     template <class U=T, std::enable_if_t<std::is_trivially_destructible_v<U>,int> = 0>
     254             :     [[nodiscard]] Elixir elixir () noexcept;
     255             : 
     256             :     /**
     257             :      * \brief The function returns the BaseFab to the invalid state.
     258             :      * The memory is freed.
     259             :      */
     260             :     void clear () noexcept;
     261             : 
     262             :     //! Release ownership of memory
     263             :     [[nodiscard]] std::unique_ptr<T,DataDeleter> release () noexcept;
     264             : 
     265             :     //! Returns how many bytes used
     266      811212 :     [[nodiscard]] std::size_t nBytes () const noexcept { return this->truesize*sizeof(T); }
     267             : 
     268      811512 :     [[nodiscard]] std::size_t nBytesOwned () const noexcept {
     269      811512 :         return (this->ptr_owner) ? nBytes() : 0;
     270             :     }
     271             : 
     272             :     //! Returns bytes used in the Box for those components
     273             :     [[nodiscard]] std::size_t nBytes (const Box& bx, int ncomps) const noexcept
     274             :         { return bx.numPts() * sizeof(T) * ncomps; }
     275             : 
     276             :     //! Returns the number of components
     277       14628 :     [[nodiscard]] int nComp () const noexcept { return this->nvar; }
     278             : 
     279             :     //! for calls to fortran.
     280             :     [[nodiscard]] const int* nCompPtr() const noexcept {
     281             :         return &(this->nvar);
     282             :     }
     283             : 
     284             :     //! Returns the number of points
     285           0 :     [[nodiscard]] Long numPts () const noexcept { return this->domain.numPts(); }
     286             : 
     287             :     //! Returns the total number of points of all components
     288             :     [[nodiscard]] Long size () const noexcept { return this->nvar*this->domain.numPts(); }
     289             : 
     290             :     //! Returns the domain (box) where the array is defined
     291         648 :     [[nodiscard]] const Box& box () const noexcept { return this->domain; }
     292             : 
     293             :     /**
     294             :     * \brief Returns a pointer to an array of SPACEDIM integers
     295             :     * giving the length of the domain in each direction
     296             :     */
     297             :     [[nodiscard]] IntVect length () const noexcept { return this->domain.length(); }
     298             : 
     299             :     /**
     300             :     * \brief Returns the lower corner of the domain
     301             :     * See class Box for analogue.
     302             :     */
     303             :     [[nodiscard]] const IntVect& smallEnd () const noexcept { return this->domain.smallEnd(); }
     304             : 
     305             :     //!  Returns the upper corner of the domain.  See class Box for analogue.
     306             :     [[nodiscard]] const IntVect& bigEnd () const noexcept { return this->domain.bigEnd(); }
     307             : 
     308             :     /**
     309             :     * \brief Returns the lower corner of the domain.
     310             : 
     311             :     *Instead of returning them in the form of INTVECTs, as in smallEnd and
     312             :     * bigEnd, it returns the values as a pointer to an array of
     313             :     * constant integers.  This is useful when interfacing to
     314             :     * Fortran subroutines.
     315             :     */
     316             :     [[nodiscard]] const int* loVect () const noexcept { return this->domain.loVect(); }
     317             : 
     318             :     /**
     319             :     * \brief Returns the upper corner of the domain.
     320             : 
     321             :     *Instead of returning them in the form of INTVECTs, as in smallEnd and
     322             :     * bigEnd, it returns the values as a pointer to an array of
     323             :     * constant integers.  This is useful when interfacing to
     324             :     * Fortran subroutines.
     325             :     */
     326             :     [[nodiscard]] const int* hiVect () const noexcept { return this->domain.hiVect(); }
     327             : 
     328             :     /**
     329             :     * \brief Returns true if the domain of fab is totally contained within
     330             :     * the domain of this BaseFab.
     331             :     */
     332             :     [[nodiscard]] bool contains (const BaseFab<T>& fab) const noexcept
     333             :     {
     334             :         return box().contains(fab.box()) && this->nvar <= fab.nvar;
     335             :     }
     336             : 
     337             :     /**
     338             :     * \brief Returns true if bx is totally contained
     339             :     * within the domain of this BaseFab.
     340             :     */
     341             :     [[nodiscard]] bool contains (const Box& bx) const noexcept { return box().contains(bx); }
     342             : 
     343             :     /**
     344             :     * \brief Returns a pointer to an object of type T that is the
     345             :     * value of the Nth component associated with the cell at the
     346             :     * low end of the domain.  This is commonly used to get a pointer
     347             :     * to data in the array which is then handed off to a Fortran
     348             :     * subroutine.  Remember that data is stored in Fortran array
     349             :     * order, with the component index coming last.   In other words,
     350             :     * dataPtr returns a pointer to all the Nth components.
     351             :     */
     352      365725 :     [[nodiscard]] T* dataPtr (int n = 0) noexcept {
     353      365725 :         if (this->dptr) {
     354      365725 :             return &(this->dptr[n*this->domain.numPts()]);
     355             :         } else {
     356           0 :             return nullptr;
     357             :         }
     358             :     }
     359             : 
     360             :     //! Same as above except works on const FABs.
     361      365725 :     [[nodiscard]] const T* dataPtr (int n = 0) const noexcept {
     362      365725 :         if (this->dptr) {
     363      365725 :             return &(this->dptr[n*this->domain.numPts()]);
     364             :         } else {
     365           0 :             return nullptr;
     366             :         }
     367             :     }
     368             : 
     369             :     [[nodiscard]] T* dataPtr (const IntVect& p, int n = 0) noexcept;
     370             : 
     371             :     [[nodiscard]] const T* dataPtr (const IntVect& p, int n = 0) const noexcept;
     372             : 
     373             :     void setPtr (T* p, Long sz) noexcept { AMREX_ASSERT(this->dptr == nullptr && this->truesize == 0); this->dptr = p; this->truesize = sz; }
     374             : 
     375             :     void prefetchToHost () const noexcept;
     376             :     void prefetchToDevice () const noexcept;
     377             : 
     378             :     [[nodiscard]] AMREX_FORCE_INLINE
     379             :     Array4<T const> array () const noexcept
     380             :     {
     381           0 :         return makeArray4<T const>(this->dptr, this->domain, this->nvar);
     382             :     }
     383             : 
     384             :     [[nodiscard]] AMREX_FORCE_INLINE
     385             :     Array4<T const> array (int start_comp) const noexcept
     386             :     {
     387             :         return Array4<T const>(makeArray4<T const>(this->dptr, this->domain, this->nvar),start_comp);
     388             :     }
     389             : 
     390             :     [[nodiscard]] AMREX_FORCE_INLINE
     391             :     Array4<T const> array (int start_comp, int num_comps) const noexcept
     392             :     {
     393             :         return Array4<T const>(makeArray4<T const>(this->dptr, this->domain, this->nvar), start_comp, num_comps);
     394             :     }
     395             : 
     396             :     [[nodiscard]] AMREX_FORCE_INLINE
     397             :     Array4<T> array () noexcept
     398             :     {
     399     5391752 :         return makeArray4<T>(this->dptr, this->domain, this->nvar);
     400             :     }
     401             : 
     402             :     [[nodiscard]] AMREX_FORCE_INLINE
     403             :     Array4<T> array (int start_comp) noexcept
     404             :     {
     405             :         return Array4<T>(makeArray4<T>(this->dptr, this->domain, this->nvar),start_comp);
     406             :     }
     407             : 
     408             :     [[nodiscard]] AMREX_FORCE_INLINE
     409             :     Array4<T> array (int start_comp, int num_comps) noexcept
     410             :     {
     411             :         return Array4<T>(makeArray4<T>(this->dptr, this->domain, this->nvar), start_comp, num_comps);
     412             :     }
     413             : 
     414             :     [[nodiscard]] AMREX_FORCE_INLINE
     415             :     Array4<T const> const_array () const noexcept
     416             :     {
     417     3854400 :         return makeArray4<T const>(this->dptr, this->domain, this->nvar);
     418             :     }
     419             : 
     420             :     [[nodiscard]] AMREX_FORCE_INLINE
     421             :     Array4<T const> const_array (int start_comp) const noexcept
     422             :     {
     423             :         return Array4<T const>(makeArray4<T const>(this->dptr, this->domain, this->nvar),start_comp);
     424             :     }
     425             : 
     426             :     [[nodiscard]] AMREX_FORCE_INLINE
     427             :     Array4<T const> const_array (int start_comp, int num_comps) const noexcept
     428             :     {
     429             :         return Array4<T const>(makeArray4<T const>(this->dptr, this->domain, this->nvar), start_comp, num_comps);
     430             :     }
     431             : 
     432             :     //! Returns true if the data for the FAB has been allocated.
     433             :     [[nodiscard]] bool isAllocated () const noexcept { return this->dptr != nullptr; }
     434             : 
     435             :     /**
     436             :     * \brief Returns a reference to the Nth component value
     437             :     * defined at position p in the domain.  This operator may be
     438             :     * inefficient if the C++ compiler is unable to optimize the
     439             :     * C++ code.
     440             :     */
     441             :     [[nodiscard]] T& operator() (const IntVect& p, int N) noexcept;
     442             : 
     443             :     //! Same as above, except returns component 0.
     444             :     [[nodiscard]] T& operator() (const IntVect& p) noexcept;
     445             : 
     446             :     //! Same as above except works on const FABs.
     447             :     [[nodiscard]] const T& operator() (const IntVect& p, int N) const noexcept;
     448             : 
     449             :     //! Same as above, except returns component 0.
     450             :     [[nodiscard]] const T& operator() (const IntVect& p) const noexcept;
     451             : 
     452             :     /**
     453             :     * \brief This function puts numcomp component values, starting at
     454             :     * component N, from position pos in the domain into array data,
     455             :     * that must be allocated by the user.
     456             :     */
     457             :     void getVal (T* data, const IntVect& pos, int N, int numcomp) const noexcept;
     458             :     //! Same as above, except that starts at component 0 and copies all comps.
     459             :     void getVal (T* data, const IntVect& pos) const noexcept;
     460             : 
     461             : #if defined(AMREX_USE_GPU)
     462             :     template <RunOn run_on,
     463             : #else
     464             :     template <RunOn run_on=RunOn::Host,
     465             : #endif
     466             :               class U=T, std::enable_if_t<std::is_same_v<U,float> || std::is_same_v<U,double>,int> FOO = 0>
     467             :     void fill_snan () noexcept;
     468             : 
     469             :     /**
     470             :     * \brief The setVal functions set sub-regions in the BaseFab to a
     471             :     * constant value.  This most general form specifies the sub-box,
     472             :     * the starting component number, and the number of components
     473             :     * to be set.
     474             :     */
     475             : #if defined(AMREX_USE_GPU)
     476             :     template <RunOn run_on>
     477             : #else
     478             :     template <RunOn run_on=RunOn::Host>
     479             : #endif
     480             :     void setVal (T const& x, const Box& bx, int dcomp, int ncomp) noexcept;
     481             :     //! Same as above, except the number of modified components is one. N is the component to be modified.
     482             : #if defined(AMREX_USE_GPU)
     483             :     template <RunOn run_on>
     484             : #else
     485             :     template <RunOn run_on=RunOn::Host>
     486             : #endif
     487             :     void setVal (T const& x, const Box& bx, int N = 0) noexcept;
     488             :     //! Same as above, except the sub-box defaults to the entire domain.
     489             : #if defined(AMREX_USE_GPU)
     490             :     template <RunOn run_on>
     491             : #else
     492             :     template <RunOn run_on=RunOn::Host>
     493             : #endif
     494             :     void setVal (T const& x, int N) noexcept;
     495             : 
     496             : #if defined(AMREX_USE_GPU)
     497             :     template <RunOn run_on>
     498             : #else
     499             :     template <RunOn run_on=RunOn::Host>
     500             : #endif
     501             :     void setValIfNot (T const& val, const Box& bx, const BaseFab<int>& mask, int nstart, int num) noexcept;
     502             : 
     503             :     /**
     504             :     * \brief This function is analogous to the fourth form of
     505             :     * setVal above, except that instead of setting values on the
     506             :     * Box b, values are set on the complement of b in the domain.
     507             :     */
     508             : #if defined(AMREX_USE_GPU)
     509             :     template <RunOn run_on>
     510             : #else
     511             :     template <RunOn run_on=RunOn::Host>
     512             : #endif
     513             :     void setComplement (T const& x, const Box& b, int ns, int num) noexcept;
     514             : 
     515             :     /**
     516             :     * \brief The copy functions copy the contents of one BaseFab into
     517             :     * another.  The destination BaseFab is always the object which
     518             :     * invokes the function.  This, the most general form of copy,
     519             :     * specifies the contents of any sub-box srcbox in BaseFab src
     520             :     * may be copied into a (possibly different) destbox in the
     521             :     * destination BaseFab.  Note that although the srcbox and the
     522             :     * destbox may be disjoint, they must be the same size and shape.
     523             :     * If the sizes differ, the copy is undefined and a runtime error
     524             :     * results.  This copy function is the only one of the copy
     525             :     * functions to allow a copy between differing boxes. The user
     526             :     * also specifies how many components are copied, starting at
     527             :     * component srccomp in src and stored starting at component
     528             :     * destcomp. The results are UNDEFINED if the src and dest are the
     529             :     * same and the srcbox and destbox overlap.
     530             :     */
     531             : #if defined(AMREX_USE_GPU)
     532             :     template <RunOn run_on>
     533             : #else
     534             :     template <RunOn run_on=RunOn::Host>
     535             : #endif
     536             :     BaseFab<T>& copy (const BaseFab<T>& src, const Box& srcbox, int srccomp,
     537             :                       const Box& destbox, int destcomp, int numcomp) noexcept;
     538             : 
     539             :     /**
     540             :     * \brief As above, except the destination Box and the source Box
     541             :     * are taken to be the entire domain of the destination.   A copy
     542             :     * of the intersecting region is performed.
     543             :     * class.
     544             :     */
     545             : #if defined(AMREX_USE_GPU)
     546             :     template <RunOn run_on>
     547             : #else
     548             :     template <RunOn run_on=RunOn::Host>
     549             : #endif
     550             :     BaseFab<T>& copy (const BaseFab<T>& src, int srccomp, int destcomp,
     551             :                       int numcomp = 1) noexcept;
     552             :     /**
     553             :     * \brief As above, except that the destination Box is specified,
     554             :     * but the source Box is taken to the equal to the destination
     555             :     * Box, and all components of the destination BaseFab are
     556             :     * copied.
     557             :     */
     558             : #if defined(AMREX_USE_GPU)
     559             :     template <RunOn run_on>
     560             : #else
     561             :     template <RunOn run_on=RunOn::Host>
     562             : #endif
     563             :     BaseFab<T>& copy (const BaseFab<T>& src, const Box& destbox) noexcept;
     564             : 
     565             :     //! Copy from the srcbox of this Fab to raw memory and return the number of bytes copied
     566             : #if defined(AMREX_USE_GPU)
     567             :     template <RunOn run_on>
     568             : #else
     569             :     template <RunOn run_on=RunOn::Host>
     570             : #endif
     571             :     std::size_t copyToMem (const Box& srcbox, int srccomp,
     572             :                            int numcomp, void* dst) const noexcept;
     573             : 
     574             :     //! Copy from raw memory to the dstbox of this Fab and return the number of bytes copied
     575             : #if defined(AMREX_USE_GPU)
     576             :     template <RunOn run_on, typename BUF = T>
     577             : #else
     578             :     template <RunOn run_on=RunOn::Host, typename BUF = T>
     579             : #endif
     580             :     std::size_t copyFromMem (const Box& dstbox, int dstcomp,
     581             :                              int numcomp, const void* src) noexcept;
     582             : 
     583             :     //! Add from raw memory to the dstbox of this Fab and return the number of bytes copied
     584             : #if defined(AMREX_USE_GPU)
     585             :     template <RunOn run_on, typename BUF = T>
     586             : #else
     587             :     template <RunOn run_on=RunOn::Host, typename BUF = T>
     588             : #endif
     589             :     std::size_t addFromMem (const Box& dstbox, int dstcomp,
     590             :                             int numcomp, const void* src) noexcept;
     591             : 
     592             :     /**
     593             :     * \brief Perform shifts upon the domain of the BaseFab. They are
     594             :     * completely analogous to the corresponding Box functions.
     595             :     * There is no effect upon the array memory.
     596             :     */
     597             :     BaseFab<T>& shift (const IntVect& v) noexcept;
     598             :     /**
     599             :     * \brief Perform shifts upon the domain of the BaseFab.  They are
     600             :     * completely analogous to the corresponding Box functions.
     601             :     * There is no effect upon the array memory.
     602             :     */
     603             :     BaseFab<T>& shift (int idir, int n_cell) noexcept;
     604             :     /**
     605             :     * \brief Perform shifts upon the domain of the BaseFab.  They are
     606             :     * completely analogous to the corresponding Box functions.
     607             :     * There is no effect upon the array memory.
     608             :     */
     609             :     BaseFab<T>& shiftHalf (int dir, int n_cell) noexcept;
     610             :     /**
     611             :     * \brief Perform shifts upon the domain of the BaseFab. They are
     612             :     * completely analogous to the corresponding Box functions.
     613             :     * There is no effect upon the array memory.
     614             :     */
     615             :     BaseFab<T>& shiftHalf (const IntVect& v) noexcept;
     616             : 
     617             : #if defined(AMREX_USE_GPU)
     618             :     template <RunOn run_on>
     619             : #else
     620             :     template <RunOn run_on=RunOn::Host>
     621             : #endif
     622             :     [[nodiscard]] Real norminfmask (const Box& subbox, const BaseFab<int>& mask, int scomp=0, int ncomp=1) const noexcept;
     623             : 
     624             :     /**
     625             :     * \brief Compute the Lp-norm of this FAB using components (scomp : scomp+ncomp-1).
     626             :     *   p < 0  -> ERROR
     627             :     *   p = 0  -> infinity norm (max norm)
     628             :     *   p = 1  -> sum of ABS(FAB)
     629             :     */
     630             : #if defined(AMREX_USE_GPU)
     631             :     template <RunOn run_on>
     632             : #else
     633             :     template <RunOn run_on=RunOn::Host>
     634             : #endif
     635             :     [[nodiscard]] Real norm (int p, int scomp = 0, int numcomp = 1) const noexcept;
     636             : 
     637             :     //! Same as above except only on given subbox.
     638             : #if defined(AMREX_USE_GPU)
     639             :     template <RunOn run_on>
     640             : #else
     641             :     template <RunOn run_on=RunOn::Host>
     642             : #endif
     643             :     [[nodiscard]] Real norm (const Box& subbox, int p, int scomp = 0, int numcomp = 1) const noexcept;
     644             :     //!Compute absolute value for all components of this FAB.
     645             : #if defined(AMREX_USE_GPU)
     646             :     template <RunOn run_on>
     647             : #else
     648             :     template <RunOn run_on=RunOn::Host>
     649             : #endif
     650             :     void abs () noexcept;
     651             :     //! Same as above except only for components (comp: comp+numcomp-1)
     652             : #if defined(AMREX_USE_GPU)
     653             :     template <RunOn run_on>
     654             : #else
     655             :     template <RunOn run_on=RunOn::Host>
     656             : #endif
     657             :     void abs (int comp, int numcomp=1) noexcept;
     658             :     /**
     659             :     * \brief Calculate abs() on subbox for given component range.
     660             :     */
     661             : #if defined(AMREX_USE_GPU)
     662             :     template <RunOn run_on>
     663             : #else
     664             :     template <RunOn run_on=RunOn::Host>
     665             : #endif
     666             :     void abs (const Box& subbox, int comp = 0, int numcomp=1) noexcept;
     667             :     /**
     668             :     * \return Minimum value of given component.
     669             :     */
     670             : #if defined(AMREX_USE_GPU)
     671             :     template <RunOn run_on>
     672             : #else
     673             :     template <RunOn run_on=RunOn::Host>
     674             : #endif
     675             :     [[nodiscard]] T min (int comp = 0) const noexcept;
     676             :     /**
     677             :     * \return Minimum value of given component in given subbox.
     678             :     */
     679             : #if defined(AMREX_USE_GPU)
     680             :     template <RunOn run_on>
     681             : #else
     682             :     template <RunOn run_on=RunOn::Host>
     683             : #endif
     684             :     [[nodiscard]] T min (const Box& subbox, int comp = 0) const noexcept;
     685             :     /**
     686             :     * \return Maximum value of given component.
     687             :     */
     688             : #if defined(AMREX_USE_GPU)
     689             :     template <RunOn run_on>
     690             : #else
     691             :     template <RunOn run_on=RunOn::Host>
     692             : #endif
     693             :     [[nodiscard]] T max (int comp = 0) const noexcept;
     694             :     /**
     695             :     * \return Maximum value of given component in given subbox.
     696             :     */
     697             : #if defined(AMREX_USE_GPU)
     698             :     template <RunOn run_on>
     699             : #else
     700             :     template <RunOn run_on=RunOn::Host>
     701             : #endif
     702             :     [[nodiscard]] T max (const Box& subbox, int comp = 0) const noexcept;
     703             :     /**
     704             :     * \return Minimum and Maximum of given component
     705             :     */
     706             : #if defined(AMREX_USE_GPU)
     707             :     template <RunOn run_on>
     708             : #else
     709             :     template <RunOn run_on=RunOn::Host>
     710             : #endif
     711             :     [[nodiscard]] std::pair<T,T> minmax (int comp = 0) const noexcept;
     712             :     /**
     713             :     * \return Minimum and Maximum of given component in given subbox.
     714             :     */
     715             : #if defined(AMREX_USE_GPU)
     716             :     template <RunOn run_on>
     717             : #else
     718             :     template <RunOn run_on=RunOn::Host>
     719             : #endif
     720             :     [[nodiscard]] std::pair<T,T> minmax (const Box& subbox, int comp = 0) const noexcept;
     721             :     /**
     722             :     * \return Maximum of the absolute value of given component.
     723             :     */
     724             : #if defined(AMREX_USE_GPU)
     725             :     template <RunOn run_on>
     726             : #else
     727             :     template <RunOn run_on=RunOn::Host>
     728             : #endif
     729             :     [[nodiscard]] T maxabs (int comp = 0) const noexcept;
     730             :     /**
     731             :     * \return Maximum of the absolute value of given component in given subbox.
     732             :     */
     733             : #if defined(AMREX_USE_GPU)
     734             :     template <RunOn run_on>
     735             : #else
     736             :     template <RunOn run_on=RunOn::Host>
     737             : #endif
     738             :     [[nodiscard]] T maxabs (const Box& subbox, int comp = 0) const noexcept;
     739             : 
     740             :     /*(
     741             :     * \return location of given value
     742             :     */
     743             : #if defined(AMREX_USE_GPU)
     744             :     template <RunOn run_on>
     745             : #else
     746             :     template <RunOn run_on=RunOn::Host>
     747             : #endif
     748             :     [[nodiscard]] IntVect indexFromValue (const Box& subbox, int comp, T const& value) const noexcept;
     749             : 
     750             :     /**
     751             :     * \return location of minimum value in given component.
     752             :     */
     753             : #if defined(AMREX_USE_GPU)
     754             :     template <RunOn run_on>
     755             : #else
     756             :     template <RunOn run_on=RunOn::Host>
     757             : #endif
     758             :     [[nodiscard]] IntVect minIndex (int comp = 0) const noexcept;
     759             :     /**
     760             :     * \return location of minimum value in given component in
     761             :     * given subbox.
     762             :     */
     763             : #if defined(AMREX_USE_GPU)
     764             :     template <RunOn run_on>
     765             : #else
     766             :     template <RunOn run_on=RunOn::Host>
     767             : #endif
     768             :     [[nodiscard]] IntVect minIndex (const Box& subbox, int comp = 0) const noexcept;
     769             :     /**
     770             :     * \return return minimum value and location to allow
     771             :     * efficient looping over multiple boxes.
     772             :     */
     773             : #if defined(AMREX_USE_GPU)
     774             :     template <RunOn run_on>
     775             : #else
     776             :     template <RunOn run_on=RunOn::Host>
     777             : #endif
     778             :     void  minIndex (const Box& subbox, Real& min_val, IntVect& min_idx, int comp = 0) const noexcept;
     779             : 
     780             :     /**
     781             :     * \return location of maximum value in given component.
     782             :     */
     783             : #if defined(AMREX_USE_GPU)
     784             :     template <RunOn run_on>
     785             : #else
     786             :     template <RunOn run_on=RunOn::Host>
     787             : #endif
     788             :     [[nodiscard]] IntVect maxIndex (int comp = 0) const noexcept;
     789             :     /**
     790             :     * \return location of maximum value in given component in given
     791             :     * subbox.
     792             :     */
     793             : #if defined(AMREX_USE_GPU)
     794             :     template <RunOn run_on>
     795             : #else
     796             :     template <RunOn run_on=RunOn::Host>
     797             : #endif
     798             :     [[nodiscard]] IntVect maxIndex (const Box& subbox, int comp = 0) const noexcept;
     799             :     /**
     800             :     * \return return maximum value and location to allow
     801             :     * efficient looping over multiple boxes.
     802             :     */
     803             : #if defined(AMREX_USE_GPU)
     804             :     template <RunOn run_on>
     805             : #else
     806             :     template <RunOn run_on=RunOn::Host>
     807             : #endif
     808             :     void  maxIndex (const Box& subbox, Real& max_value, IntVect& max_idx, int comp = 0) const noexcept;
     809             : 
     810             :     /**
     811             :     * \brief Compute mask array with value of 1 in cells where
     812             :     * BaseFab has value less than val, 0 otherwise.
     813             :     * mask is resized by this function.
     814             :     * The number of cells marked with 1 returned.
     815             :     */
     816             : #if defined(AMREX_USE_GPU)
     817             :     template <RunOn run_on>
     818             : #else
     819             :     template <RunOn run_on=RunOn::Host>
     820             : #endif
     821             :     int maskLT (BaseFab<int>& mask, T const& val, int comp = 0) const noexcept;
     822             :     //! Same as above except mark cells with value less than or equal to val.
     823             : #if defined(AMREX_USE_GPU)
     824             :     template <RunOn run_on>
     825             : #else
     826             :     template <RunOn run_on=RunOn::Host>
     827             : #endif
     828             :     int maskLE (BaseFab<int>& mask, T const& val, int comp = 0) const noexcept;
     829             : 
     830             :     //! Same as above except mark cells with value equal to val.
     831             : #if defined(AMREX_USE_GPU)
     832             :     template <RunOn run_on>
     833             : #else
     834             :     template <RunOn run_on=RunOn::Host>
     835             : #endif
     836             :     int maskEQ (BaseFab<int>& mask, T const& val, int comp = 0) const noexcept;
     837             :     //! Same as above except mark cells with value greater than val.
     838             : #if defined(AMREX_USE_GPU)
     839             :     template <RunOn run_on>
     840             : #else
     841             :     template <RunOn run_on=RunOn::Host>
     842             : #endif
     843             :     int maskGT (BaseFab<int>& mask, T const& val, int comp = 0) const noexcept;
     844             :     //! Same as above except mark cells with value greater than or equal to val.
     845             : #if defined(AMREX_USE_GPU)
     846             :     template <RunOn run_on>
     847             : #else
     848             :     template <RunOn run_on=RunOn::Host>
     849             : #endif
     850             :     int maskGE (BaseFab<int>& mask, T const& val, int comp = 0) const noexcept;
     851             : 
     852             :     //! Returns sum of given component of FAB state vector.
     853             : #if defined(AMREX_USE_GPU)
     854             :     template <RunOn run_on>
     855             : #else
     856             :     template <RunOn run_on=RunOn::Host>
     857             : #endif
     858             :     [[nodiscard]] T sum (int comp, int numcomp = 1) const noexcept;
     859             :     //! Compute sum of given component of FAB state vector in given subbox.
     860             : #if defined(AMREX_USE_GPU)
     861             :     template <RunOn run_on>
     862             : #else
     863             :     template <RunOn run_on=RunOn::Host>
     864             : #endif
     865             :     [[nodiscard]] T sum (const Box& subbox, int comp, int numcomp = 1) const noexcept;
     866             : 
     867             :     //! Most general version, specify subbox and which components.
     868             : #if defined(AMREX_USE_GPU)
     869             :     template <RunOn run_on>
     870             : #else
     871             :     template <RunOn run_on=RunOn::Host>
     872             : #endif
     873             :     BaseFab<T>& invert (T const& r, const Box& b, int comp=0, int numcomp=1) noexcept;
     874             :     //! As above except on entire domain.
     875             : #if defined(AMREX_USE_GPU)
     876             :     template <RunOn run_on>
     877             : #else
     878             :     template <RunOn run_on=RunOn::Host>
     879             : #endif
     880             :     BaseFab<T>& invert (T const& r, int comp, int numcomp=1) noexcept;
     881             : 
     882             :     //! Negate BaseFab, most general.
     883             : #if defined(AMREX_USE_GPU)
     884             :     template <RunOn run_on>
     885             : #else
     886             :     template <RunOn run_on=RunOn::Host>
     887             : #endif
     888             :     BaseFab<T>& negate (const Box& b, int comp=0, int numcomp=1) noexcept;
     889             :     //! As above, except on entire domain.
     890             : #if defined(AMREX_USE_GPU)
     891             :     template <RunOn run_on>
     892             : #else
     893             :     template <RunOn run_on=RunOn::Host>
     894             : #endif
     895             :     BaseFab<T>& negate (int comp, int numcomp=1) noexcept;
     896             : 
     897             :     //! Scalar addition (a[i] <- a[i] + r), most general.
     898             : #if defined(AMREX_USE_GPU)
     899             :     template <RunOn run_on>
     900             : #else
     901             :     template <RunOn run_on=RunOn::Host>
     902             : #endif
     903             :     BaseFab<T>& plus (T const& r, const Box& b, int comp=0, int numcomp=1) noexcept;
     904             : 
     905             :     //! As above, except on entire domain.
     906             : #if defined(AMREX_USE_GPU)
     907             :     template <RunOn run_on>
     908             : #else
     909             :     template <RunOn run_on=RunOn::Host>
     910             : #endif
     911             :     BaseFab<T>& plus (T const& r, int comp, int numcomp=1) noexcept;
     912             : 
     913             :     /**
     914             :     * \brief Add src components (srccomp:srccomp+numcomp-1) to
     915             :     * this FABs components (destcomp:destcomp+numcomp-1)
     916             :     * where the two FABs intersect.
     917             :     */
     918             : #if defined(AMREX_USE_GPU)
     919             :     template <RunOn run_on>
     920             : #else
     921             :     template <RunOn run_on=RunOn::Host>
     922             : #endif
     923             :     BaseFab<T>& plus (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp=1) noexcept;
     924             :     /**
     925             :     * \brief Same as above except addition is restricted to intersection
     926             :     * of subbox and src FAB. NOTE: subbox must be contained in this
     927             :     * FAB.
     928             :     */
     929             : #if defined(AMREX_USE_GPU)
     930             :     template <RunOn run_on>
     931             : #else
     932             :     template <RunOn run_on=RunOn::Host>
     933             : #endif
     934             :     BaseFab<T>& plus (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp, int numcomp=1) noexcept;
     935             :     /**
     936             :     * \brief Add srcbox region of src FAB to destbox region of this FAB.
     937             :     * The srcbox and destbox must be same size.
     938             :     */
     939             : #if defined(AMREX_USE_GPU)
     940             :     template <RunOn run_on>
     941             : #else
     942             :     template <RunOn run_on=RunOn::Host>
     943             : #endif
     944             :     BaseFab<T>& plus (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
     945             :                       int srccomp, int destcomp, int numcomp=1) noexcept;
     946             : 
     947             :     //! Atomic FAB addition (a[i] <- a[i] + b[i]).
     948             : #if defined(AMREX_USE_GPU)
     949             :     template <RunOn run_on>
     950             : #else
     951             :     template <RunOn run_on=RunOn::Host>
     952             : #endif
     953             :     BaseFab<T>& atomicAdd (const BaseFab<T>& x) noexcept;
     954             : 
     955             :     /**
     956             :     * \brief Atomically add src components (srccomp:srccomp+numcomp-1) to
     957             :     * this FABs components (destcomp:destcomp+numcomp-1)
     958             :     * where the two FABs intersect.
     959             :     */
     960             : #if defined(AMREX_USE_GPU)
     961             :     template <RunOn run_on>
     962             : #else
     963             :     template <RunOn run_on=RunOn::Host>
     964             : #endif
     965             :     BaseFab<T>& atomicAdd (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp=1) noexcept;
     966             :     /**
     967             :     * \brief Same as above except addition is restricted to intersection
     968             :     * of subbox and src FAB. NOTE: subbox must be contained in this
     969             :     * FAB.
     970             :     */
     971             : #if defined(AMREX_USE_GPU)
     972             :     template <RunOn run_on>
     973             : #else
     974             :     template <RunOn run_on=RunOn::Host>
     975             : #endif
     976             :     BaseFab<T>& atomicAdd (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp,
     977             :                            int numcomp=1) noexcept;
     978             :     /**
     979             :     * \brief Atomically add srcbox region of src FAB to destbox region of this FAB.
     980             :     * The srcbox and destbox must be same size.
     981             :     */
     982             : #if defined(AMREX_USE_GPU)
     983             :     template <RunOn run_on>
     984             : #else
     985             :     template <RunOn run_on=RunOn::Host>
     986             : #endif
     987             :     BaseFab<T>& atomicAdd (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
     988             :                            int srccomp, int destcomp, int numcomp=1) noexcept;
     989             : 
     990             :     /**
     991             :     * \brief Atomically add srcbox region of src FAB to destbox region of this FAB.
     992             :     * The srcbox and destbox must be same size. When OMP is on, this uses OMP locks
     993             :     * in the implementation and it's usually faster than atomicAdd.
     994             :     */
     995             : #if defined(AMREX_USE_GPU)
     996             :     template <RunOn run_on>
     997             : #else
     998             :     template <RunOn run_on=RunOn::Host>
     999             : #endif
    1000             :     BaseFab<T>& lockAdd (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
    1001             :                          int srccomp, int destcomp, int numcomp) noexcept;
    1002             : 
    1003             :     //! FAB SAXPY (y[i] <- y[i] + a * x[i]), in place.
    1004             : #if defined(AMREX_USE_GPU)
    1005             :     template <RunOn run_on>
    1006             : #else
    1007             :     template <RunOn run_on=RunOn::Host>
    1008             : #endif
    1009             :     BaseFab<T>& saxpy (T a, const BaseFab<T>& x, const Box& srcbox, const Box& destbox,
    1010             :                        int srccomp, int destcomp, int numcomp=1) noexcept;
    1011             :     //! FAB SAXPY (y[i] <- y[i] + a * x[i]), in place.  All components.
    1012             : #if defined(AMREX_USE_GPU)
    1013             :     template <RunOn run_on>
    1014             : #else
    1015             :     template <RunOn run_on=RunOn::Host>
    1016             : #endif
    1017             :     BaseFab<T>& saxpy (T a, const BaseFab<T>& x) noexcept;
    1018             : 
    1019             :     //! FAB XPAY (y[i] <- x[i] + a * y[i])
    1020             : #if defined(AMREX_USE_GPU)
    1021             :     template <RunOn run_on>
    1022             : #else
    1023             :     template <RunOn run_on=RunOn::Host>
    1024             : #endif
    1025             :     BaseFab<T>& xpay (T a, const BaseFab<T>& x, const Box& srcbox, const Box& destbox,
    1026             :                       int srccomp, int destcomp, int numcomp=1) noexcept;
    1027             : 
    1028             :     //! y[i] <- y[i] + x1[i] * x2[i])
    1029             : #if defined(AMREX_USE_GPU)
    1030             :     template <RunOn run_on>
    1031             : #else
    1032             :     template <RunOn run_on=RunOn::Host>
    1033             : #endif
    1034             :     BaseFab<T>& addproduct (const Box& destbox, int destcomp, int numcomp,
    1035             :                             const BaseFab<T>& src1, int comp1,
    1036             :                             const BaseFab<T>& src2, int comp2) noexcept;
    1037             : 
    1038             :     /**
    1039             :     * \brief Subtract src components (srccomp:srccomp+numcomp-1) to
    1040             :     * this FABs components (destcomp:destcomp+numcomp-1) where
    1041             :     * the two FABs intersect.
    1042             :     */
    1043             : #if defined(AMREX_USE_GPU)
    1044             :     template <RunOn run_on>
    1045             : #else
    1046             :     template <RunOn run_on=RunOn::Host>
    1047             : #endif
    1048             :     BaseFab<T>& minus (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp=1) noexcept;
    1049             :     /**
    1050             :     * \brief Same as above except subtraction is restricted to intersection
    1051             :     * of subbox and src FAB.  NOTE: subbox must be contained in
    1052             :     * this FAB.
    1053             :     */
    1054             : #if defined(AMREX_USE_GPU)
    1055             :     template <RunOn run_on>
    1056             : #else
    1057             :     template <RunOn run_on=RunOn::Host>
    1058             : #endif
    1059             :     BaseFab<T>& minus (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp,
    1060             :                        int numcomp=1) noexcept;
    1061             :     /**
    1062             :     * \brief Subtract srcbox region of src FAB from destbox region
    1063             :     * of this FAB. srcbox and destbox must be same size.
    1064             :     */
    1065             : #if defined(AMREX_USE_GPU)
    1066             :     template <RunOn run_on>
    1067             : #else
    1068             :     template <RunOn run_on=RunOn::Host>
    1069             : #endif
    1070             :     BaseFab<T>& minus (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
    1071             :                        int srccomp, int destcomp, int numcomp=1) noexcept;
    1072             : 
    1073             :     //! Scalar multiplication, except control which components are multiplied.
    1074             : #if defined(AMREX_USE_GPU)
    1075             :     template <RunOn run_on>
    1076             : #else
    1077             :     template <RunOn run_on=RunOn::Host>
    1078             : #endif
    1079             :     BaseFab<T>& mult (T const& r, int comp, int numcomp=1) noexcept;
    1080             :     /**
    1081             :     * \brief As above, except specify sub-box.
    1082             :     */
    1083             : #if defined(AMREX_USE_GPU)
    1084             :     template <RunOn run_on>
    1085             : #else
    1086             :     template <RunOn run_on=RunOn::Host>
    1087             : #endif
    1088             :     BaseFab<T>& mult (T const& r, const Box& b, int comp=0, int numcomp=1) noexcept;
    1089             : 
    1090             :     /**
    1091             :     * \brief Multiply src components (srccomp:srccomp+numcomp-1) with
    1092             :     * this FABs components (destcomp:destcomp+numcomp-1) where
    1093             :     * the two FABs intersect.
    1094             :     */
    1095             : #if defined(AMREX_USE_GPU)
    1096             :     template <RunOn run_on>
    1097             : #else
    1098             :     template <RunOn run_on=RunOn::Host>
    1099             : #endif
    1100             :     BaseFab<T>& mult (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp=1) noexcept;
    1101             : 
    1102             :     /**
    1103             :     * \brief Same as above except multiplication is restricted to
    1104             :     * intersection of subbox and src FAB.  NOTE: subbox must be
    1105             :     * contained in this FAB.
    1106             :     */
    1107             : #if defined(AMREX_USE_GPU)
    1108             :     template <RunOn run_on>
    1109             : #else
    1110             :     template <RunOn run_on=RunOn::Host>
    1111             : #endif
    1112             :     BaseFab<T>& mult (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp,
    1113             :                       int numcomp=1) noexcept;
    1114             : 
    1115             :     /**
    1116             :     * \brief Multiply srcbox region of src FAB with destbox region
    1117             :     * of this FAB. The srcbox and destbox must be same size.
    1118             :     */
    1119             : #if defined(AMREX_USE_GPU)
    1120             :     template <RunOn run_on>
    1121             : #else
    1122             :     template <RunOn run_on=RunOn::Host>
    1123             : #endif
    1124             :     BaseFab<T>& mult (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
    1125             :                       int srccomp, int destcomp, int numcomp=1) noexcept;
    1126             : 
    1127             :     //! As above except specify which components.
    1128             : #if defined(AMREX_USE_GPU)
    1129             :     template <RunOn run_on>
    1130             : #else
    1131             :     template <RunOn run_on=RunOn::Host>
    1132             : #endif
    1133             :     BaseFab<T>& divide (T const& r, int comp, int numcomp=1) noexcept;
    1134             : 
    1135             :     //! As above except specify sub-box.
    1136             : #if defined(AMREX_USE_GPU)
    1137             :     template <RunOn run_on>
    1138             : #else
    1139             :     template <RunOn run_on=RunOn::Host>
    1140             : #endif
    1141             :     BaseFab<T>& divide (T const& r, const Box& b, int comp=0, int numcomp=1) noexcept;
    1142             : 
    1143             :     /**
    1144             :     * \brief This FAB is numerator, src FAB is denominator
    1145             :     * divide src components (srccomp:srccomp+numcomp-1) into
    1146             :     * this FABs components (destcomp:destcomp+numcomp-1)
    1147             :     * where the two FABs intersect.
    1148             :     */
    1149             : #if defined(AMREX_USE_GPU)
    1150             :     template <RunOn run_on>
    1151             : #else
    1152             :     template <RunOn run_on=RunOn::Host>
    1153             : #endif
    1154             :     BaseFab<T>& divide (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp=1) noexcept;
    1155             :     /**
    1156             :     * \brief Same as above except division is restricted to
    1157             :     * intersection of subbox and src FAB.  NOTE: subbox must be
    1158             :     * contained in this FAB.
    1159             :     */
    1160             : #if defined(AMREX_USE_GPU)
    1161             :     template <RunOn run_on>
    1162             : #else
    1163             :     template <RunOn run_on=RunOn::Host>
    1164             : #endif
    1165             :     BaseFab<T>& divide (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp,
    1166             :                         int numcomp=1) noexcept;
    1167             :     /**
    1168             :     * \brief destbox region of this FAB is numerator. srcbox regions of
    1169             :     * src FAB is denominator. srcbox and destbox must be same size.
    1170             :     */
    1171             : #if defined(AMREX_USE_GPU)
    1172             :     template <RunOn run_on>
    1173             : #else
    1174             :     template <RunOn run_on=RunOn::Host>
    1175             : #endif
    1176             :     BaseFab<T>& divide (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
    1177             :                         int srccomp, int destcomp, int numcomp=1) noexcept;
    1178             :     /**
    1179             :     * \brief Divide wherever "src" is "true" or "non-zero".
    1180             :     */
    1181             : #if defined(AMREX_USE_GPU)
    1182             :     template <RunOn run_on>
    1183             : #else
    1184             :     template <RunOn run_on=RunOn::Host>
    1185             : #endif
    1186             :     BaseFab<T>& protected_divide (const BaseFab<T>& src) noexcept;
    1187             : 
    1188             :     /**
    1189             :     * \brief Divide wherever "src" is "true" or "non-zero".
    1190             :     * This FAB is numerator, src FAB is denominator
    1191             :     * divide src components (srccomp:srccomp+numcomp-1) into
    1192             :     * this FABs components (destcomp:destcomp+numcomp-1)
    1193             :     * where the two FABs intersect.
    1194             :     */
    1195             : #if defined(AMREX_USE_GPU)
    1196             :     template <RunOn run_on>
    1197             : #else
    1198             :     template <RunOn run_on=RunOn::Host>
    1199             : #endif
    1200             :     BaseFab<T>& protected_divide (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp=1) noexcept;
    1201             : 
    1202             :     /**
    1203             :     * \brief Divide wherever "src" is "true" or "non-zero".
    1204             :     * Same as above except division is restricted to
    1205             :     * intersection of subbox and src FAB.  NOTE: subbox must be
    1206             :     * contained in this FAB.
    1207             :     */
    1208             : #if defined(AMREX_USE_GPU)
    1209             :     template <RunOn run_on>
    1210             : #else
    1211             :     template <RunOn run_on=RunOn::Host>
    1212             : #endif
    1213             :     BaseFab<T>& protected_divide (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp,
    1214             :                                   int numcomp=1) noexcept;
    1215             : 
    1216             :     /**
    1217             :     * Divide wherever "src" is "true" or "non-zero".
    1218             :     * destbox region of this FAB is numerator. srcbox regions of
    1219             :     * src FAB is denominator. srcbox and destbox must be same size.
    1220             :     */
    1221             : #if defined(AMREX_USE_GPU)
    1222             :     template <RunOn run_on>
    1223             : #else
    1224             :     template <RunOn run_on=RunOn::Host>
    1225             : #endif
    1226             :     BaseFab<T>& protected_divide (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
    1227             :                                   int srccomp, int destcomp, int numcomp=1) noexcept;
    1228             : 
    1229             :     /**
    1230             :     * \brief Linear interpolation / extrapolation.
    1231             :     * Result is (t2-t)/(t2-t1)*f1 + (t-t1)/(t2-t1)*f2
    1232             :     * Data is taken from b1 region of f1, b2 region of f2
    1233             :     * and stored in b region of this FAB.
    1234             :     * Boxes b, b1 and b2 must be the same size.
    1235             :     * Data is taken from component comp1 of f1, comp2 of f2,
    1236             :     * and stored in component comp of this FAB.
    1237             :     * This FAB is returned as a reference for chaining.
    1238             :     */
    1239             : #if defined(AMREX_USE_GPU)
    1240             :     template <RunOn run_on>
    1241             : #else
    1242             :     template <RunOn run_on=RunOn::Host>
    1243             : #endif
    1244             :     BaseFab<T>& linInterp (const BaseFab<T>& f1, const Box& b1, int comp1,
    1245             :                            const BaseFab<T>& f2, const Box& b2, int comp2,
    1246             :                            Real t1, Real t2, Real t,
    1247             :                            const Box& b, int comp, int numcomp = 1) noexcept;
    1248             : 
    1249             :     //! Version of linInterp() in which b, b1, & b2 are the same.
    1250             : #if defined(AMREX_USE_GPU)
    1251             :     template <RunOn run_on>
    1252             : #else
    1253             :     template <RunOn run_on=RunOn::Host>
    1254             : #endif
    1255             :     BaseFab<T>& linInterp (const BaseFab<T>& f1, int comp1,
    1256             :                            const BaseFab<T>& f2, int comp2,
    1257             :                            Real t1, Real t2, Real t,
    1258             :                            const Box& b, int comp, int numcomp = 1) noexcept;
    1259             : 
    1260             :     /**
    1261             :     * \brief Linear combination.  Result is alpha*f1 + beta*f2.
    1262             :     * Data is taken from b1 region of f1, b2 region of f2
    1263             :     * and stored in b region of this FAB.
    1264             :     * Boxes b, b1 and b2 must be the same size.
    1265             :     * Data is taken from component comp1 of f1, comp2 of f2,
    1266             :     * and stored in component comp of this FAB.
    1267             :     * This FAB is returned as a reference for chaining.
    1268             :     */
    1269             : #if defined(AMREX_USE_GPU)
    1270             :     template <RunOn run_on>
    1271             : #else
    1272             :     template <RunOn run_on=RunOn::Host>
    1273             : #endif
    1274             :     BaseFab<T>& linComb (const BaseFab<T>& f1, const Box& b1, int comp1,
    1275             :                          const BaseFab<T>& f2, const Box& b2, int comp2,
    1276             :                          Real alpha, Real beta, const Box& b,
    1277             :                          int comp, int numcomp = 1) noexcept;
    1278             : 
    1279             :     //! Dot product of x (i.e.,this) and y
    1280             : #if defined(AMREX_USE_GPU)
    1281             :     template <RunOn run_on>
    1282             : #else
    1283             :     template <RunOn run_on=RunOn::Host>
    1284             : #endif
    1285             :     [[nodiscard]] T dot (const Box& xbx, int xcomp, const BaseFab<T>& y, const Box& ybx, int ycomp,
    1286             :            int numcomp = 1) const noexcept;
    1287             : 
    1288             : #if defined(AMREX_USE_GPU)
    1289             :     template <RunOn run_on>
    1290             : #else
    1291             :     template <RunOn run_on=RunOn::Host>
    1292             : #endif
    1293             :     [[nodiscard]] T dotmask (const BaseFab<int>& mask, const Box& xbx, int xcomp,
    1294             :                const BaseFab<T>& y, const Box& ybx, int ycomp,
    1295             :                int numcomp) const noexcept;
    1296             : 
    1297             :     //! Change the Box type without change the length
    1298             :     void SetBoxType (const IndexType& typ) noexcept { this->domain.setType(typ); }
    1299             : 
    1300             :     //
    1301             :     // New interfaces
    1302             :     //
    1303             : 
    1304             :     //! Set value on the whole domain and all components
    1305             : #if defined(AMREX_USE_GPU)
    1306             :     template <RunOn run_on>
    1307             : #else
    1308             :     template <RunOn run_on=RunOn::Host>
    1309             : #endif
    1310             :     void setVal (T const& val) noexcept;
    1311             :     //
    1312             :     //! Do nothing if bx is empty.
    1313             : #if defined(AMREX_USE_GPU)
    1314             :     template <RunOn run_on>
    1315             : #else
    1316             :     template <RunOn run_on=RunOn::Host>
    1317             : #endif
    1318             :     void setVal (T const& x, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept;
    1319             : 
    1320             : #if defined(AMREX_USE_GPU)
    1321             :     template <RunOn run_on>
    1322             : #else
    1323             :     template <RunOn run_on=RunOn::Host>
    1324             : #endif
    1325             :     void setValIf (T const& val, const BaseFab<int>& mask) noexcept;
    1326             :     //
    1327             :     //! Do nothing if bx is empty.
    1328             : #if defined(AMREX_USE_GPU)
    1329             :     template <RunOn run_on>
    1330             : #else
    1331             :     template <RunOn run_on=RunOn::Host>
    1332             : #endif
    1333             :     void setValIf (T const& val, Box const& bx, const BaseFab<int>& mask, DestComp dcomp, NumComps ncomp) noexcept;
    1334             : 
    1335             : #if defined(AMREX_USE_GPU)
    1336             :     template <RunOn run_on>
    1337             : #else
    1338             :     template <RunOn run_on=RunOn::Host>
    1339             : #endif
    1340             :     void setValIfNot (T const& val, const BaseFab<int>& mask) noexcept;
    1341             :     //
    1342             :     //! Do nothing if bx is empty.
    1343             : #if defined(AMREX_USE_GPU)
    1344             :     template <RunOn run_on>
    1345             : #else
    1346             :     template <RunOn run_on=RunOn::Host>
    1347             : #endif
    1348             :     void setValIfNot (T const& val, Box const& bx, const BaseFab<int>& mask, DestComp dcomp, NumComps ncomp) noexcept;
    1349             : 
    1350             :     //! setVal on the complement of bx in the fab's domain
    1351             : #if defined(AMREX_USE_GPU)
    1352             :     template <RunOn run_on>
    1353             : #else
    1354             :     template <RunOn run_on=RunOn::Host>
    1355             : #endif
    1356             :     void setComplement (T const& x, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept;
    1357             : 
    1358             :     /**
    1359             :     * copy is performed on the intersection of dest and src fabs.
    1360             :     * All components of dest fab are copied. src fab must have enough
    1361             :     * components (more is OK).
    1362             :     */
    1363             : #if defined(AMREX_USE_GPU)
    1364             :     template <RunOn run_on>
    1365             : #else
    1366             :     template <RunOn run_on=RunOn::Host>
    1367             : #endif
    1368             :     BaseFab<T>& copy (const BaseFab<T>& src) noexcept;
    1369             :     //
    1370             :     //! Do nothing if bx does not intersect with src fab.
    1371             : #if defined(AMREX_USE_GPU)
    1372             :     template <RunOn run_on>
    1373             : #else
    1374             :     template <RunOn run_on=RunOn::Host>
    1375             : #endif
    1376             :     BaseFab<T>& copy (const BaseFab<T>& src, Box bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept;
    1377             : 
    1378             :     //! Scalar addition on the whole domain and all components
    1379             : #if defined(AMREX_USE_GPU)
    1380             :     template <RunOn run_on>
    1381             : #else
    1382             :     template <RunOn run_on=RunOn::Host>
    1383             : #endif
    1384             :     BaseFab<T>& plus (T const& val) noexcept;
    1385             :     //
    1386             : #if defined(AMREX_USE_GPU)
    1387             :     template <RunOn run_on>
    1388             : #else
    1389             :     template <RunOn run_on=RunOn::Host>
    1390             : #endif
    1391             :     BaseFab<T>& operator+= (T const& val) noexcept;
    1392             :     //
    1393             :     //! Do nothing if bx is empty.
    1394             : #if defined(AMREX_USE_GPU)
    1395             :     template <RunOn run_on>
    1396             : #else
    1397             :     template <RunOn run_on=RunOn::Host>
    1398             : #endif
    1399             :     BaseFab<T>& plus (T const& val, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept;
    1400             :     /**
    1401             :     * Fab addition is performed on the intersection of dest and src fabs.
    1402             :     * All components of dest fab are copied. src fab must have enough
    1403             :     * components (more is OK).
    1404             :     */
    1405             : #if defined(AMREX_USE_GPU)
    1406             :     template <RunOn run_on>
    1407             : #else
    1408             :     template <RunOn run_on=RunOn::Host>
    1409             : #endif
    1410             :     BaseFab<T>& plus (const BaseFab<T>& src) noexcept;
    1411             :     //
    1412             : #if defined(AMREX_USE_GPU)
    1413             :     template <RunOn run_on>
    1414             : #else
    1415             :     template <RunOn run_on=RunOn::Host>
    1416             : #endif
    1417             :     BaseFab<T>& operator+= (const BaseFab<T>& src) noexcept;
    1418             :     //
    1419             :     //! Do nothing if bx does not intersect with src fab.
    1420             : #if defined(AMREX_USE_GPU)
    1421             :     template <RunOn run_on>
    1422             : #else
    1423             :     template <RunOn run_on=RunOn::Host>
    1424             : #endif
    1425             :     BaseFab<T>& plus (const BaseFab<T>& src, Box bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept;
    1426             : 
    1427             :     //! Scalar subtraction on the whole domain and all components
    1428             : #if defined(AMREX_USE_GPU)
    1429             :     template <RunOn run_on>
    1430             : #else
    1431             :     template <RunOn run_on=RunOn::Host>
    1432             : #endif
    1433             :     BaseFab<T>& minus (T const& val) noexcept;
    1434             :     //
    1435             : #if defined(AMREX_USE_GPU)
    1436             :     template <RunOn run_on>
    1437             : #else
    1438             :     template <RunOn run_on=RunOn::Host>
    1439             : #endif
    1440             :     BaseFab<T>& operator-= (T const& val) noexcept;
    1441             :     //
    1442             :     //! Do nothing if bx is empty.
    1443             : #if defined(AMREX_USE_GPU)
    1444             :     template <RunOn run_on>
    1445             : #else
    1446             :     template <RunOn run_on=RunOn::Host>
    1447             : #endif
    1448             :     BaseFab<T>& minus (T const& val, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept;
    1449             :     /**
    1450             :     * Fab subtraction is performed on the intersection of dest and src fabs.
    1451             :     * All components of dest fab are copied. src fab must have enough
    1452             :     * components (more is OK).
    1453             :     */
    1454             : #if defined(AMREX_USE_GPU)
    1455             :     template <RunOn run_on>
    1456             : #else
    1457             :     template <RunOn run_on=RunOn::Host>
    1458             : #endif
    1459             :     BaseFab<T>& minus (const BaseFab<T>& src) noexcept;
    1460             :     //
    1461             : #if defined(AMREX_USE_GPU)
    1462             :     template <RunOn run_on>
    1463             : #else
    1464             :     template <RunOn run_on=RunOn::Host>
    1465             : #endif
    1466             :     BaseFab<T>& operator-= (const BaseFab<T>& src) noexcept;
    1467             :     //
    1468             :     //! Do nothing if bx does not intersect with src fab.
    1469             : #if defined(AMREX_USE_GPU)
    1470             :     template <RunOn run_on>
    1471             : #else
    1472             :     template <RunOn run_on=RunOn::Host>
    1473             : #endif
    1474             :     BaseFab<T>& minus (const BaseFab<T>& src, Box bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept;
    1475             : 
    1476             :     //! Scalar multiplication on the whole domain and all components
    1477             : #if defined(AMREX_USE_GPU)
    1478             :     template <RunOn run_on>
    1479             : #else
    1480             :     template <RunOn run_on=RunOn::Host>
    1481             : #endif
    1482             :     BaseFab<T>& mult (T const& val) noexcept;
    1483             :     //
    1484             : #if defined(AMREX_USE_GPU)
    1485             :     template <RunOn run_on>
    1486             : #else
    1487             :     template <RunOn run_on=RunOn::Host>
    1488             : #endif
    1489             :     BaseFab<T>& operator*= (T const& val) noexcept;
    1490             :     //
    1491             :     //! Do nothing if bx is empty.
    1492             : #if defined(AMREX_USE_GPU)
    1493             :     template <RunOn run_on>
    1494             : #else
    1495             :     template <RunOn run_on=RunOn::Host>
    1496             : #endif
    1497             :     BaseFab<T>& mult (T const& val, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept;
    1498             :     /**
    1499             :     * Fab multiplication is performed on the intersection of dest and src fabs.
    1500             :     * All components of dest fab are copied. src fab must have enough
    1501             :     * components (more is OK).
    1502             :     */
    1503             : #if defined(AMREX_USE_GPU)
    1504             :     template <RunOn run_on>
    1505             : #else
    1506             :     template <RunOn run_on=RunOn::Host>
    1507             : #endif
    1508             :     BaseFab<T>& mult (const BaseFab<T>& src) noexcept;
    1509             :     //
    1510             : #if defined(AMREX_USE_GPU)
    1511             :     template <RunOn run_on>
    1512             : #else
    1513             :     template <RunOn run_on=RunOn::Host>
    1514             : #endif
    1515             :     BaseFab<T>& operator*= (const BaseFab<T>& src) noexcept;
    1516             :     //
    1517             :     //! Do nothing if bx does not intersect with src fab.
    1518             : #if defined(AMREX_USE_GPU)
    1519             :     template <RunOn run_on>
    1520             : #else
    1521             :     template <RunOn run_on=RunOn::Host>
    1522             : #endif
    1523             :     BaseFab<T>& mult (const BaseFab<T>& src, Box bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept;
    1524             : 
    1525             :     //! Scalar division on the whole domain and all components
    1526             : #if defined(AMREX_USE_GPU)
    1527             :     template <RunOn run_on>
    1528             : #else
    1529             :     template <RunOn run_on=RunOn::Host>
    1530             : #endif
    1531             :     BaseFab<T>& divide (T const& val) noexcept;
    1532             :     //
    1533             : #if defined(AMREX_USE_GPU)
    1534             :     template <RunOn run_on>
    1535             : #else
    1536             :     template <RunOn run_on=RunOn::Host>
    1537             : #endif
    1538             :     BaseFab<T>& operator/= (T const& val) noexcept;
    1539             :     //
    1540             :     //! Do nothing if bx is empty.
    1541             : #if defined(AMREX_USE_GPU)
    1542             :     template <RunOn run_on>
    1543             : #else
    1544             :     template <RunOn run_on=RunOn::Host>
    1545             : #endif
    1546             :     BaseFab<T>& divide (T const& val, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept;
    1547             :     /**
    1548             :     * Fab division is performed on the intersection of dest and src fabs.
    1549             :     * All components of dest fab are copied. src fab must have enough
    1550             :     * components (more is OK).
    1551             :     */
    1552             : #if defined(AMREX_USE_GPU)
    1553             :     template <RunOn run_on>
    1554             : #else
    1555             :     template <RunOn run_on=RunOn::Host>
    1556             : #endif
    1557             :     BaseFab<T>& divide (const BaseFab<T>& src) noexcept;
    1558             :     //
    1559             : #if defined(AMREX_USE_GPU)
    1560             :     template <RunOn run_on>
    1561             : #else
    1562             :     template <RunOn run_on=RunOn::Host>
    1563             : #endif
    1564             :     BaseFab<T>& operator/= (const BaseFab<T>& src) noexcept;
    1565             :     //
    1566             :     //! Do nothing if bx does not intersect with src fab.
    1567             : #if defined(AMREX_USE_GPU)
    1568             :     template <RunOn run_on>
    1569             : #else
    1570             :     template <RunOn run_on=RunOn::Host>
    1571             : #endif
    1572             :     BaseFab<T>& divide (const BaseFab<T>& src, Box bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept;
    1573             : 
    1574             :     //! on the whole domain and all components
    1575             : #if defined(AMREX_USE_GPU)
    1576             :     template <RunOn run_on>
    1577             : #else
    1578             :     template <RunOn run_on=RunOn::Host>
    1579             : #endif
    1580             :     BaseFab<T>& negate () noexcept;
    1581             :     //
    1582             : #if defined(AMREX_USE_GPU)
    1583             :     template <RunOn run_on>
    1584             : #else
    1585             :     template <RunOn run_on=RunOn::Host>
    1586             : #endif
    1587             :     BaseFab<T>& negate (const Box& bx, DestComp dcomp, NumComps ncomp) noexcept;
    1588             : 
    1589             :     //! Fab <- Fab/r on the whole domain and all components
    1590             : #if defined(AMREX_USE_GPU)
    1591             :     template <RunOn run_on>
    1592             : #else
    1593             :     template <RunOn run_on=RunOn::Host>
    1594             : #endif
    1595             :     BaseFab<T>& invert (T const& r) noexcept;
    1596             :     //
    1597             : #if defined(AMREX_USE_GPU)
    1598             :     template <RunOn run_on>
    1599             : #else
    1600             :     template <RunOn run_on=RunOn::Host>
    1601             : #endif
    1602             :     BaseFab<T>& invert (T const& r, const Box& bx, DestComp dcomp, NumComps ncomp) noexcept;
    1603             : 
    1604             :     //! Sum
    1605             : #if defined(AMREX_USE_GPU)
    1606             :     template <RunOn run_on>
    1607             : #else
    1608             :     template <RunOn run_on=RunOn::Host>
    1609             : #endif
    1610             :     [[nodiscard]] T sum (const Box& bx, DestComp dcomp, NumComps ncomp) const noexcept;
    1611             : 
    1612             :     //! Dot product of two Fabs
    1613             : #if defined(AMREX_USE_GPU)
    1614             :     template <RunOn run_on>
    1615             : #else
    1616             :     template <RunOn run_on=RunOn::Host>
    1617             : #endif
    1618             :     [[nodiscard]] T dot (const BaseFab<T>& src, const Box& bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) const noexcept;
    1619             : 
    1620             :     //! Int wrapper for dot.
    1621             : #if defined(AMREX_USE_GPU)
    1622             :     template <RunOn run_on>
    1623             : #else
    1624             :     template <RunOn run_on=RunOn::Host>
    1625             : #endif
    1626             :     [[nodiscard]] T dot (const Box& bx, int destcomp, int numcomp) const noexcept;
    1627             : 
    1628             :     //! Dot product
    1629             : #if defined(AMREX_USE_GPU)
    1630             :     template <RunOn run_on>
    1631             : #else
    1632             :     template <RunOn run_on=RunOn::Host>
    1633             : #endif
    1634             :     [[nodiscard]] T dot (const Box& bx, DestComp dcomp, NumComps ncomp) const noexcept;
    1635             : 
    1636             :     //! Dot product of two Fabs with mask
    1637             : #if defined(AMREX_USE_GPU)
    1638             :     template <RunOn run_on>
    1639             : #else
    1640             :     template <RunOn run_on=RunOn::Host>
    1641             : #endif
    1642             :     [[nodiscard]] T dotmask (const BaseFab<T>& src, const Box& bx, const BaseFab<int>& mask,
    1643             :                SrcComp scomp, DestComp dcomp, NumComps ncomp) const noexcept;
    1644             : 
    1645             : protected:
    1646             :     //! Allocates memory for the BaseFab<T>.
    1647             :     void define ();
    1648             : 
    1649             :     T*   dptr = nullptr;        //!< The data pointer.
    1650             :     Box  domain;                //!< My index space.
    1651             :     int  nvar = 0;              //!< Number components.
    1652             :     Long truesize = 0L;         //!< nvar*numpts that was allocated on heap.
    1653             :     bool ptr_owner = false;     //!< Owner of T*?
    1654             :     bool shared_memory = false; //!< Is the memory allocated in shared memory?
    1655             : #ifdef AMREX_USE_GPU
    1656             :     gpuStream_t alloc_stream{};
    1657             : #endif
    1658             : };
    1659             : 
    1660             : template <class T>
    1661             : AMREX_FORCE_INLINE
    1662             : T*
    1663             : BaseFab<T>::dataPtr (const IntVect& p, int n) noexcept
    1664             : {
    1665             :     AMREX_ASSERT(n >= 0);
    1666             :     AMREX_ASSERT(n < this->nvar);
    1667             :     AMREX_ASSERT(!(this->dptr == nullptr));
    1668             :     AMREX_ASSERT(this->domain.contains(p));
    1669             : 
    1670             :     return this->dptr + (this->domain.index(p)+n*this->domain.numPts());
    1671             : }
    1672             : 
    1673             : template <class T>
    1674             : AMREX_FORCE_INLINE
    1675             : const T*
    1676             : BaseFab<T>::dataPtr (const IntVect& p, int n) const noexcept
    1677             : {
    1678             :     AMREX_ASSERT(n >= 0);
    1679             :     AMREX_ASSERT(n < this->nvar);
    1680             :     AMREX_ASSERT(!(this->dptr == nullptr));
    1681             :     AMREX_ASSERT(this->domain.contains(p));
    1682             : 
    1683             :     return this->dptr + (this->domain.index(p)+n*this->domain.numPts());
    1684             : }
    1685             : 
    1686             : template <class T>
    1687             : void
    1688             : BaseFab<T>::prefetchToHost () const noexcept
    1689             : {
    1690             : #ifdef AMREX_USE_GPU
    1691             :     if (this->arena()->isManaged()) {
    1692             : #if defined(AMREX_USE_SYCL)
    1693             :         // xxxxx SYCL todo: prefetchToHost
    1694             :         // std::size_t s = sizeof(T)*this->nvar*this->domain.numPts();
    1695             :         // auto& q = Gpu::Device::streamQueue();
    1696             :         // q.submit([&] (sycl::handler& h) { h.prefetch(this->dptr, s); });
    1697             : #elif defined(AMREX_USE_CUDA) && !defined(_WIN32)
    1698             :         if (Gpu::Device::devicePropMajor() >= 6) {
    1699             :             std::size_t s = sizeof(T)*this->nvar*this->domain.numPts();
    1700             :             AMREX_CUDA_SAFE_CALL(cudaMemPrefetchAsync(this->dptr, s,
    1701             :                                                       cudaCpuDeviceId,
    1702             :                                                       Gpu::gpuStream()));
    1703             :         }
    1704             : #elif defined(AMREX_USE_HIP)
    1705             :         // xxxxx HIP FIX HERE after managed memory is supported
    1706             : #endif
    1707             :     }
    1708             : #endif
    1709             : }
    1710             : 
    1711             : template <class T>
    1712             : void
    1713             : BaseFab<T>::prefetchToDevice () const noexcept
    1714             : {
    1715             : #ifdef AMREX_USE_GPU
    1716             :     if (this->arena()->isManaged()) {
    1717             : #if defined(AMREX_USE_SYCL)
    1718             :         std::size_t s = sizeof(T)*this->nvar*this->domain.numPts();
    1719             :         auto& q = Gpu::Device::streamQueue();
    1720             :         q.submit([&] (sycl::handler& h) { h.prefetch(this->dptr, s); });
    1721             : #elif defined(AMREX_USE_CUDA) && !defined(_WIN32)
    1722             :         if (Gpu::Device::devicePropMajor() >= 6) {
    1723             :             std::size_t s = sizeof(T)*this->nvar*this->domain.numPts();
    1724             :             AMREX_CUDA_SAFE_CALL(cudaMemPrefetchAsync(this->dptr, s,
    1725             :                                                       Gpu::Device::deviceId(),
    1726             :                                                       Gpu::gpuStream()));
    1727             :         }
    1728             : #elif defined(AMREX_USE_HIP)
    1729             :         // xxxxx HIP FIX HERE after managed memory is supported
    1730             : #endif
    1731             :     }
    1732             : #endif
    1733             : }
    1734             : 
    1735             : template <class T>
    1736             : AMREX_FORCE_INLINE
    1737             : T&
    1738             : BaseFab<T>::operator() (const IntVect& p, int n) noexcept
    1739             : {
    1740             :     AMREX_ASSERT(n >= 0);
    1741             :     AMREX_ASSERT(n < this->nvar);
    1742             :     AMREX_ASSERT(!(this->dptr == nullptr));
    1743             :     AMREX_ASSERT(this->domain.contains(p));
    1744             : 
    1745    61983700 :     return this->dptr[this->domain.index(p)+n*this->domain.numPts()];
    1746             : }
    1747             : 
    1748             : template <class T>
    1749             : AMREX_FORCE_INLINE
    1750             : T&
    1751             : BaseFab<T>::operator() (const IntVect& p) noexcept
    1752             : {
    1753             :     AMREX_ASSERT(!(this->dptr == nullptr));
    1754             :     AMREX_ASSERT(this->domain.contains(p));
    1755             : 
    1756             :     return this->dptr[this->domain.index(p)];
    1757             : }
    1758             : 
    1759             : template <class T>
    1760             : AMREX_FORCE_INLINE
    1761             : const T&
    1762             : BaseFab<T>::operator() (const IntVect& p, int n) const noexcept
    1763             : {
    1764             :     AMREX_ASSERT(n >= 0);
    1765             :     AMREX_ASSERT(n < this->nvar);
    1766             :     AMREX_ASSERT(!(this->dptr == nullptr));
    1767             :     AMREX_ASSERT(this->domain.contains(p));
    1768             : 
    1769    85867800 :     return this->dptr[this->domain.index(p)+n*this->domain.numPts()];
    1770             : }
    1771             : 
    1772             : template <class T>
    1773             : AMREX_FORCE_INLINE
    1774             : const T&
    1775             : BaseFab<T>::operator() (const IntVect& p) const noexcept
    1776             : {
    1777             :     AMREX_ASSERT(!(this->dptr == nullptr));
    1778             :     AMREX_ASSERT(this->domain.contains(p));
    1779             : 
    1780             :     return this->dptr[this->domain.index(p)];
    1781             : }
    1782             : 
    1783             : template <class T>
    1784             : void
    1785             : BaseFab<T>::getVal  (T*             data,
    1786             :                      const IntVect& pos,
    1787             :                      int            n,
    1788             :                      int            numcomp) const noexcept
    1789             : {
    1790             :     const int loc      = this->domain.index(pos);
    1791             :     const Long sz    = this->domain.numPts();
    1792             : 
    1793             :     AMREX_ASSERT(!(this->dptr == nullptr));
    1794             :     AMREX_ASSERT(n >= 0 && n + numcomp <= this->nvar);
    1795             : 
    1796             :     for (int k = 0; k < numcomp; k++) {
    1797             :         data[k] = this->dptr[loc+(n+k)*sz];
    1798             :     }
    1799             : }
    1800             : 
    1801             : template <class T>
    1802             : void
    1803             : BaseFab<T>::getVal (T*             data,
    1804             :                     const IntVect& pos) const noexcept
    1805             : {
    1806             :     getVal(data,pos,0,this->nvar);
    1807             : }
    1808             : 
    1809             : template <class T>
    1810             : BaseFab<T>&
    1811             : BaseFab<T>::shift (const IntVect& v) noexcept
    1812             : {
    1813             :     this->domain += v;
    1814             :     return *this;
    1815             : }
    1816             : 
    1817             : template <class T>
    1818             : BaseFab<T>&
    1819             : BaseFab<T>::shift (int idir, int n_cell) noexcept
    1820             : {
    1821             :     this->domain.shift(idir,n_cell);
    1822             :     return *this;
    1823             : }
    1824             : 
    1825             : template <class T>
    1826             : BaseFab<T> &
    1827             : BaseFab<T>::shiftHalf (const IntVect& v) noexcept
    1828             : {
    1829             :     this->domain.shiftHalf(v);
    1830             :     return *this;
    1831             : }
    1832             : 
    1833             : template <class T>
    1834             : BaseFab<T> &
    1835             : BaseFab<T>::shiftHalf (int idir, int n_cell) noexcept
    1836             : {
    1837             :     this->domain.shiftHalf(idir,n_cell);
    1838             :     return *this;
    1839             : }
    1840             : 
    1841             : template <class T>
    1842             : template <RunOn run_on, class U,
    1843             :           std::enable_if_t<std::is_same_v<U,float> || std::is_same_v<U,double>, int> FOO>
    1844             : void
    1845             : BaseFab<T>::fill_snan () noexcept
    1846             : {
    1847             :     amrex::fill_snan<run_on>(this->dptr, this->truesize);
    1848             : }
    1849             : 
    1850             : template <class T>
    1851             : template <RunOn run_on>
    1852             : void
    1853             : BaseFab<T>::setVal (T const& x, const Box& bx, int n) noexcept
    1854             : {
    1855             :     this->setVal<run_on>(x, bx, DestComp{n}, NumComps{1});
    1856             : }
    1857             : 
    1858             : template <class T>
    1859             : template <RunOn run_on>
    1860             : void
    1861             : BaseFab<T>::setVal (T const& x, int n) noexcept
    1862             : {
    1863             :     this->setVal<run_on>(x, this->domain, DestComp{n}, NumComps{1});
    1864             : }
    1865             : 
    1866             : template <class T>
    1867             : template <RunOn run_on>
    1868             : void
    1869           0 : BaseFab<T>::setVal (T const& x, const Box& bx, int dcomp, int ncomp) noexcept
    1870             : {
    1871           0 :     this->setVal<run_on>(x, bx, DestComp{dcomp}, NumComps{ncomp});
    1872           0 : }
    1873             : 
    1874             : template <class T>
    1875             : template <RunOn run_on>
    1876             : void
    1877             : BaseFab<T>::setValIfNot (T const& val, const Box& bx, const BaseFab<int>& mask, int ns, int num) noexcept
    1878             : {
    1879             :     this->setValIfNot<run_on>(val, bx, mask, DestComp{ns}, NumComps{num});
    1880             : }
    1881             : 
    1882             : template <class T>
    1883             : template <RunOn run_on>
    1884             : BaseFab<T>&
    1885       13574 : BaseFab<T>::copy (const BaseFab<T>& src, const Box& srcbox, int srccomp,
    1886             :                   const Box& destbox, int destcomp, int numcomp) noexcept
    1887             : {
    1888             :     AMREX_ASSERT(destbox.ok());
    1889             :     AMREX_ASSERT(srcbox.sameSize(destbox));
    1890             :     AMREX_ASSERT(src.box().contains(srcbox));
    1891             :     AMREX_ASSERT(this->domain.contains(destbox));
    1892             :     AMREX_ASSERT(srccomp >= 0 && srccomp+numcomp <= src.nComp());
    1893             :     AMREX_ASSERT(destcomp >= 0 && destcomp+numcomp <= this->nvar);
    1894             : 
    1895       13574 :     Array4<T> const& d = this->array();
    1896       13574 :     Array4<T const> const& s = src.const_array();
    1897             :     const auto dlo = amrex::lbound(destbox);
    1898             :     const auto slo = amrex::lbound(srcbox);
    1899       13574 :     const Dim3 offset{slo.x-dlo.x,slo.y-dlo.y,slo.z-dlo.z};
    1900             : 
    1901     8241854 :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
    1902             :     {
    1903             :         d(i,j,k,n+destcomp) = s(i+offset.x,j+offset.y,k+offset.z,n+srccomp);
    1904             :     });
    1905             : 
    1906       13574 :     return *this;
    1907             : }
    1908             : 
    1909             : template <class T>
    1910             : template <RunOn run_on>
    1911             : BaseFab<T>&
    1912             : BaseFab<T>::copy (const BaseFab<T>& src, const Box& destbox) noexcept
    1913             : {
    1914             :     return this->copy<run_on>(src, destbox, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
    1915             : }
    1916             : 
    1917             : template <class T>
    1918             : template <RunOn run_on>
    1919             : BaseFab<T>&
    1920             : BaseFab<T>::copy (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp) noexcept
    1921             : {
    1922             :     return copy<run_on>(src, this->domain, SrcComp{srccomp}, DestComp{destcomp}, NumComps{numcomp});
    1923             : }
    1924             : 
    1925             : template <class T>
    1926             : void
    1927        3315 : BaseFab<T>::define ()
    1928             : {
    1929             :     AMREX_ASSERT(this->dptr == nullptr);
    1930             :     AMREX_ASSERT(this->domain.numPts() > 0);
    1931             :     AMREX_ASSERT(this->nvar >= 0);
    1932        3315 :     if (this->nvar == 0) { return; }
    1933             :     AMREX_ASSERT(std::numeric_limits<Long>::max()/this->nvar > this->domain.numPts());
    1934             : 
    1935        3315 :     this->truesize  = this->nvar*this->domain.numPts();
    1936        3315 :     this->ptr_owner = true;
    1937        3315 :     this->dptr = static_cast<T*>(this->alloc(this->truesize*sizeof(T)));
    1938             : #ifdef AMREX_USE_GPU
    1939             :     this->alloc_stream = Gpu::gpuStream();
    1940             : #endif
    1941             : 
    1942        3315 :     placementNew(this->dptr, this->truesize);
    1943             : 
    1944        3315 :     amrex::update_fab_stats(this->domain.numPts(), this->truesize, sizeof(T));
    1945             : 
    1946             :     if constexpr (std::is_same_v<T,float> || std::is_same_v<T,double>) {
    1947             :         if (amrex::InitSNaN() && this->truesize > 0) {
    1948             : #ifdef AMREX_USE_GPU
    1949             :             if (Gpu::inLaunchRegion() && arena()->isDeviceAccessible()) {
    1950             :                 this->template fill_snan<RunOn::Device>();
    1951             :                 Gpu::streamSynchronize();
    1952             :             } else
    1953             : #endif
    1954             :             {
    1955             :                 this->template fill_snan<RunOn::Host>();
    1956             :             }
    1957             :         }
    1958             :     }
    1959             : }
    1960             : 
    1961             : template <class T>
    1962             : BaseFab<T>::BaseFab (Arena* ar) noexcept
    1963             :     : DataAllocator{ar}
    1964             : {}
    1965             : 
    1966             : template <class T>
    1967             : BaseFab<T>::BaseFab (const Box& bx, int n, Arena* ar)
    1968             :     : DataAllocator{ar}, domain(bx), nvar(n)
    1969             : {
    1970             :     define();
    1971             : }
    1972             : 
    1973             : template <class T>
    1974        3315 : BaseFab<T>::BaseFab (const Box& bx, int n, bool alloc, bool shared, Arena* ar)
    1975        3315 :     : DataAllocator{ar}, domain(bx), nvar(n), shared_memory(shared)
    1976             : {
    1977        3315 :     if (!this->shared_memory && alloc) { define(); }
    1978        3315 : }
    1979             : 
    1980             : template <class T>
    1981           0 : BaseFab<T>::BaseFab (const BaseFab<T>& rhs, MakeType make_type, int scomp, int ncomp)
    1982             :     : DataAllocator{rhs.arena()},
    1983           0 :       dptr(const_cast<T*>(rhs.dataPtr(scomp))),
    1984             :       domain(rhs.domain), nvar(ncomp),
    1985           0 :       truesize(ncomp*rhs.domain.numPts())
    1986             : {
    1987             :     AMREX_ASSERT(scomp+ncomp <= rhs.nComp());
    1988           0 :     if (make_type == amrex::make_deep_copy)
    1989             :     {
    1990           0 :         this->dptr = nullptr;
    1991           0 :         define();
    1992           0 :         this->copy<RunOn::Device>(rhs, this->domain, scomp, this->domain, 0, ncomp);
    1993           0 :     } else if (make_type == amrex::make_alias) {
    1994             :         ; // nothing to do
    1995             :     } else {
    1996             :         amrex::Abort("BaseFab: unknown MakeType");
    1997             :     }
    1998           0 : }
    1999             : 
    2000             : template<class T>
    2001             : BaseFab<T>::BaseFab (const Box& bx, int ncomp, T* p)
    2002             :     : dptr(p), domain(bx), nvar(ncomp), truesize(bx.numPts()*ncomp)
    2003             : {
    2004             : }
    2005             : 
    2006             : template<class T>
    2007             : BaseFab<T>::BaseFab (const Box& bx, int ncomp, T const* p)
    2008             :     : dptr(const_cast<T*>(p)), domain(bx), nvar(ncomp), truesize(bx.numPts()*ncomp)
    2009             : {
    2010             : }
    2011             : 
    2012             : template<class T>
    2013             : BaseFab<T>::BaseFab (Array4<T> const& a) noexcept
    2014             :     : dptr(a.p),
    2015             :       domain(IntVect(AMREX_D_DECL(a.begin.x,a.begin.y,a.begin.z)),
    2016             :              IntVect(AMREX_D_DECL(a.end.x-1,a.end.y-1,a.end.z-1))),
    2017             :       nvar(a.ncomp), truesize(a.ncomp*a.nstride)
    2018             : {}
    2019             : 
    2020             : template<class T>
    2021             : BaseFab<T>::BaseFab (Array4<T> const& a, IndexType t) noexcept
    2022             :     : dptr(a.p),
    2023             :       domain(IntVect(AMREX_D_DECL(a.begin.x,a.begin.y,a.begin.z)),
    2024             :              IntVect(AMREX_D_DECL(a.end.x-1,a.end.y-1,a.end.z-1)), t),
    2025             :       nvar(a.ncomp), truesize(a.ncomp*a.nstride)
    2026             : {}
    2027             : 
    2028             : template<class T>
    2029             : BaseFab<T>::BaseFab (Array4<T const> const& a) noexcept
    2030             :     : dptr(const_cast<T*>(a.p)),
    2031             :       domain(IntVect(AMREX_D_DECL(a.begin.x,a.begin.y,a.begin.z)),
    2032             :              IntVect(AMREX_D_DECL(a.end.x-1,a.end.y-1,a.end.z-1))),
    2033             :       nvar(a.ncomp), truesize(a.ncomp*a.nstride)
    2034             : {}
    2035             : 
    2036             : template<class T>
    2037             : BaseFab<T>::BaseFab (Array4<T const> const& a, IndexType t) noexcept
    2038             :     : dptr(const_cast<T*>(a.p)),
    2039             :       domain(IntVect(AMREX_D_DECL(a.begin.x,a.begin.y,a.begin.z)),
    2040             :              IntVect(AMREX_D_DECL(a.end.x-1,a.end.y-1,a.end.z-1)), t),
    2041             :       nvar(a.ncomp), truesize(a.ncomp*a.nstride)
    2042             : {}
    2043             : 
    2044             : template <class T>
    2045      413071 : BaseFab<T>::~BaseFab () noexcept
    2046             : {
    2047      409756 :     clear();
    2048      413071 : }
    2049             : 
    2050             : template <class T>
    2051             : BaseFab<T>::BaseFab (BaseFab<T>&& rhs) noexcept
    2052             :     : DataAllocator{rhs.arena()},
    2053             :       dptr(rhs.dptr), domain(rhs.domain),
    2054             :       nvar(rhs.nvar), truesize(rhs.truesize),
    2055             :       ptr_owner(rhs.ptr_owner), shared_memory(rhs.shared_memory)
    2056             : #ifdef AMREX_USE_GPU
    2057             :       , alloc_stream(rhs.alloc_stream)
    2058             : #endif
    2059             : {
    2060             :     rhs.dptr = nullptr;
    2061             :     rhs.ptr_owner = false;
    2062             : }
    2063             : 
    2064             : template <class T>
    2065             : BaseFab<T>&
    2066             : BaseFab<T>::operator= (BaseFab<T>&& rhs) noexcept
    2067             : {
    2068             :     if (this != &rhs) {
    2069             :         clear();
    2070             :         DataAllocator::operator=(rhs);
    2071             :         dptr = rhs.dptr;
    2072             :         domain = rhs.domain;
    2073             :         nvar = rhs.nvar;
    2074             :         truesize = rhs.truesize;
    2075             :         ptr_owner = rhs.ptr_owner;
    2076             :         shared_memory = rhs.shared_memory;
    2077             : #ifdef AMREX_USE_GPU
    2078             :         alloc_stream = rhs.alloc_stream;
    2079             : #endif
    2080             : 
    2081             :         rhs.dptr = nullptr;
    2082             :         rhs.ptr_owner = false;
    2083             :     }
    2084             :     return *this;
    2085             : }
    2086             : 
    2087             : template <class T>
    2088             : template <RunOn run_on>
    2089             : BaseFab<T>&
    2090             : BaseFab<T>::operator= (T const& t) noexcept
    2091             : {
    2092             :     setVal<run_on>(t);
    2093             :     return *this;
    2094             : }
    2095             : 
    2096             : template <class T>
    2097             : void
    2098             : BaseFab<T>::resize (const Box& b, int n, Arena* ar)
    2099             : {
    2100             :     this->nvar   = n;
    2101             :     this->domain = b;
    2102             : 
    2103             :     if (ar == nullptr) {
    2104             :         ar = m_arena;
    2105             :     }
    2106             : 
    2107             :     if (arena() != DataAllocator(ar).arena()) {
    2108             :         clear();
    2109             :         m_arena = ar;
    2110             :         define();
    2111             :     }
    2112             :     else if (this->dptr == nullptr || !this->ptr_owner)
    2113             :     {
    2114             :         if (this->shared_memory) {
    2115             :             amrex::Abort("BaseFab::resize: BaseFab in shared memory cannot increase size");
    2116             :         }
    2117             : 
    2118             :         this->dptr = nullptr;
    2119             :         define();
    2120             :     }
    2121             :     else if (this->nvar*this->domain.numPts() > this->truesize
    2122             : #ifdef AMREX_USE_GPU
    2123             :              || (arena()->isStreamOrderedArena() && alloc_stream != Gpu::gpuStream())
    2124             : #endif
    2125             :              )
    2126             :     {
    2127             :         if (this->shared_memory) {
    2128             :             amrex::Abort("BaseFab::resize: BaseFab in shared memory cannot increase size");
    2129             :         }
    2130             : 
    2131             :         clear();
    2132             : 
    2133             :         define();
    2134             :     }
    2135             : }
    2136             : 
    2137             : template <class T>
    2138             : template <class U, std::enable_if_t<std::is_trivially_destructible_v<U>,int>>
    2139             : Elixir
    2140             : BaseFab<T>::elixir () noexcept
    2141             : {
    2142             :     bool o;
    2143             :     if (Gpu::inLaunchRegion()) {
    2144             :         o = this->ptr_owner;
    2145             :         this->ptr_owner = false;
    2146             :         if (o && this->dptr) {
    2147             :             if (this->nvar > 1) {
    2148             :                 amrex::update_fab_stats(-this->truesize/this->nvar, -this->truesize, sizeof(T));
    2149             :             } else {
    2150             :                 amrex::update_fab_stats(0, -this->truesize, sizeof(T));
    2151             :             }
    2152             :         }
    2153             :     } else {
    2154             :         o = false;
    2155             :     }
    2156             :     return Elixir((o ? this->dptr : nullptr), this->arena());
    2157             : }
    2158             : 
    2159             : template <class T>
    2160             : void
    2161      409827 : BaseFab<T>::clear () noexcept
    2162             : {
    2163      409827 :     if (this->dptr)
    2164             :     {
    2165             :         //
    2166             :         // Call T::~T() on the to-be-destroyed memory.
    2167             :         //
    2168      409756 :         if (this->ptr_owner)
    2169             :         {
    2170      409456 :             if (this->shared_memory)
    2171             :             {
    2172             :                 amrex::Abort("BaseFab::clear: BaseFab cannot be owner of shared memory");
    2173             :             }
    2174             : 
    2175      409456 :             placementDelete(this->dptr, this->truesize);
    2176             : 
    2177             : #ifdef AMREX_USE_GPU
    2178             :             auto current_stream = Gpu::Device::gpuStream();
    2179             :             Gpu::Device::setStream(alloc_stream);
    2180             : #endif
    2181      409456 :             this->free(this->dptr);
    2182             : #ifdef AMREX_USE_GPU
    2183             :             Gpu::Device::setStream(current_stream);
    2184             : #endif
    2185             : 
    2186      409456 :             if (this->nvar > 1) {
    2187      402374 :                 amrex::update_fab_stats(-this->truesize/this->nvar, -this->truesize, sizeof(T));
    2188             :             } else {
    2189        7082 :                 amrex::update_fab_stats(0, -this->truesize, sizeof(T));
    2190             :             }
    2191             :         }
    2192             : 
    2193      409756 :         this->dptr = nullptr;
    2194      409756 :         this->truesize = 0;
    2195             :     }
    2196      409827 : }
    2197             : 
    2198             : template <class T>
    2199             : std::unique_ptr<T,DataDeleter>
    2200             : BaseFab<T>::release () noexcept
    2201             : {
    2202             :     std::unique_ptr<T,DataDeleter> r(nullptr, DataDeleter{this->arena()});
    2203             :     if (this->dptr && this->ptr_owner) {
    2204             :         r.reset(this->dptr);
    2205             :         this->ptr_owner = false;
    2206             :         if (this->nvar > 1) {
    2207             :             amrex::update_fab_stats(-this->truesize/this->nvar, -this->truesize, sizeof(T));
    2208             :         } else {
    2209             :             amrex::update_fab_stats(0, -this->truesize, sizeof(T));
    2210             :         }
    2211             :     }
    2212             :     return r;
    2213             : }
    2214             : 
    2215             : template <class T>
    2216             : template <RunOn run_on>
    2217             : std::size_t
    2218             : BaseFab<T>::copyToMem (const Box& srcbox,
    2219             :                        int        srccomp,
    2220             :                        int        numcomp,
    2221             :                        void*      dst) const noexcept
    2222             : {
    2223             :     BL_ASSERT(box().contains(srcbox));
    2224             :     BL_ASSERT(srccomp >= 0 && srccomp+numcomp <= nComp());
    2225             : 
    2226             :     if (srcbox.ok())
    2227             :     {
    2228             :         Array4<T> d(static_cast<T*>(dst),amrex::begin(srcbox),amrex::end(srcbox),numcomp);
    2229             :         Array4<T const> const& s = this->const_array();
    2230             :         AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, srcbox, numcomp, i, j, k, n,
    2231             :         {
    2232             :             d(i,j,k,n) = s(i,j,k,n+srccomp);
    2233             :         });
    2234             :         return sizeof(T)*d.size();
    2235             :     }
    2236             :     else
    2237             :     {
    2238             :         return 0;
    2239             :     }
    2240             : }
    2241             : 
    2242             : template <class T>
    2243             : template <RunOn run_on, typename BUF>
    2244             : std::size_t
    2245           0 : BaseFab<T>::copyFromMem (const Box&  dstbox,
    2246             :                          int         dstcomp,
    2247             :                          int         numcomp,
    2248             :                          const void* src) noexcept
    2249             : {
    2250             :     BL_ASSERT(box().contains(dstbox));
    2251             :     BL_ASSERT(dstcomp >= 0 && dstcomp+numcomp <= nComp());
    2252             : 
    2253           0 :     if (dstbox.ok())
    2254             :     {
    2255           0 :         Array4<BUF const> s(static_cast<BUF const*>(src), amrex::begin(dstbox),
    2256           0 :                             amrex::end(dstbox), numcomp);
    2257           0 :         Array4<T> const& d = this->array();
    2258           0 :         AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, dstbox, numcomp, i, j, k, n,
    2259             :         {
    2260             :             d(i,j,k,n+dstcomp) = static_cast<T>(s(i,j,k,n));
    2261             :         });
    2262           0 :         return sizeof(BUF)*s.size();
    2263             :     }
    2264             :     else
    2265             :     {
    2266           0 :         return 0;
    2267             :     }
    2268             : }
    2269             : 
    2270             : template <class T>
    2271             : template <RunOn run_on, typename BUF>
    2272             : std::size_t
    2273           0 : BaseFab<T>::addFromMem (const Box&  dstbox,
    2274             :                         int         dstcomp,
    2275             :                         int         numcomp,
    2276             :                         const void* src) noexcept
    2277             : {
    2278             :     BL_ASSERT(box().contains(dstbox));
    2279             :     BL_ASSERT(dstcomp >= 0 && dstcomp+numcomp <= nComp());
    2280             : 
    2281           0 :     if (dstbox.ok())
    2282             :     {
    2283           0 :         Array4<BUF const> s(static_cast<BUF const*>(src), amrex::begin(dstbox),
    2284           0 :                             amrex::end(dstbox), numcomp);
    2285           0 :         Array4<T> const& d = this->array();
    2286           0 :         AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, dstbox, numcomp, i, j, k, n,
    2287             :         {
    2288             :             d(i,j,k,n+dstcomp) += static_cast<T>(s(i,j,k,n));
    2289             :         });
    2290           0 :         return sizeof(BUF)*s.size();
    2291             :     }
    2292             :     else
    2293             :     {
    2294           0 :         return 0;
    2295             :     }
    2296             : }
    2297             : 
    2298             : template <class T>
    2299             : template <RunOn run_on>
    2300             : void
    2301         216 : BaseFab<T>::setComplement (T const& x, const Box& b, int ns, int num) noexcept
    2302             : {
    2303         216 :     this->setComplement<run_on>(x, b, DestComp{ns}, NumComps{num});
    2304         216 : }
    2305             : 
    2306             : template <class T>
    2307             : template <RunOn run_on>
    2308             : void
    2309             : BaseFab<T>::abs () noexcept
    2310             : {
    2311             :     this->abs<run_on>(this->domain,0,this->nvar);
    2312             : }
    2313             : 
    2314             : template <class T>
    2315             : template <RunOn run_on>
    2316             : void
    2317             : BaseFab<T>::abs (int comp, int numcomp) noexcept
    2318             : {
    2319             :     this->abs<run_on>(this->domain,comp,numcomp);
    2320             : }
    2321             : 
    2322             : template <class T>
    2323             : template <RunOn run_on>
    2324             : void
    2325             : BaseFab<T>::abs (const Box& subbox, int comp, int numcomp) noexcept
    2326             : {
    2327             :     Array4<T> const& a = this->array();
    2328             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG (run_on, subbox, numcomp, i, j, k, n,
    2329             :     {
    2330             :         a(i,j,k,n+comp) = std::abs(a(i,j,k,n+comp));
    2331             :     });
    2332             : }
    2333             : 
    2334             : template <class T>
    2335             : template <RunOn run_on>
    2336             : Real
    2337             : BaseFab<T>::norminfmask (const Box& subbox, const BaseFab<int>& mask,
    2338             :                          int scomp, int ncomp) const noexcept
    2339             : {
    2340             :     BL_ASSERT(this->domain.contains(subbox));
    2341             :     BL_ASSERT(scomp >= 0 && scomp + ncomp <= this->nvar);
    2342             : 
    2343             :     Array4<T const> const& a = this->const_array();
    2344             :     Array4<int const> const& m = mask.const_array();
    2345             :     Real r = 0.0;
    2346             : #ifdef AMREX_USE_GPU
    2347             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    2348             :         ReduceOps<ReduceOpMax> reduce_op;
    2349             :         ReduceData<Real> reduce_data(reduce_op);
    2350             :         using ReduceTuple = ReduceData<Real>::Type;
    2351             :         reduce_op.eval(subbox, reduce_data,
    2352             :         [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    2353             :         {
    2354             :             Real t = 0.0;
    2355             :             if (m(i,j,k)) {
    2356             :                 for (int n = 0; n < ncomp; ++n) {
    2357             :                     t = amrex::max(t,static_cast<Real>(std::abs(a(i,j,k,n+scomp))));
    2358             :                 }
    2359             :             }
    2360             :             return {t};
    2361             :         });
    2362             :         ReduceTuple hv = reduce_data.value(reduce_op);
    2363             :         r = amrex::get<0>(hv);
    2364             :     } else
    2365             : #endif
    2366             :     {
    2367             :         amrex::LoopOnCpu(subbox, ncomp, [=,&r] (int i, int j, int k, int n) noexcept
    2368             :         {
    2369             :             if (m(i,j,k)) {
    2370             :                 Real t = static_cast<Real>(std::abs(a(i,j,k,n+scomp)));
    2371             :                 r = amrex::max(r,t);
    2372             :             }
    2373             :         });
    2374             :     }
    2375             :     return r;
    2376             : }
    2377             : 
    2378             : template <class T>
    2379             : template <RunOn run_on>
    2380             : Real
    2381             : BaseFab<T>::norm (int p, int comp, int numcomp) const noexcept
    2382             : {
    2383             :     return norm<run_on>(this->domain,p,comp,numcomp);
    2384             : }
    2385             : 
    2386             : template <class T>
    2387             : template <RunOn run_on>
    2388             : Real
    2389             : BaseFab<T>::norm (const Box& subbox, int p, int comp, int numcomp) const noexcept
    2390             : {
    2391             :     BL_ASSERT(this->domain.contains(subbox));
    2392             :     BL_ASSERT(comp >= 0 && comp + numcomp <= this->nvar);
    2393             : 
    2394             :     Array4<T const> const& a = this->const_array();
    2395             :     Real nrm = 0.;
    2396             : #ifdef AMREX_USE_GPU
    2397             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    2398             :         if (p == 0) {
    2399             :             ReduceOps<ReduceOpMax> reduce_op;
    2400             :             ReduceData<Real> reduce_data(reduce_op);
    2401             :             using ReduceTuple = ReduceData<Real>::Type;
    2402             :             reduce_op.eval(subbox, reduce_data,
    2403             :             [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    2404             :             {
    2405             :                 Real t = 0.0;
    2406             :                 for (int n = 0; n < numcomp; ++n) {
    2407             :                     t = amrex::max(t, static_cast<Real>(std::abs(a(i,j,k,n+comp))));
    2408             :                 }
    2409             :                 return {t};
    2410             :             });
    2411             :             ReduceTuple hv = reduce_data.value(reduce_op);
    2412             :             nrm = amrex::get<0>(hv);
    2413             :         } else if (p == 1) {
    2414             :             ReduceOps<ReduceOpSum> reduce_op;
    2415             :             ReduceData<Real> reduce_data(reduce_op);
    2416             :             using ReduceTuple = ReduceData<Real>::Type;
    2417             :             reduce_op.eval(subbox, reduce_data,
    2418             :             [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    2419             :             {
    2420             :                 Real t = 0.0;
    2421             :                 for (int n = 0; n < numcomp; ++n) {
    2422             :                     t += static_cast<Real>(std::abs(a(i,j,k,n+comp)));
    2423             :                 }
    2424             :                 return {t};
    2425             :             });
    2426             :             ReduceTuple hv = reduce_data.value(reduce_op);
    2427             :             nrm = amrex::get<0>(hv);
    2428             :         } else if (p == 2) {
    2429             :             ReduceOps<ReduceOpSum> reduce_op;
    2430             :             ReduceData<Real> reduce_data(reduce_op);
    2431             :             using ReduceTuple = ReduceData<Real>::Type;
    2432             :             reduce_op.eval(subbox, reduce_data,
    2433             :             [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    2434             :             {
    2435             :                 Real t = 0.0;
    2436             :                 for (int n = 0; n < numcomp; ++n) {
    2437             :                     t += static_cast<Real>(a(i,j,k,n+comp)*a(i,j,k,n+comp));
    2438             :                 }
    2439             :                 return {t};
    2440             :             });
    2441             :             ReduceTuple hv = reduce_data.value(reduce_op);
    2442             :             nrm = amrex::get<0>(hv);
    2443             :         } else {
    2444             :             amrex::Error("BaseFab<T>::norm: wrong p");
    2445             :         }
    2446             :     } else
    2447             : #endif
    2448             :     {
    2449             :         if (p == 0) {
    2450             :             amrex::LoopOnCpu(subbox, numcomp, [=,&nrm] (int i, int j, int k, int n) noexcept
    2451             :             {
    2452             :                 Real t = static_cast<Real>(std::abs(a(i,j,k,n+comp)));
    2453             :                 nrm = amrex::max(nrm,t);
    2454             :             });
    2455             :         } else if (p == 1) {
    2456             :             amrex::LoopOnCpu(subbox, numcomp, [=,&nrm] (int i, int j, int k, int n) noexcept
    2457             :             {
    2458             :                 nrm += std::abs(a(i,j,k,n+comp));
    2459             :             });
    2460             :         } else if (p == 2) {
    2461             :             amrex::LoopOnCpu(subbox, numcomp, [=,&nrm] (int i, int j, int k, int n) noexcept
    2462             :             {
    2463             :                 nrm += a(i,j,k,n+comp)*a(i,j,k,n+comp);
    2464             :             });
    2465             :         } else {
    2466             :             amrex::Error("BaseFab<T>::norm: wrong p");
    2467             :         }
    2468             :     }
    2469             : 
    2470             :     return nrm;
    2471             : }
    2472             : 
    2473             : template <class T>
    2474             : template <RunOn run_on>
    2475             : T
    2476             : BaseFab<T>::min (int comp) const noexcept
    2477             : {
    2478             :     return this->min<run_on>(this->domain,comp);
    2479             : }
    2480             : 
    2481             : template <class T>
    2482             : template <RunOn run_on>
    2483             : T
    2484             : BaseFab<T>::min (const Box& subbox, int comp) const noexcept
    2485             : {
    2486             :     Array4<T const> const& a = this->const_array(comp);
    2487             : #ifdef AMREX_USE_GPU
    2488             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    2489             :         ReduceOps<ReduceOpMin> reduce_op;
    2490             :         ReduceData<T> reduce_data(reduce_op);
    2491             :         using ReduceTuple = typename decltype(reduce_data)::Type;
    2492             :         reduce_op.eval(subbox, reduce_data,
    2493             :         [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    2494             :         {
    2495             :             return { a(i,j,k) };
    2496             :         });
    2497             :         ReduceTuple hv = reduce_data.value(reduce_op);
    2498             :         return amrex::get<0>(hv);
    2499             :     } else
    2500             : #endif
    2501             :     {
    2502             :         T r = std::numeric_limits<T>::max();
    2503             :         amrex::LoopOnCpu(subbox, [=,&r] (int i, int j, int k) noexcept
    2504             :         {
    2505             :             r = amrex::min(r, a(i,j,k));
    2506             :         });
    2507             :         return r;
    2508             :     }
    2509             : }
    2510             : 
    2511             : template <class T>
    2512             : template <RunOn run_on>
    2513             : T
    2514             : BaseFab<T>::max (int comp) const noexcept
    2515             : {
    2516             :     return this->max<run_on>(this->domain,comp);
    2517             : }
    2518             : 
    2519             : template <class T>
    2520             : template <RunOn run_on>
    2521             : T
    2522             : BaseFab<T>::max (const Box& subbox, int comp) const noexcept
    2523             : {
    2524             :     Array4<T const> const& a = this->const_array(comp);
    2525             : #ifdef AMREX_USE_GPU
    2526             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    2527             :         ReduceOps<ReduceOpMax> reduce_op;
    2528             :         ReduceData<T> reduce_data(reduce_op);
    2529             :         using ReduceTuple = typename decltype(reduce_data)::Type;
    2530             :         reduce_op.eval(subbox, reduce_data,
    2531             :         [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    2532             :         {
    2533             :             return { a(i,j,k) };
    2534             :         });
    2535             :         ReduceTuple hv = reduce_data.value(reduce_op);
    2536             :         return amrex::get<0>(hv);
    2537             :     } else
    2538             : #endif
    2539             :     {
    2540             :         T r = std::numeric_limits<T>::lowest();
    2541             :         amrex::LoopOnCpu(subbox, [=,&r] (int i, int j, int k) noexcept
    2542             :         {
    2543             :             r = amrex::max(r, a(i,j,k));
    2544             :         });
    2545             :         return r;
    2546             :     }
    2547             : }
    2548             : 
    2549             : template <class T>
    2550             : template <RunOn run_on>
    2551             : std::pair<T,T>
    2552             : BaseFab<T>::minmax (int comp) const noexcept
    2553             : {
    2554             :     return this->minmax<run_on>(this->domain,comp);
    2555             : }
    2556             : 
    2557             : template <class T>
    2558             : template <RunOn run_on>
    2559             : std::pair<T,T>
    2560             : BaseFab<T>::minmax (const Box& subbox, int comp) const noexcept
    2561             : {
    2562             :     Array4<T const> const& a = this->const_array(comp);
    2563             : #ifdef AMREX_USE_GPU
    2564             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    2565             :         ReduceOps<ReduceOpMin,ReduceOpMax> reduce_op;
    2566             :         ReduceData<T,T> reduce_data(reduce_op);
    2567             :         using ReduceTuple = typename decltype(reduce_data)::Type;
    2568             :         reduce_op.eval(subbox, reduce_data,
    2569             :         [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    2570             :         {
    2571             :             auto const x = a(i,j,k);
    2572             :             return { x, x };
    2573             :         });
    2574             :         ReduceTuple hv = reduce_data.value(reduce_op);
    2575             :         return std::make_pair(amrex::get<0>(hv), amrex::get<1>(hv));
    2576             :     } else
    2577             : #endif
    2578             :     {
    2579             :         T rmax = std::numeric_limits<T>::lowest();
    2580             :         T rmin = std::numeric_limits<T>::max();
    2581             :         amrex::LoopOnCpu(subbox, [=,&rmin,&rmax] (int i, int j, int k) noexcept
    2582             :         {
    2583             :             auto const x = a(i,j,k);
    2584             :             rmin = amrex::min(rmin, x);
    2585             :             rmax = amrex::max(rmax, x);
    2586             :         });
    2587             :         return std::make_pair(rmin,rmax);
    2588             :     }
    2589             : }
    2590             : 
    2591             : template <class T>
    2592             : template <RunOn run_on>
    2593             : T
    2594             : BaseFab<T>::maxabs (int comp) const noexcept
    2595             : {
    2596             :     return this->maxabs<run_on>(this->domain,comp);
    2597             : }
    2598             : 
    2599             : template <class T>
    2600             : template <RunOn run_on>
    2601             : T
    2602             : BaseFab<T>::maxabs (const Box& subbox, int comp) const noexcept
    2603             : {
    2604             :     Array4<T const> const& a = this->const_array(comp);
    2605             : #ifdef AMREX_USE_GPU
    2606             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    2607             :         ReduceOps<ReduceOpMax> reduce_op;
    2608             :         ReduceData<T> reduce_data(reduce_op);
    2609             :         using ReduceTuple = typename decltype(reduce_data)::Type;
    2610             :         reduce_op.eval(subbox, reduce_data,
    2611             :         [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    2612             :         {
    2613             :             return { std::abs(a(i,j,k)) };
    2614             :         });
    2615             :         ReduceTuple hv = reduce_data.value(reduce_op);
    2616             :         return amrex::get<0>(hv);
    2617             :     } else
    2618             : #endif
    2619             :     {
    2620             :         T r = 0;
    2621             :         amrex::LoopOnCpu(subbox, [=,&r] (int i, int j, int k) noexcept
    2622             :         {
    2623             :             r = amrex::max(r, std::abs(a(i,j,k)));
    2624             :         });
    2625             :         return r;
    2626             :     }
    2627             : }
    2628             : 
    2629             : 
    2630             : template <class T>
    2631             : template <RunOn run_on>
    2632             : IntVect
    2633             : BaseFab<T>::indexFromValue (Box const& subbox, int comp, T const& value) const noexcept
    2634             : {
    2635             :     Array4<T const> const& a = this->const_array(comp);
    2636             : #ifdef AMREX_USE_GPU
    2637             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    2638             :         Array<int,1+AMREX_SPACEDIM> ha{0,AMREX_D_DECL(std::numeric_limits<int>::lowest(),
    2639             :                                                       std::numeric_limits<int>::lowest(),
    2640             :                                                       std::numeric_limits<int>::lowest())};
    2641             :         Gpu::DeviceVector<int> dv(1+AMREX_SPACEDIM);
    2642             :         int* p = dv.data();
    2643             :         Gpu::htod_memcpy_async(p, ha.data(), sizeof(int)*ha.size());
    2644             :         amrex::ParallelFor(subbox, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
    2645             :         {
    2646             :             int* flag = p;
    2647             :             if ((*flag == 0) && (a(i,j,k) == value)) {
    2648             :                 if (Gpu::Atomic::Exch(flag,1) == 0) {
    2649             :                     AMREX_D_TERM(p[1] = i;,
    2650             :                                  p[2] = j;,
    2651             :                                  p[3] = k;);
    2652             :                 }
    2653             :             }
    2654             :         });
    2655             :         Gpu::dtoh_memcpy_async(ha.data(), p, sizeof(int)*ha.size());
    2656             :         Gpu::streamSynchronize();
    2657             :         return IntVect(AMREX_D_DECL(ha[1],ha[2],ha[2]));
    2658             :     } else
    2659             : #endif
    2660             :     {
    2661             :         AMREX_LOOP_3D(subbox, i, j, k,
    2662             :         {
    2663             :             if (a(i,j,k) == value) { return IntVect(AMREX_D_DECL(i,j,k)); }
    2664             :         });
    2665             :         return IntVect::TheMinVector();
    2666             :     }
    2667             : }
    2668             : 
    2669             : template <class T>
    2670             : template <RunOn run_on>
    2671             : IntVect
    2672             : BaseFab<T>::minIndex (int comp) const noexcept
    2673             : {
    2674             :     return this->minIndex<run_on>(this->domain,comp);
    2675             : }
    2676             : 
    2677             : template <class T>
    2678             : template <RunOn run_on>
    2679             : IntVect
    2680             : BaseFab<T>::minIndex (const Box& subbox, int comp) const noexcept
    2681             : {
    2682             :     T min_val = this->min<run_on>(subbox, comp);
    2683             :     return this->indexFromValue<run_on>(subbox, comp, min_val);
    2684             : }
    2685             : 
    2686             : template <class T>
    2687             : template <RunOn run_on>
    2688             : void
    2689             : BaseFab<T>::minIndex (const Box& subbox, Real& min_val, IntVect& min_idx, int comp) const noexcept
    2690             : {
    2691             :     min_val = this->min<run_on>(subbox, comp);
    2692             :     min_idx = this->indexFromValue<run_on>(subbox, comp, min_val);
    2693             : }
    2694             : 
    2695             : template <class T>
    2696             : template <RunOn run_on>
    2697             : IntVect
    2698             : BaseFab<T>::maxIndex (int comp) const noexcept
    2699             : {
    2700             :     return this->maxIndex<run_on>(this->domain,comp);
    2701             : }
    2702             : 
    2703             : template <class T>
    2704             : template <RunOn run_on>
    2705             : IntVect
    2706             : BaseFab<T>::maxIndex (const Box& subbox, int comp) const noexcept
    2707             : {
    2708             :     T max_val = this->max<run_on>(subbox, comp);
    2709             :     return this->indexFromValue<run_on>(subbox, comp, max_val);
    2710             : }
    2711             : 
    2712             : template <class T>
    2713             : template <RunOn run_on>
    2714             : void
    2715             : BaseFab<T>::maxIndex (const Box& subbox, Real& max_val, IntVect& max_idx, int comp) const noexcept
    2716             : {
    2717             :     max_val = this->max<run_on>(subbox, comp);
    2718             :     max_idx = this->indexFromValue<run_on>(subbox, comp, max_val);
    2719             : }
    2720             : 
    2721             : template <class T>
    2722             : template <RunOn run_on>
    2723             : int
    2724             : BaseFab<T>::maskLT (BaseFab<int>& mask, T const& val, int comp) const noexcept
    2725             : {
    2726             :     mask.resize(this->domain,1);
    2727             :     int cnt = 0;
    2728             :     Array4<int> const& m = mask.array();
    2729             :     Array4<T const> const& a = this->const_array(comp);
    2730             : #ifdef AMREX_USE_GPU
    2731             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    2732             :         ReduceOps<ReduceOpSum> reduce_op;
    2733             :         ReduceData<int> reduce_data(reduce_op);
    2734             :         using ReduceTuple = typename decltype(reduce_data)::Type;
    2735             :         reduce_op.eval(this->domain, reduce_data,
    2736             :         [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    2737             :         {
    2738             :             int t;
    2739             :             if (a(i,j,k) < val) {
    2740             :                 m(i,j,k) = 1;
    2741             :                 t = 1;
    2742             :             } else {
    2743             :                 m(i,j,k) = 0;
    2744             :                 t = 0;
    2745             :             }
    2746             :             return {t};
    2747             :         });
    2748             :         ReduceTuple hv = reduce_data.value(reduce_op);
    2749             :         cnt = amrex::get<0>(hv);
    2750             :     } else
    2751             : #endif
    2752             :     {
    2753             :         AMREX_LOOP_3D(this->domain, i, j, k,
    2754             :         {
    2755             :             if (a(i,j,k) < val) {
    2756             :                 m(i,j,k) = 1;
    2757             :                 ++cnt;
    2758             :             } else {
    2759             :                 m(i,j,k) = 0;
    2760             :             }
    2761             :         });
    2762             :     }
    2763             : 
    2764             :     return cnt;
    2765             : }
    2766             : 
    2767             : template <class T>
    2768             : template <RunOn run_on>
    2769             : int
    2770             : BaseFab<T>::maskLE (BaseFab<int>& mask, T const& val, int comp) const noexcept
    2771             : {
    2772             :     mask.resize(this->domain,1);
    2773             :     int cnt = 0;
    2774             :     Array4<int> const& m = mask.array();
    2775             :     Array4<T const> const& a = this->const_array(comp);
    2776             : #ifdef AMREX_USE_GPU
    2777             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    2778             :         ReduceOps<ReduceOpSum> reduce_op;
    2779             :         ReduceData<int> reduce_data(reduce_op);
    2780             :         using ReduceTuple = typename decltype(reduce_data)::Type;
    2781             :         reduce_op.eval(this->domain, reduce_data,
    2782             :         [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    2783             :         {
    2784             :             int t;
    2785             :             if (a(i,j,k) <= val) {
    2786             :                 m(i,j,k) = 1;
    2787             :                 t = 1;
    2788             :             } else {
    2789             :                 m(i,j,k) = 0;
    2790             :                 t = 0;
    2791             :             }
    2792             :             return {t};
    2793             :         });
    2794             :         ReduceTuple hv = reduce_data.value(reduce_op);
    2795             :         cnt = amrex::get<0>(hv);
    2796             :     } else
    2797             : #endif
    2798             :     {
    2799             :         AMREX_LOOP_3D(this->domain, i, j, k,
    2800             :         {
    2801             :             if (a(i,j,k) <= val) {
    2802             :                 m(i,j,k) = 1;
    2803             :                 ++cnt;
    2804             :             } else {
    2805             :                 m(i,j,k) = 0;
    2806             :             }
    2807             :         });
    2808             :     }
    2809             : 
    2810             :     return cnt;
    2811             : }
    2812             : 
    2813             : template <class T>
    2814             : template <RunOn run_on>
    2815             : int
    2816             : BaseFab<T>::maskEQ (BaseFab<int>& mask, T const& val, int comp) const noexcept
    2817             : {
    2818             :     mask.resize(this->domain,1);
    2819             :     int cnt = 0;
    2820             :     Array4<int> const& m = mask.array();
    2821             :     Array4<T const> const& a = this->const_array(comp);
    2822             : #ifdef AMREX_USE_GPU
    2823             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    2824             :         ReduceOps<ReduceOpSum> reduce_op;
    2825             :         ReduceData<int> reduce_data(reduce_op);
    2826             :         using ReduceTuple = typename decltype(reduce_data)::Type;
    2827             :         reduce_op.eval(this->domain, reduce_data,
    2828             :         [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    2829             :         {
    2830             :             int t;
    2831             :             if (a(i,j,k) == val) {
    2832             :                 m(i,j,k) = 1;
    2833             :                 t = 1;
    2834             :             } else {
    2835             :                 m(i,j,k) = 0;
    2836             :                 t = 0;
    2837             :             }
    2838             :             return {t};
    2839             :         });
    2840             :         ReduceTuple hv = reduce_data.value(reduce_op);
    2841             :         cnt = amrex::get<0>(hv);
    2842             :     } else
    2843             : #endif
    2844             :     {
    2845             :         AMREX_LOOP_3D(this->domain, i, j, k,
    2846             :         {
    2847             :             if (a(i,j,k) == val) {
    2848             :                 m(i,j,k) = 1;
    2849             :                 ++cnt;
    2850             :             } else {
    2851             :                 m(i,j,k) = 0;
    2852             :             }
    2853             :         });
    2854             :     }
    2855             : 
    2856             :     return cnt;
    2857             : }
    2858             : 
    2859             : template <class T>
    2860             : template <RunOn run_on>
    2861             : int
    2862             : BaseFab<T>::maskGT (BaseFab<int>& mask, T const& val, int comp) const noexcept
    2863             : {
    2864             :     mask.resize(this->domain,1);
    2865             :     int cnt = 0;
    2866             :     Array4<int> const& m = mask.array();
    2867             :     Array4<T const> const& a = this->const_array(comp);
    2868             : #ifdef AMREX_USE_GPU
    2869             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    2870             :         ReduceOps<ReduceOpSum> reduce_op;
    2871             :         ReduceData<int> reduce_data(reduce_op);
    2872             :         using ReduceTuple = typename decltype(reduce_data)::Type;
    2873             :         reduce_op.eval(this->domain, reduce_data,
    2874             :         [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    2875             :         {
    2876             :             int t;
    2877             :             if (a(i,j,k) > val) {
    2878             :                 m(i,j,k) = 1;
    2879             :                 t = 1;
    2880             :             } else {
    2881             :                 m(i,j,k) = 0;
    2882             :                 t = 0;
    2883             :             }
    2884             :             return {t};
    2885             :         });
    2886             :         ReduceTuple hv = reduce_data.value(reduce_op);
    2887             :         cnt = amrex::get<0>(hv);
    2888             :     } else
    2889             : #endif
    2890             :     {
    2891             :         AMREX_LOOP_3D(this->domain, i, j, k,
    2892             :         {
    2893             :             if (a(i,j,k) > val) {
    2894             :                 m(i,j,k) = 1;
    2895             :                 ++cnt;
    2896             :             } else {
    2897             :                 m(i,j,k) = 0;
    2898             :             }
    2899             :         });
    2900             :     }
    2901             : 
    2902             :     return cnt;
    2903             : }
    2904             : 
    2905             : template <class T>
    2906             : template <RunOn run_on>
    2907             : int
    2908             : BaseFab<T>::maskGE (BaseFab<int>& mask, T const& val, int comp) const noexcept
    2909             : {
    2910             :     mask.resize(this->domain,1);
    2911             :     int cnt = 0;
    2912             :     Array4<int> const& m = mask.array();
    2913             :     Array4<T const> const& a = this->const_array(comp);
    2914             : #ifdef AMREX_USE_GPU
    2915             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    2916             :         ReduceOps<ReduceOpSum> reduce_op;
    2917             :         ReduceData<int> reduce_data(reduce_op);
    2918             :         using ReduceTuple = typename decltype(reduce_data)::Type;
    2919             :         reduce_op.eval(this->domain, reduce_data,
    2920             :         [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    2921             :         {
    2922             :             int t;
    2923             :             if (a(i,j,k) >= val) {
    2924             :                 m(i,j,k) = 1;
    2925             :                 t = 1;
    2926             :             } else {
    2927             :                 m(i,j,k) = 0;
    2928             :                 t = 0;
    2929             :             }
    2930             :             return {t};
    2931             :         });
    2932             :         ReduceTuple hv = reduce_data.value(reduce_op);
    2933             :         cnt = amrex::get<0>(hv);
    2934             :     } else
    2935             : #endif
    2936             :     {
    2937             :         AMREX_LOOP_3D(this->domain, i, j, k,
    2938             :         {
    2939             :             if (a(i,j,k) >= val) {
    2940             :                 m(i,j,k) = 1;
    2941             :                 ++cnt;
    2942             :             } else {
    2943             :                 m(i,j,k) = 0;
    2944             :             }
    2945             :         });
    2946             :     }
    2947             : 
    2948             :     return cnt;
    2949             : }
    2950             : 
    2951             : template <class T>
    2952             : template <RunOn run_on>
    2953             : BaseFab<T>&
    2954             : BaseFab<T>::atomicAdd (const BaseFab<T>& x) noexcept
    2955             : {
    2956             :     Box ovlp(this->domain);
    2957             :     ovlp &= x.domain;
    2958             :     return ovlp.ok() ? this->atomicAdd<run_on>(x,ovlp,ovlp,0,0,this->nvar) : *this;
    2959             : }
    2960             : 
    2961             : template <class T>
    2962             : template <RunOn run_on>
    2963             : BaseFab<T>&
    2964             : BaseFab<T>::saxpy (T a, const BaseFab<T>& x, const Box& srcbox, const Box& destbox,
    2965             :                    int srccomp, int destcomp, int numcomp) noexcept
    2966             : {
    2967             :     BL_ASSERT(srcbox.ok());
    2968             :     BL_ASSERT(x.box().contains(srcbox));
    2969             :     BL_ASSERT(destbox.ok());
    2970             :     BL_ASSERT(box().contains(destbox));
    2971             :     BL_ASSERT(destbox.sameSize(srcbox));
    2972             :     BL_ASSERT( srccomp >= 0 &&  srccomp+numcomp <= x.nComp());
    2973             :     BL_ASSERT(destcomp >= 0 && destcomp+numcomp <=   nComp());
    2974             : 
    2975             :     Array4<T> const& d = this->array();
    2976             :     Array4<T const> const& s = x.const_array();
    2977             :     const auto dlo = amrex::lbound(destbox);
    2978             :     const auto slo = amrex::lbound(srcbox);
    2979             :     const Dim3 offset{slo.x-dlo.x,slo.y-dlo.y,slo.z-dlo.z};
    2980             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
    2981             :     {
    2982             :         d(i,j,k,n+destcomp) += a * s(i+offset.x,j+offset.y,k+offset.z,n+srccomp);
    2983             :     });
    2984             : 
    2985             :     return *this;
    2986             : }
    2987             : 
    2988             : template <class T>
    2989             : template <RunOn run_on>
    2990             : BaseFab<T>&
    2991             : BaseFab<T>::saxpy (T a, const BaseFab<T>& x) noexcept
    2992             : {
    2993             :     Box ovlp(this->domain);
    2994             :     ovlp &= x.domain;
    2995             :     return ovlp.ok() ? saxpy<run_on>(a,x,ovlp,ovlp,0,0,this->nvar) : *this;
    2996             : }
    2997             : 
    2998             : template <class T>
    2999             : template <RunOn run_on>
    3000             : BaseFab<T>&
    3001             : BaseFab<T>::xpay (T a, const BaseFab<T>& x,
    3002             :                   const Box& srcbox, const Box& destbox,
    3003             :                   int srccomp, int destcomp, int numcomp) noexcept
    3004             : {
    3005             :     BL_ASSERT(srcbox.ok());
    3006             :     BL_ASSERT(x.box().contains(srcbox));
    3007             :     BL_ASSERT(destbox.ok());
    3008             :     BL_ASSERT(box().contains(destbox));
    3009             :     BL_ASSERT(destbox.sameSize(srcbox));
    3010             :     BL_ASSERT( srccomp >= 0 &&  srccomp+numcomp <= x.nComp());
    3011             :     BL_ASSERT(destcomp >= 0 && destcomp+numcomp <=   nComp());
    3012             : 
    3013             :     Array4<T> const& d = this->array();
    3014             :     Array4<T const> const& s = x.const_array();
    3015             :     const auto dlo = amrex::lbound(destbox);
    3016             :     const auto slo = amrex::lbound(srcbox);
    3017             :     const Dim3 offset{slo.x-dlo.x,slo.y-dlo.y,slo.z-dlo.z};
    3018             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
    3019             :     {
    3020             :         d(i,j,k,n+destcomp) = s(i+offset.x,j+offset.y,k+offset.z,n+srccomp) + a*d(i,j,k,n+destcomp);
    3021             :     });
    3022             : 
    3023             :     return *this;
    3024             : }
    3025             : 
    3026             : template <class T>
    3027             : template <RunOn run_on>
    3028             : BaseFab<T>&
    3029             : BaseFab<T>::addproduct (const Box& destbox, int destcomp, int numcomp,
    3030             :                         const BaseFab<T>& src1, int comp1,
    3031             :                         const BaseFab<T>& src2, int comp2) noexcept
    3032             : {
    3033             :     BL_ASSERT(destbox.ok());
    3034             :     BL_ASSERT(box().contains(destbox));
    3035             :     BL_ASSERT(   comp1 >= 0 &&    comp1+numcomp <= src1.nComp());
    3036             :     BL_ASSERT(   comp2 >= 0 &&    comp2+numcomp <= src2.nComp());
    3037             :     BL_ASSERT(destcomp >= 0 && destcomp+numcomp <=      nComp());
    3038             : 
    3039             :     Array4<T> const& d = this->array();
    3040             :     Array4<T const> const& s1 = src1.const_array();
    3041             :     Array4<T const> const& s2 = src2.const_array();
    3042             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
    3043             :     {
    3044             :         d(i,j,k,n+destcomp) += s1(i,j,k,n+comp1) * s2(i,j,k,n+comp2);
    3045             :     });
    3046             : 
    3047             :     return *this;
    3048             : }
    3049             : 
    3050             : template <class T>
    3051             : template <RunOn run_on>
    3052             : BaseFab<T>&
    3053             : BaseFab<T>::linComb (const BaseFab<T>& f1, const Box& b1, int comp1,
    3054             :                      const BaseFab<T>& f2, const Box& b2, int comp2,
    3055             :                      Real alpha, Real beta, const Box& b,
    3056             :                      int comp, int numcomp) noexcept
    3057             : {
    3058             :     BL_ASSERT(b1.ok());
    3059             :     BL_ASSERT(f1.box().contains(b1));
    3060             :     BL_ASSERT(b2.ok());
    3061             :     BL_ASSERT(f2.box().contains(b2));
    3062             :     BL_ASSERT(b.ok());
    3063             :     BL_ASSERT(box().contains(b));
    3064             :     BL_ASSERT(b.sameSize(b1));
    3065             :     BL_ASSERT(b.sameSize(b2));
    3066             :     BL_ASSERT(comp1 >= 0 && comp1+numcomp <= f1.nComp());
    3067             :     BL_ASSERT(comp2 >= 0 && comp2+numcomp <= f2.nComp());
    3068             :     BL_ASSERT(comp  >= 0 && comp +numcomp <=    nComp());
    3069             : 
    3070             :     Array4<T> const& d = this->array();
    3071             :     Array4<T const> const& s1 = f1.const_array();
    3072             :     Array4<T const> const& s2 = f2.const_array();
    3073             :     const auto dlo = amrex::lbound(b);
    3074             :     const auto slo1 = amrex::lbound(b1);
    3075             :     const auto slo2 = amrex::lbound(b2);
    3076             :     const Dim3 off1{slo1.x-dlo.x,slo1.y-dlo.y,slo1.z-dlo.z};
    3077             :     const Dim3 off2{slo2.x-dlo.x,slo2.y-dlo.y,slo2.z-dlo.z};
    3078             : 
    3079             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, b, numcomp, i, j, k, n,
    3080             :     {
    3081             :         d(i,j,k,n+comp) = alpha*s1(i+off1.x,j+off1.y,k+off1.z,n+comp1)
    3082             :                          + beta*s2(i+off2.x,j+off2.y,k+off2.z,n+comp2);
    3083             :     });
    3084             :     return *this;
    3085             : }
    3086             : 
    3087             : template <class T>
    3088             : template <RunOn run_on>
    3089             : T
    3090             : BaseFab<T>::dot (const Box& xbx, int xcomp,
    3091             :                  const BaseFab<T>& y, const Box& ybx, int ycomp,
    3092             :                  int numcomp) const noexcept
    3093             : {
    3094             :     BL_ASSERT(xbx.ok());
    3095             :     BL_ASSERT(box().contains(xbx));
    3096             :     BL_ASSERT(y.box().contains(ybx));
    3097             :     BL_ASSERT(xbx.sameSize(ybx));
    3098             :     BL_ASSERT(xcomp >= 0 && xcomp+numcomp <=   nComp());
    3099             :     BL_ASSERT(ycomp >= 0 && ycomp+numcomp <= y.nComp());
    3100             : 
    3101             :     T r = 0;
    3102             : 
    3103             :     const auto xlo = amrex::lbound(xbx);
    3104             :     const auto ylo = amrex::lbound(ybx);
    3105             :     const Dim3 offset{ylo.x-xlo.x,ylo.y-xlo.y,ylo.z-xlo.z};
    3106             :     Array4<T const> const& xa = this->const_array();
    3107             :     Array4<T const> const& ya = y.const_array();
    3108             : 
    3109             : #ifdef AMREX_USE_GPU
    3110             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    3111             :         ReduceOps<ReduceOpSum> reduce_op;
    3112             :         ReduceData<T> reduce_data(reduce_op);
    3113             :         using ReduceTuple = typename decltype(reduce_data)::Type;
    3114             :         reduce_op.eval(xbx, reduce_data,
    3115             :         [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    3116             :         {
    3117             :             T t = 0;
    3118             :             for (int n = 0; n < numcomp; ++n) {
    3119             :                 t += xa(i,j,k,n+xcomp) * ya(i+offset.x,j+offset.y,k+offset.z,n+ycomp);
    3120             :             }
    3121             :             return {t};
    3122             :         });
    3123             :         ReduceTuple hv = reduce_data.value(reduce_op);
    3124             :         r = amrex::get<0>(hv);
    3125             :     } else
    3126             : #endif
    3127             :     {
    3128             :         AMREX_LOOP_4D(xbx, numcomp, i, j, k, n,
    3129             :         {
    3130             :             r += xa(i,j,k,n+xcomp) * ya(i+offset.x,j+offset.y,k+offset.z,n+ycomp);
    3131             :         });
    3132             :     }
    3133             : 
    3134             :     return r;
    3135             : }
    3136             : 
    3137             : template <class T>
    3138             : template <RunOn run_on>
    3139             : T
    3140             : BaseFab<T>::dotmask (const BaseFab<int>& mask, const Box& xbx, int xcomp,
    3141             :                      const BaseFab<T>& y, const Box& ybx, int ycomp,
    3142             :                      int numcomp) const noexcept
    3143             : {
    3144             :     BL_ASSERT(xbx.ok());
    3145             :     BL_ASSERT(box().contains(xbx));
    3146             :     BL_ASSERT(y.box().contains(ybx));
    3147             :     BL_ASSERT(xbx.sameSize(ybx));
    3148             :     BL_ASSERT(xcomp >= 0 && xcomp+numcomp <=   nComp());
    3149             :     BL_ASSERT(ycomp >= 0 && ycomp+numcomp <= y.nComp());
    3150             : 
    3151             :     T r = 0;
    3152             : 
    3153             :     const auto xlo = amrex::lbound(xbx);
    3154             :     const auto ylo = amrex::lbound(ybx);
    3155             :     const Dim3 offset{ylo.x-xlo.x,ylo.y-xlo.y,ylo.z-xlo.z};
    3156             : 
    3157             :     Array4<T const> const& xa = this->const_array();
    3158             :     Array4<T const> const& ya = y.const_array();
    3159             :     Array4<int const> const& ma = mask.const_array();
    3160             : 
    3161             : #ifdef AMREX_USE_GPU
    3162             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    3163             :         ReduceOps<ReduceOpSum> reduce_op;
    3164             :         ReduceData<T> reduce_data(reduce_op);
    3165             :         using ReduceTuple = typename decltype(reduce_data)::Type;
    3166             :         reduce_op.eval(xbx, reduce_data,
    3167             :         [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    3168             :         {
    3169             :             int m = static_cast<int>(static_cast<bool>(ma(i,j,k)));
    3170             :             T t = 0;
    3171             :             for (int n = 0; n < numcomp; ++n) {
    3172             :                 t += xa(i,j,k,n+xcomp) * ya(i+offset.x,j+offset.y,k+offset.z,n+ycomp) * m;
    3173             :             }
    3174             :             return {t};
    3175             :         });
    3176             :         ReduceTuple hv = reduce_data.value(reduce_op);
    3177             :         r = amrex::get<0>(hv);
    3178             :     } else
    3179             : #endif
    3180             :     {
    3181             :         AMREX_LOOP_4D(xbx, numcomp, i, j, k, n,
    3182             :         {
    3183             :             int m = static_cast<int>(static_cast<bool>(ma(i,j,k)));
    3184             :             r += xa(i,j,k,n+xcomp) * ya(i+offset.x,j+offset.y,k+offset.z,n+ycomp) * m;
    3185             :         });
    3186             :     }
    3187             : 
    3188             :     return r;
    3189             : }
    3190             : 
    3191             : template <class T>
    3192             : template <RunOn run_on>
    3193             : T
    3194             : BaseFab<T>::sum (int comp, int numcomp) const noexcept
    3195             : {
    3196             :     return this->sum<run_on>(this->domain, DestComp{comp}, NumComps{numcomp});
    3197             : }
    3198             : 
    3199             : template <class T>
    3200             : template <RunOn run_on>
    3201             : T
    3202             : BaseFab<T>::sum (const Box& subbox, int comp, int numcomp) const noexcept
    3203             : {
    3204             :     return this->sum<run_on>(subbox, DestComp{comp}, NumComps{numcomp});
    3205             : }
    3206             : 
    3207             : template <class T>
    3208             : template <RunOn run_on>
    3209             : BaseFab<T>&
    3210             : BaseFab<T>::negate (int comp, int numcomp) noexcept
    3211             : {
    3212             :     return this->negate<run_on>(this->domain, DestComp{comp}, NumComps{numcomp});
    3213             : }
    3214             : 
    3215             : template <class T>
    3216             : template <RunOn run_on>
    3217             : BaseFab<T>&
    3218             : BaseFab<T>::negate (const Box& b, int comp, int numcomp) noexcept
    3219             : {
    3220             :     return this->negate<run_on>(b, DestComp{comp}, NumComps{numcomp});
    3221             : }
    3222             : 
    3223             : template <class T>
    3224             : template <RunOn run_on>
    3225             : BaseFab<T>&
    3226             : BaseFab<T>::invert (T const& r, int comp, int numcomp) noexcept
    3227             : {
    3228             :     return this->invert<run_on>(r, this->domain, DestComp{comp}, NumComps{numcomp});
    3229             : }
    3230             : 
    3231             : template <class T>
    3232             : template <RunOn run_on>
    3233             : BaseFab<T>&
    3234             : BaseFab<T>::invert (T const& r, const Box& b, int comp, int numcomp) noexcept
    3235             : {
    3236             :     return this->invert<run_on>(r, b, DestComp{comp}, NumComps{numcomp});
    3237             : }
    3238             : 
    3239             : template <class T>
    3240             : template <RunOn run_on>
    3241             : BaseFab<T>&
    3242             : BaseFab<T>::plus (T const& r, int comp, int numcomp) noexcept
    3243             : {
    3244             :     return this->plus<run_on>(r, this->domain, DestComp{comp}, NumComps{numcomp});
    3245             : }
    3246             : 
    3247             : template <class T>
    3248             : template <RunOn run_on>
    3249             : BaseFab<T>&
    3250             : BaseFab<T>::plus (T const& r, const Box& b, int comp, int numcomp) noexcept
    3251             : {
    3252             :     return this->plus<run_on>(r, b, DestComp{comp}, NumComps{numcomp});
    3253             : }
    3254             : 
    3255             : template <class T>
    3256             : template <RunOn run_on>
    3257             : BaseFab<T>&
    3258           0 : BaseFab<T>::plus (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp) noexcept
    3259             : {
    3260           0 :     return this->plus<run_on>(src, this->domain, SrcComp{srccomp}, DestComp{destcomp}, NumComps{numcomp});
    3261             : }
    3262             : 
    3263             : template <class T>
    3264             : template <RunOn run_on>
    3265             : BaseFab<T>&
    3266             : BaseFab<T>::atomicAdd (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp) noexcept
    3267             : {
    3268             :     Box ovlp(this->domain);
    3269             :     ovlp &= src.domain;
    3270             :     return ovlp.ok() ? this->atomicAdd<run_on>(src,ovlp,ovlp,srccomp,destcomp,numcomp) : *this;
    3271             : }
    3272             : 
    3273             : template <class T>
    3274             : template <RunOn run_on>
    3275             : BaseFab<T>&
    3276             : BaseFab<T>::plus (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp,
    3277             :                   int numcomp) noexcept
    3278             : {
    3279             :     return this->plus<run_on>(src, subbox, SrcComp{srccomp}, DestComp{destcomp}, NumComps{numcomp});
    3280             : }
    3281             : 
    3282             : template <class T>
    3283             : template <RunOn run_on>
    3284             : BaseFab<T>&
    3285             : BaseFab<T>::atomicAdd (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp,
    3286             :                        int numcomp) noexcept
    3287             : {
    3288             :     Box ovlp(this->domain);
    3289             :     ovlp &= src.domain;
    3290             :     ovlp &= subbox;
    3291             :     return ovlp.ok() ? this->atomicAdd<run_on>(src,ovlp,ovlp,srccomp,destcomp,numcomp) : *this;
    3292             : }
    3293             : 
    3294             : template <class T>
    3295             : template <RunOn run_on>
    3296             : BaseFab<T>&
    3297        4770 : BaseFab<T>::plus (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
    3298             :                   int srccomp, int destcomp, int numcomp) noexcept
    3299             : {
    3300             :     BL_ASSERT(destbox.ok());
    3301             :     BL_ASSERT(src.box().contains(srcbox));
    3302             :     BL_ASSERT(box().contains(destbox));
    3303             :     BL_ASSERT(destbox.sameSize(srcbox));
    3304             :     BL_ASSERT(srccomp >= 0 && srccomp+numcomp <= src.nComp());
    3305             :     BL_ASSERT(destcomp >= 0 && destcomp+numcomp <= nComp());
    3306             : 
    3307        4770 :     Array4<T> const& d = this->array();
    3308        4770 :     Array4<T const> const& s = src.const_array();
    3309             :     const auto dlo = amrex::lbound(destbox);
    3310             :     const auto slo = amrex::lbound(srcbox);
    3311        4770 :     const Dim3 offset{slo.x-dlo.x,slo.y-dlo.y,slo.z-dlo.z};
    3312     3582270 :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
    3313             :     {
    3314             :         d(i,j,k,n+destcomp) += s(i+offset.x,j+offset.y,k+offset.z,n+srccomp);
    3315             :     });
    3316             : 
    3317        4770 :     return *this;
    3318             : }
    3319             : 
    3320             : template <class T>
    3321             : template <RunOn run_on>
    3322             : BaseFab<T>&
    3323             : BaseFab<T>::atomicAdd (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
    3324             :                        int srccomp, int destcomp, int numcomp) noexcept
    3325             : {
    3326             :     BL_ASSERT(destbox.ok());
    3327             :     BL_ASSERT(src.box().contains(srcbox));
    3328             :     BL_ASSERT(box().contains(destbox));
    3329             :     BL_ASSERT(destbox.sameSize(srcbox));
    3330             :     BL_ASSERT(srccomp >= 0 && srccomp+numcomp <= src.nComp());
    3331             :     BL_ASSERT(destcomp >= 0 && destcomp+numcomp <= nComp());
    3332             : 
    3333             :     Array4<T> const& d = this->array();
    3334             :     Array4<T const> const& s = src.const_array();
    3335             :     const auto dlo = amrex::lbound(destbox);
    3336             :     const auto slo = amrex::lbound(srcbox);
    3337             :     const Dim3 offset{slo.x-dlo.x,slo.y-dlo.y,slo.z-dlo.z};
    3338             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
    3339             :     {
    3340             :         T* p = d.ptr(i,j,k,n+destcomp);
    3341             :         HostDevice::Atomic::Add(p, s(i+offset.x,j+offset.y,k+offset.z,n+srccomp));
    3342             :     });
    3343             : 
    3344             :     return *this;
    3345             : }
    3346             : 
    3347             : template <class T>
    3348             : template <RunOn run_on>
    3349             : BaseFab<T>&
    3350             : BaseFab<T>::lockAdd (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
    3351             :                      int srccomp, int destcomp, int numcomp) noexcept
    3352             : {
    3353             : #if defined(AMREX_USE_OMP) && (AMREX_SPACEDIM > 1)
    3354             : #if defined(AMREX_USE_GPU)
    3355             :     if (run_on == RunOn::Host || Gpu::notInLaunchRegion()) {
    3356             : #endif
    3357             :         BL_ASSERT(destbox.ok());
    3358             :         BL_ASSERT(src.box().contains(srcbox));
    3359             :         BL_ASSERT(box().contains(destbox));
    3360             :         BL_ASSERT(destbox.sameSize(srcbox));
    3361             :         BL_ASSERT(srccomp >= 0 && srccomp+numcomp <= src.nComp());
    3362             :         BL_ASSERT(destcomp >= 0 && destcomp+numcomp <= nComp());
    3363             : 
    3364             :         Array4<T> const& d = this->array();
    3365             :         Array4<T const> const& s = src.const_array();
    3366             :         auto const& dlo = amrex::lbound(destbox);
    3367             :         auto const& dhi = amrex::ubound(destbox);
    3368             :         auto const& len = amrex::length(destbox);
    3369             :         auto const& slo = amrex::lbound(srcbox);
    3370             :         Dim3 const offset{slo.x-dlo.x, slo.y-dlo.y, slo.z-dlo.z};
    3371             : 
    3372             :         int planedim;
    3373             :         int nplanes;
    3374             :         int plo;
    3375             :         if (len.z == 1) {
    3376             :             planedim = 1;
    3377             :             nplanes = len.y;
    3378             :             plo = dlo.y;
    3379             :         } else {
    3380             :             planedim = 2;
    3381             :             nplanes = len.z;
    3382             :             plo = dlo.z;
    3383             :         }
    3384             : 
    3385             :         auto* mask = (bool*) amrex_mempool_alloc(sizeof(bool)*nplanes);
    3386             :         for (int ip = 0; ip < nplanes; ++ip) {
    3387             :             mask[ip] = false;
    3388             :         }
    3389             : 
    3390             :         int mm = 0;
    3391             :         int planes_left = nplanes;
    3392             :         while (planes_left > 0) {
    3393             :             AMREX_ASSERT(mm < nplanes);
    3394             :             auto const m = mm + plo;
    3395             :             auto* lock = OpenMP::get_lock(m);
    3396             :             if (omp_test_lock(lock))
    3397             :             {
    3398             :                 auto lo = dlo;
    3399             :                 auto hi = dhi;
    3400             :                 if (planedim == 1) {
    3401             :                     lo.y = m;
    3402             :                     hi.y = m;
    3403             :                 } else {
    3404             :                     lo.z = m;
    3405             :                     hi.z = m;
    3406             :                 }
    3407             : 
    3408             :                 for (int n = 0; n < numcomp; ++n) {
    3409             :                     for     (int k = lo.z; k <= hi.z; ++k) {
    3410             :                         for (int j = lo.y; j <= hi.y; ++j) {
    3411             :                             auto      * pdst = d.ptr(dlo.x,j         ,k         ,n+destcomp);
    3412             :                             auto const* psrc = s.ptr(slo.x,j+offset.y,k+offset.z,n+ srccomp);
    3413             : #pragma omp simd
    3414             :                             for (int ii = 0; ii < len.x; ++ii) {
    3415             :                                 pdst[ii] += psrc[ii];
    3416             :                             }
    3417             :                         }
    3418             :                     }
    3419             :                 }
    3420             : 
    3421             :                 mask[mm] = true;
    3422             :                 --planes_left;
    3423             :                 omp_unset_lock(lock);
    3424             :                 if (planes_left == 0) { break; }
    3425             :             }
    3426             : 
    3427             :             ++mm;
    3428             :             for (int ip = 0; ip < nplanes; ++ip) {
    3429             :                 int new_mm = (mm+ip) % nplanes;
    3430             :                 if ( ! mask[new_mm] ) {
    3431             :                     mm = new_mm;
    3432             :                     break;
    3433             :                 }
    3434             :             }
    3435             :         }
    3436             : 
    3437             :         amrex_mempool_free(mask);
    3438             : 
    3439             :         return *this;
    3440             : 
    3441             : #if defined(AMREX_USE_GPU)
    3442             :     } else {
    3443             :         return this->template atomicAdd<run_on>(src, srcbox, destbox, srccomp, destcomp, numcomp);
    3444             :     }
    3445             : #endif
    3446             : #else
    3447             :     return this->template atomicAdd<run_on>(src, srcbox, destbox, srccomp, destcomp, numcomp);
    3448             : #endif
    3449             : }
    3450             : 
    3451             : template <class T>
    3452             : template <RunOn run_on>
    3453             : BaseFab<T>&
    3454             : BaseFab<T>::minus (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp) noexcept
    3455             : {
    3456             :     return this->minus<run_on>(src, this->domain, SrcComp{srccomp}, DestComp{destcomp}, NumComps{numcomp});
    3457             : }
    3458             : 
    3459             : template <class T>
    3460             : template <RunOn run_on>
    3461             : BaseFab<T>&
    3462             : BaseFab<T>::minus (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp, int numcomp) noexcept
    3463             : {
    3464             :     return this->minus<run_on>(src, subbox, SrcComp{srccomp}, DestComp{destcomp}, NumComps{numcomp});
    3465             : }
    3466             : 
    3467             : template <class T>
    3468             : template <RunOn run_on>
    3469             : BaseFab<T>&
    3470             : BaseFab<T>::minus (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
    3471             :                    int srccomp, int destcomp, int numcomp) noexcept
    3472             : {
    3473             :     BL_ASSERT(destbox.ok());
    3474             :     BL_ASSERT(src.box().contains(srcbox));
    3475             :     BL_ASSERT(box().contains(destbox));
    3476             :     BL_ASSERT(destbox.sameSize(srcbox));
    3477             :     BL_ASSERT(srccomp >= 0 && srccomp+numcomp <= src.nComp());
    3478             :     BL_ASSERT(destcomp >= 0 && destcomp+numcomp <= nComp());
    3479             : 
    3480             :     Array4<T> const& d = this->array();
    3481             :     Array4<T const> const& s = src.const_array();
    3482             :     const auto dlo = amrex::lbound(destbox);
    3483             :     const auto slo = amrex::lbound(srcbox);
    3484             :     const Dim3 offset{slo.x-dlo.x,slo.y-dlo.y,slo.z-dlo.z};
    3485             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
    3486             :     {
    3487             :         d(i,j,k,n+destcomp) -= s(i+offset.x,j+offset.y,k+offset.z,n+srccomp);
    3488             :     });
    3489             : 
    3490             :     return *this;
    3491             : }
    3492             : 
    3493             : template <class T>
    3494             : template <RunOn run_on>
    3495             : BaseFab<T>&
    3496             : BaseFab<T>::mult (T const& r, int comp, int numcomp) noexcept
    3497             : {
    3498             :     return this->mult<run_on>(r, this->domain, DestComp{comp}, NumComps{numcomp});
    3499             : }
    3500             : 
    3501             : template <class T>
    3502             : template <RunOn run_on>
    3503             : BaseFab<T>&
    3504             : BaseFab<T>::mult (T const& r, const Box& b, int comp, int numcomp) noexcept
    3505             : {
    3506             :     return this->mult<run_on>(r, b, DestComp{comp}, NumComps{numcomp});
    3507             : }
    3508             : 
    3509             : template <class T>
    3510             : template <RunOn run_on>
    3511             : BaseFab<T>&
    3512           0 : BaseFab<T>::mult (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp) noexcept
    3513             : {
    3514           0 :     return this->mult<run_on>(src, this->domain, SrcComp{srccomp}, DestComp{destcomp}, NumComps{numcomp});
    3515             : }
    3516             : 
    3517             : template <class T>
    3518             : template <RunOn run_on>
    3519             : BaseFab<T>&
    3520             : BaseFab<T>::mult (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp, int numcomp) noexcept
    3521             : {
    3522             :     return this->mult<run_on>(src, subbox, SrcComp{srccomp}, DestComp{destcomp}, NumComps{numcomp});
    3523             : }
    3524             : 
    3525             : template <class T>
    3526             : template <RunOn run_on>
    3527             : BaseFab<T>&
    3528             : BaseFab<T>::mult (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
    3529             :                   int srccomp, int destcomp, int numcomp) noexcept
    3530             : {
    3531             :     BL_ASSERT(destbox.ok());
    3532             :     BL_ASSERT(src.box().contains(srcbox));
    3533             :     BL_ASSERT(box().contains(destbox));
    3534             :     BL_ASSERT(destbox.sameSize(srcbox));
    3535             :     BL_ASSERT(srccomp >= 0 && srccomp+numcomp <= src.nComp());
    3536             :     BL_ASSERT(destcomp >= 0 && destcomp+numcomp <= nComp());
    3537             : 
    3538             :     Array4<T> const& d = this->array();
    3539             :     Array4<T const> const& s = src.const_array();
    3540             :     const auto dlo = amrex::lbound(destbox);
    3541             :     const auto slo = amrex::lbound(srcbox);
    3542             :     const Dim3 offset{slo.x-dlo.x,slo.y-dlo.y,slo.z-dlo.z};
    3543             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
    3544             :     {
    3545             :         d(i,j,k,n+destcomp) *= s(i+offset.x,j+offset.y,k+offset.z,n+srccomp);
    3546             :     });
    3547             : 
    3548             :     return *this;
    3549             : }
    3550             : 
    3551             : template <class T>
    3552             : template <RunOn run_on>
    3553             : BaseFab<T>&
    3554             : BaseFab<T>::divide (T const& r, int comp, int numcomp) noexcept
    3555             : {
    3556             :     return this->divide<run_on>(r, this->domain, DestComp{comp}, NumComps{numcomp});
    3557             : }
    3558             : 
    3559             : template <class T>
    3560             : template <RunOn run_on>
    3561             : BaseFab<T>&
    3562             : BaseFab<T>::divide (T const& r, const Box& b, int comp, int numcomp) noexcept
    3563             : {
    3564             :     return this->divide<run_on>(r, b, DestComp{comp}, NumComps{numcomp});
    3565             : }
    3566             : 
    3567             : template <class T>
    3568             : template <RunOn run_on>
    3569             : BaseFab<T>&
    3570             : BaseFab<T>::divide (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp) noexcept
    3571             : {
    3572             :     return this->divide<run_on>(src, this->domain, SrcComp{srccomp}, DestComp{destcomp}, NumComps{numcomp});
    3573             : }
    3574             : 
    3575             : template <class T>
    3576             : template <RunOn run_on>
    3577             : BaseFab<T>&
    3578             : BaseFab<T>::divide (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp, int numcomp) noexcept
    3579             : {
    3580             :     return this->divide<run_on>(src, subbox, SrcComp{srccomp}, DestComp{destcomp}, NumComps{numcomp});
    3581             : }
    3582             : 
    3583             : template <class T>
    3584             : template <RunOn run_on>
    3585             : BaseFab<T>&
    3586             : BaseFab<T>::divide (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
    3587             :                     int srccomp, int destcomp, int numcomp) noexcept
    3588             : {
    3589             :     BL_ASSERT(destbox.ok());
    3590             :     BL_ASSERT(src.box().contains(srcbox));
    3591             :     BL_ASSERT(box().contains(destbox));
    3592             :     BL_ASSERT(destbox.sameSize(srcbox));
    3593             :     BL_ASSERT(srccomp >= 0 && srccomp+numcomp <= src.nComp());
    3594             :     BL_ASSERT(destcomp >= 0 && destcomp+numcomp <= nComp());
    3595             : 
    3596             :     Array4<T> const& d = this->array();
    3597             :     Array4<T const> const& s = src.const_array();
    3598             :     const auto dlo = amrex::lbound(destbox);
    3599             :     const auto slo = amrex::lbound(srcbox);
    3600             :     const Dim3 offset{slo.x-dlo.x,slo.y-dlo.y,slo.z-dlo.z};
    3601             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
    3602             :     {
    3603             :         d(i,j,k,n+destcomp) /= s(i+offset.x,j+offset.y,k+offset.z,n+srccomp);
    3604             :     });
    3605             : 
    3606             :     return *this;
    3607             : }
    3608             : 
    3609             : template <class T>
    3610             : template <RunOn run_on>
    3611             : BaseFab<T>&
    3612             : BaseFab<T>::protected_divide (const BaseFab<T>& src) noexcept
    3613             : {
    3614             :     Box ovlp(this->domain);
    3615             :     ovlp &= src.domain;
    3616             :     return ovlp.ok() ? this->protected_divide<run_on>(src,ovlp,ovlp,0,0,this->nvar) : *this;
    3617             : }
    3618             : 
    3619             : template <class T>
    3620             : template <RunOn run_on>
    3621             : BaseFab<T>&
    3622             : BaseFab<T>::protected_divide (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp) noexcept
    3623             : {
    3624             :     Box ovlp(this->domain);
    3625             :     ovlp &= src.domain;
    3626             :     return ovlp.ok() ? this->protected_divide<run_on>(src,ovlp,ovlp,srccomp,destcomp,numcomp) : *this;
    3627             : }
    3628             : 
    3629             : template <class T>
    3630             : template <RunOn run_on>
    3631             : BaseFab<T>&
    3632             : BaseFab<T>::protected_divide (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp,
    3633             :                               int numcomp) noexcept
    3634             : {
    3635             :     Box ovlp(this->domain);
    3636             :     ovlp &= src.domain;
    3637             :     ovlp &= subbox;
    3638             :     return ovlp.ok() ? this->protected_divide<run_on>(src,ovlp,ovlp,srccomp,destcomp,numcomp) : *this;
    3639             : }
    3640             : 
    3641             : template <class T>
    3642             : template <RunOn run_on>
    3643             : BaseFab<T>&
    3644             : BaseFab<T>::protected_divide (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
    3645             :                               int srccomp, int destcomp, int numcomp) noexcept
    3646             : {
    3647             :     BL_ASSERT(destbox.ok());
    3648             :     BL_ASSERT(src.box().contains(srcbox));
    3649             :     BL_ASSERT(box().contains(destbox));
    3650             :     BL_ASSERT(destbox.sameSize(srcbox));
    3651             :     BL_ASSERT(srccomp >= 0 && srccomp+numcomp <= src.nComp());
    3652             :     BL_ASSERT(destcomp >= 0 && destcomp+numcomp <= nComp());
    3653             : 
    3654             :     Array4<T> const& d = this->array();
    3655             :     Array4<T const> const& s = src.const_array();
    3656             :     const auto dlo = amrex::lbound(destbox);
    3657             :     const auto slo = amrex::lbound(srcbox);
    3658             :     const Dim3 offset{slo.x-dlo.x,slo.y-dlo.y,slo.z-dlo.z};
    3659             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
    3660             :     {
    3661             :         if (s(i+offset.x,j+offset.y,k+offset.z,n+srccomp) != 0) {
    3662             :             d(i,j,k,n+destcomp) /= s(i+offset.x,j+offset.y,k+offset.z,n+srccomp);
    3663             :         }
    3664             :     });
    3665             : 
    3666             :     return *this;
    3667             : }
    3668             : 
    3669             : /**
    3670             : * Linear Interpolation / Extrapolation
    3671             : * Result is (t2-t)/(t2-t1)*f1 + (t-t1)/(t2-t1)*f2
    3672             : * Data is taken from b1 region of f1, b2 region of f2
    3673             : * and stored in b region of this FAB.
    3674             : * Boxes b, b1 and b2 must be the same size.
    3675             : * Data is taken from component comp1 of f1, comp2 of f2,
    3676             : * and stored in component comp of this FAB.
    3677             : * This fab is returned as a reference for chaining.
    3678             : */
    3679             : template <class T>
    3680             : template <RunOn run_on>
    3681             : BaseFab<T>&
    3682             : BaseFab<T>::linInterp (const BaseFab<T>& f1, const Box& b1, int comp1,
    3683             :                        const BaseFab<T>& f2, const Box& b2, int comp2,
    3684             :                        Real t1, Real t2, Real t,
    3685             :                        const Box& b, int comp, int numcomp) noexcept
    3686             : {
    3687             :     if (amrex::almostEqual(t1,t2) || amrex::almostEqual(t1,t)) {
    3688             :         return copy<run_on>(f1,b1,comp1,b,comp,numcomp);
    3689             :     } else if (amrex::almostEqual(t2,t)) {
    3690             :         return copy<run_on>(f2,b2,comp2,b,comp,numcomp);
    3691             :     } else {
    3692             :         Real alpha = (t2-t)/(t2-t1);
    3693             :         Real beta = (t-t1)/(t2-t1);
    3694             :         return linComb<run_on>(f1,b1,comp1,f2,b2,comp2,alpha,beta,b,comp,numcomp);
    3695             :     }
    3696             : }
    3697             : 
    3698             : template <class T>
    3699             : template <RunOn run_on>
    3700             : BaseFab<T>&
    3701             : BaseFab<T>::linInterp (const BaseFab<T>& f1, int comp1,
    3702             :                        const BaseFab<T>& f2, int comp2,
    3703             :                        Real t1, Real t2, Real t,
    3704             :                        const Box& b, int comp, int numcomp) noexcept
    3705             : {
    3706             :     if (amrex::almostEqual(t1,t2) || amrex::almostEqual(t1,t)) {
    3707             :         return copy<run_on>(f1,b,comp1,b,comp,numcomp);
    3708             :     } else if (amrex::almostEqual(t2,t)) {
    3709             :         return copy<run_on>(f2,b,comp2,b,comp,numcomp);
    3710             :     } else {
    3711             :         Real alpha = (t2-t)/(t2-t1);
    3712             :         Real beta = (t-t1)/(t2-t1);
    3713             :         return linComb<run_on>(f1,b,comp1,f2,b,comp2,alpha,beta,b,comp,numcomp);
    3714             :     }
    3715             : }
    3716             : 
    3717             : //
    3718             : // New interfaces
    3719             : //
    3720             : 
    3721             : template <class T>
    3722             : template <RunOn run_on>
    3723             : void
    3724        4770 : BaseFab<T>::setVal (T const& val) noexcept
    3725             : {
    3726        4770 :     this->setVal<run_on>(val, this->domain, DestComp{0}, NumComps{this->nvar});
    3727        4770 : }
    3728             : 
    3729             : template <class T>
    3730             : template <RunOn run_on>
    3731             : void
    3732        5184 : BaseFab<T>::setVal (T const& x, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept
    3733             : {
    3734             :     AMREX_ASSERT(dcomp.i >= 0 && dcomp.i + ncomp.n <= this->nvar);
    3735        5184 :     Array4<T> const& a = this->array();
    3736     1822280 :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG (run_on, bx, ncomp.n, i, j, k, n,
    3737             :     {
    3738             :         a(i,j,k,n+dcomp.i) = x;
    3739             :     });
    3740        5184 : }
    3741             : 
    3742             : template <class T>
    3743             : template <RunOn run_on>
    3744             : void
    3745             : BaseFab<T>::setValIf (T const& val, const BaseFab<int>& mask) noexcept
    3746             : {
    3747             :     this->setValIf<run_on>(val, this->domain, mask, DestComp{0}, NumComps{this->nvar});
    3748             : }
    3749             : 
    3750             : template <class T>
    3751             : template <RunOn run_on>
    3752             : void
    3753             : BaseFab<T>::setValIf (T const& val, Box const& bx, const BaseFab<int>& mask, DestComp dcomp, NumComps ncomp) noexcept
    3754             : {
    3755             :     AMREX_ASSERT(dcomp.i >= 0 && dcomp.i + ncomp.n <= this->nvar);
    3756             :     Array4<T> const& a = this->array();
    3757             :     Array4<int const> const& m = mask.const_array();
    3758             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG (run_on, bx, ncomp.n, i, j, k, n,
    3759             :     {
    3760             :         if (m(i,j,k)) { a(i,j,k,n+dcomp.i) = val; }
    3761             :     });
    3762             : }
    3763             : 
    3764             : template <class T>
    3765             : template <RunOn run_on>
    3766             : void
    3767             : BaseFab<T>::setValIfNot (T const& val, const BaseFab<int>& mask) noexcept
    3768             : {
    3769             :     this->setValIfNot<run_on>(val, this->domain, mask, DestComp{0}, NumComps{this->nvar});
    3770             : }
    3771             : 
    3772             : template <class T>
    3773             : template <RunOn run_on>
    3774             : void
    3775             : BaseFab<T>::setValIfNot (T const& val, Box const& bx, const BaseFab<int>& mask, DestComp dcomp, NumComps ncomp) noexcept
    3776             : {
    3777             :     AMREX_ASSERT(dcomp.i >= 0 && dcomp.i + ncomp.n <= this->nvar);
    3778             :     Array4<T> const& a = this->array();
    3779             :     Array4<int const> const& m = mask.const_array();
    3780             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG (run_on, bx, ncomp.n, i, j, k, n,
    3781             :     {
    3782             :         if (!m(i,j,k)) { a(i,j,k,n+dcomp.i) = val; }
    3783             :     });
    3784             : }
    3785             : 
    3786             : template <class T>
    3787             : template <RunOn run_on>
    3788             : void
    3789         216 : BaseFab<T>::setComplement (T const& x, const Box& bx, DestComp dcomp, NumComps ncomp) noexcept
    3790             : {
    3791             : #ifdef AMREX_USE_GPU
    3792             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    3793             :         Array4<T> const& a = this->array();
    3794             :         amrex::ParallelFor(this->domain, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
    3795             :         {
    3796             :             if (! bx.contains(IntVect(AMREX_D_DECL(i,j,k)))) {
    3797             :                 for (int n = dcomp.i; n < ncomp.n+dcomp.i; ++n) {
    3798             :                     a(i,j,k,n) = x;
    3799             :                 }
    3800             :             }
    3801             :         });
    3802             :     } else
    3803             : #endif
    3804             :     {
    3805         432 :         const BoxList b_lst = amrex::boxDiff(this->domain,bx);
    3806         630 :         for (auto const& b : b_lst) {
    3807         414 :             this->setVal<RunOn::Host>(x, b, dcomp, ncomp);
    3808             :         }
    3809             :     }
    3810         216 : }
    3811             : 
    3812             : template <class T>
    3813             : template <RunOn run_on>
    3814             : BaseFab<T>&
    3815             : BaseFab<T>::copy (const BaseFab<T>& src) noexcept
    3816             : {
    3817             :     this->copy<run_on>(src, this->domain, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
    3818             :     return *this;
    3819             : }
    3820             : 
    3821             : template <class T>
    3822             : template <RunOn run_on>
    3823             : BaseFab<T>&
    3824             : BaseFab<T>::copy (const BaseFab<T>& src, Box bx,
    3825             :                   SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept
    3826             : {
    3827             :     AMREX_ASSERT(this->domain.sameType(src.domain));
    3828             :     AMREX_ASSERT(scomp.i >= 0 && scomp.i+ncomp.n <= src.nvar);
    3829             :     AMREX_ASSERT(dcomp.i >= 0 && dcomp.i+ncomp.n <= this->nvar);
    3830             : 
    3831             :     bx &= src.domain;
    3832             : 
    3833             :     Array4<T> const& d = this->array();
    3834             :     Array4<T const> const& s = src.const_array();
    3835             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
    3836             :     {
    3837             :         d(i,j,k,n+dcomp.i) = s(i,j,k,n+scomp.i);
    3838             :     });
    3839             : 
    3840             :     return *this;
    3841             : }
    3842             : 
    3843             : template <class T>
    3844             : template <RunOn run_on>
    3845             : BaseFab<T>&
    3846             : BaseFab<T>::plus (T const& val) noexcept
    3847             : {
    3848             :     return this->plus<run_on>(val, this->domain, DestComp{0}, NumComps{this->nvar});
    3849             : }
    3850             : 
    3851             : template <class T>
    3852             : template <RunOn run_on>
    3853             : BaseFab<T>&
    3854             : BaseFab<T>::operator+= (T const& val) noexcept
    3855             : {
    3856             :     return this->plus<run_on>(val, this->domain,  DestComp{0}, NumComps{this->nvar});
    3857             : }
    3858             : 
    3859             : template <class T>
    3860             : template <RunOn run_on>
    3861             : BaseFab<T>&
    3862             : BaseFab<T>::plus (T const& val, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept
    3863             : {
    3864             :     BL_ASSERT(dcomp.i >= 0 && dcomp.i + ncomp.n <= this->nvar);
    3865             : 
    3866             :     Array4<T> const& a = this->array();
    3867             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
    3868             :     {
    3869             :         a(i,j,k,n+dcomp.i) += val;
    3870             :     });
    3871             : 
    3872             :     return *this;
    3873             : }
    3874             : 
    3875             : template <class T>
    3876             : template <RunOn run_on>
    3877             : BaseFab<T>&
    3878             : BaseFab<T>::plus (const BaseFab<T>& src) noexcept
    3879             : {
    3880             :     return this->plus<run_on>(src, this->domain, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
    3881             : }
    3882             : 
    3883             : template <class T>
    3884             : template <RunOn run_on>
    3885             : BaseFab<T>&
    3886             : BaseFab<T>::operator+= (const BaseFab<T>& src) noexcept
    3887             : {
    3888             :     return this->plus<run_on>(src, this->domain, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
    3889             : }
    3890             : 
    3891             : template <class T>
    3892             : template <RunOn run_on>
    3893             : BaseFab<T>&
    3894           0 : BaseFab<T>::plus (const BaseFab<T>& src, Box bx,
    3895             :                   SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept
    3896             : {
    3897             :     AMREX_ASSERT(this->domain.sameType(src.domain));
    3898             :     AMREX_ASSERT(scomp.i >= 0 && scomp.i+ncomp.n <= src.nvar);
    3899             :     AMREX_ASSERT(dcomp.i >= 0 && dcomp.i+ncomp.n <= this->nvar);
    3900             : 
    3901           0 :     bx &= src.domain;
    3902             : 
    3903           0 :     Array4<T> const& d = this->array();
    3904           0 :     Array4<T const> const& s = src.const_array();
    3905           0 :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
    3906             :     {
    3907             :         d(i,j,k,n+dcomp.i) += s(i,j,k,n+scomp.i);
    3908             :     });
    3909             : 
    3910           0 :     return *this;
    3911             : }
    3912             : 
    3913             : template <class T>
    3914             : template <RunOn run_on>
    3915             : BaseFab<T>&
    3916             : BaseFab<T>::minus (T const& val) noexcept
    3917             : {
    3918             :     return this->minus<run_on>(val, this->domain, DestComp{0}, NumComps{this->nvar});
    3919             : }
    3920             : 
    3921             : template <class T>
    3922             : template <RunOn run_on>
    3923             : BaseFab<T>&
    3924             : BaseFab<T>::operator-= (T const& val) noexcept
    3925             : {
    3926             :     return this->minus<run_on>(val, this->domain,  DestComp{0}, NumComps{this->nvar});
    3927             : }
    3928             : 
    3929             : template <class T>
    3930             : template <RunOn run_on>
    3931             : BaseFab<T>&
    3932             : BaseFab<T>::minus (T const& val, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept
    3933             : {
    3934             :     BL_ASSERT(dcomp.i >= 0 && dcomp.i + ncomp.n <= this->nvar);
    3935             : 
    3936             :     Array4<T> const& a = this->array();
    3937             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
    3938             :     {
    3939             :         a(i,j,k,n+dcomp.i) -= val;
    3940             :     });
    3941             : 
    3942             :     return *this;
    3943             : }
    3944             : 
    3945             : template <class T>
    3946             : template <RunOn run_on>
    3947             : BaseFab<T>&
    3948             : BaseFab<T>::minus (const BaseFab<T>& src) noexcept
    3949             : {
    3950             :     return this->minus<run_on>(src, this->domain, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
    3951             : }
    3952             : 
    3953             : template <class T>
    3954             : template <RunOn run_on>
    3955             : BaseFab<T>&
    3956             : BaseFab<T>::operator-= (const BaseFab<T>& src) noexcept
    3957             : {
    3958             :     return this->minus<run_on>(src, this->domain, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
    3959             : }
    3960             : 
    3961             : template <class T>
    3962             : template <RunOn run_on>
    3963             : BaseFab<T>&
    3964             : BaseFab<T>::minus (const BaseFab<T>& src, Box bx,
    3965             :                    SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept
    3966             : {
    3967             :     AMREX_ASSERT(this->domain.sameType(src.domain));
    3968             :     AMREX_ASSERT(scomp.i >= 0 && scomp.i+ncomp.n <= src.nvar);
    3969             :     AMREX_ASSERT(dcomp.i >= 0 && dcomp.i+ncomp.n <= this->nvar);
    3970             : 
    3971             :     bx &= src.domain;
    3972             : 
    3973             :     Array4<T> const& d = this->array();
    3974             :     Array4<T const> const& s = src.const_array();
    3975             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
    3976             :     {
    3977             :         d(i,j,k,n+dcomp.i) -= s(i,j,k,n+scomp.i);
    3978             :     });
    3979             : 
    3980             :     return *this;
    3981             : }
    3982             : 
    3983             : template <class T>
    3984             : template <RunOn run_on>
    3985             : BaseFab<T>&
    3986             : BaseFab<T>::mult (T const& val) noexcept
    3987             : {
    3988             :     return this->mult<run_on>(val, this->domain, DestComp{0}, NumComps{this->nvar});
    3989             : }
    3990             : 
    3991             : template <class T>
    3992             : template <RunOn run_on>
    3993             : BaseFab<T>&
    3994             : BaseFab<T>::operator*= (T const& val) noexcept
    3995             : {
    3996             :     return this->mult<run_on>(val, this->domain,  DestComp{0}, NumComps{this->nvar});
    3997             : }
    3998             : 
    3999             : template <class T>
    4000             : template <RunOn run_on>
    4001             : BaseFab<T>&
    4002             : BaseFab<T>::mult (T const& val, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept
    4003             : {
    4004             :     BL_ASSERT(dcomp.i >= 0 && dcomp.i + ncomp.n <= this->nvar);
    4005             : 
    4006             :     Array4<T> const& a = this->array();
    4007             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
    4008             :     {
    4009             :         a(i,j,k,n+dcomp.i) *= val;
    4010             :     });
    4011             : 
    4012             :     return *this;
    4013             : }
    4014             : 
    4015             : template <class T>
    4016             : template <RunOn run_on>
    4017             : BaseFab<T>&
    4018             : BaseFab<T>::mult (const BaseFab<T>& src) noexcept
    4019             : {
    4020             :     return this->mult<run_on>(src, this->domain, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
    4021             : }
    4022             : 
    4023             : template <class T>
    4024             : template <RunOn run_on>
    4025             : BaseFab<T>&
    4026             : BaseFab<T>::operator*= (const BaseFab<T>& src) noexcept
    4027             : {
    4028             :     return this->mult<run_on>(src, this->domain, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
    4029             : }
    4030             : 
    4031             : template <class T>
    4032             : template <RunOn run_on>
    4033             : BaseFab<T>&
    4034           0 : BaseFab<T>::mult (const BaseFab<T>& src, Box bx,
    4035             :                   SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept
    4036             : {
    4037             :     AMREX_ASSERT(this->domain.sameType(src.domain));
    4038             :     AMREX_ASSERT(scomp.i >= 0 && scomp.i+ncomp.n <= src.nvar);
    4039             :     AMREX_ASSERT(dcomp.i >= 0 && dcomp.i+ncomp.n <= this->nvar);
    4040             : 
    4041           0 :     bx &= src.domain;
    4042             : 
    4043           0 :     Array4<T> const& d = this->array();
    4044           0 :     Array4<T const> const& s = src.const_array();
    4045           0 :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
    4046             :     {
    4047             :         d(i,j,k,n+dcomp.i) *= s(i,j,k,n+scomp.i);
    4048             :     });
    4049             : 
    4050           0 :     return *this;
    4051             : }
    4052             : 
    4053             : template <class T>
    4054             : template <RunOn run_on>
    4055             : BaseFab<T>&
    4056             : BaseFab<T>::divide (T const& val) noexcept
    4057             : {
    4058             :     return this->divide<run_on>(val, this->domain, DestComp{0}, NumComps{this->nvar});
    4059             : }
    4060             : 
    4061             : template <class T>
    4062             : template <RunOn run_on>
    4063             : BaseFab<T>&
    4064             : BaseFab<T>::operator/= (T const& val) noexcept
    4065             : {
    4066             :     return this->divide<run_on>(val, this->domain,  DestComp{0}, NumComps{this->nvar});
    4067             : }
    4068             : 
    4069             : template <class T>
    4070             : template <RunOn run_on>
    4071             : BaseFab<T>&
    4072             : BaseFab<T>::divide (T const& val, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept
    4073             : {
    4074             :     BL_ASSERT(dcomp.i >= 0 && dcomp.i + ncomp.n <= this->nvar);
    4075             : 
    4076             :     Array4<T> const& a = this->array();
    4077             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
    4078             :     {
    4079             :         a(i,j,k,n+dcomp.i) /= val;
    4080             :     });
    4081             : 
    4082             :     return *this;
    4083             : }
    4084             : 
    4085             : template <class T>
    4086             : template <RunOn run_on>
    4087             : BaseFab<T>&
    4088             : BaseFab<T>::divide (const BaseFab<T>& src) noexcept
    4089             : {
    4090             :     return this->divide<run_on>(src, this->domain, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
    4091             : }
    4092             : 
    4093             : template <class T>
    4094             : template <RunOn run_on>
    4095             : BaseFab<T>&
    4096             : BaseFab<T>::operator/= (const BaseFab<T>& src) noexcept
    4097             : {
    4098             :     return this->divide<run_on>(src, this->domain, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
    4099             : }
    4100             : 
    4101             : template <class T>
    4102             : template <RunOn run_on>
    4103             : BaseFab<T>&
    4104             : BaseFab<T>::divide (const BaseFab<T>& src, Box bx,
    4105             :                     SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept
    4106             : {
    4107             :     AMREX_ASSERT(this->domain.sameType(src.domain));
    4108             :     AMREX_ASSERT(scomp.i >= 0 && scomp.i+ncomp.n <= src.nvar);
    4109             :     AMREX_ASSERT(dcomp.i >= 0 && dcomp.i+ncomp.n <= this->nvar);
    4110             : 
    4111             :     bx &= src.domain;
    4112             : 
    4113             :     Array4<T> const& d = this->array();
    4114             :     Array4<T const> const& s = src.const_array();
    4115             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
    4116             :     {
    4117             :         d(i,j,k,n+dcomp.i) /= s(i,j,k,n+scomp.i);
    4118             :     });
    4119             : 
    4120             :     return *this;
    4121             : }
    4122             : 
    4123             : template <class T>
    4124             : template <RunOn run_on>
    4125             : BaseFab<T>&
    4126             : BaseFab<T>::negate () noexcept
    4127             : {
    4128             :     return this->negate<run_on>(this->domain, DestComp{0}, NumComps{this->nvar});
    4129             : }
    4130             : 
    4131             : template <class T>
    4132             : template <RunOn run_on>
    4133             : BaseFab<T>&
    4134             : BaseFab<T>::negate (const Box& bx, DestComp dcomp, NumComps ncomp) noexcept
    4135             : {
    4136             :     BL_ASSERT(dcomp.i >= 0 && dcomp.i + ncomp.n <= this->nvar);
    4137             : 
    4138             :     Array4<T> const& a = this->array();
    4139             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
    4140             :     {
    4141             :         a(i,j,k,n+dcomp.i) = -a(i,j,k,n+dcomp.i);
    4142             :     });
    4143             : 
    4144             :     return *this;
    4145             : }
    4146             : 
    4147             : template <class T>
    4148             : template <RunOn run_on>
    4149             : BaseFab<T>&
    4150             : BaseFab<T>::invert (T const& r) noexcept
    4151             : {
    4152             :     return this->invert<run_on>(r, this->domain, DestComp{0}, NumComps{this->nvar});
    4153             : }
    4154             : 
    4155             : template <class T>
    4156             : template <RunOn run_on>
    4157             : BaseFab<T>&
    4158             : BaseFab<T>::invert (T const& r, const Box& bx, DestComp dcomp, NumComps ncomp) noexcept
    4159             : {
    4160             :     BL_ASSERT(dcomp.i >= 0 && dcomp.i + ncomp.n <= this->nvar);
    4161             : 
    4162             :     Array4<T> const& a = this->array();
    4163             :     AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
    4164             :     {
    4165             :         a(i,j,k,n+dcomp.i) = r / a(i,j,k,n+dcomp.i);
    4166             :     });
    4167             : 
    4168             :     return *this;
    4169             : }
    4170             : 
    4171             : template <class T>
    4172             : template <RunOn run_on>
    4173             : T
    4174             : BaseFab<T>::sum (const Box& bx, DestComp dcomp, NumComps ncomp) const noexcept
    4175             : {
    4176             :     AMREX_ASSERT(dcomp.i >= 0 && dcomp.i+ncomp.n <= this->nvar);
    4177             : 
    4178             :     T r = 0;
    4179             :     Array4<T const> const& a = this->const_array();
    4180             : #ifdef AMREX_USE_GPU
    4181             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    4182             :         ReduceOps<ReduceOpSum> reduce_op;
    4183             :         ReduceData<T> reduce_data(reduce_op);
    4184             :         using ReduceTuple = typename decltype(reduce_data)::Type;
    4185             :         reduce_op.eval(bx, reduce_data,
    4186             :         [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    4187             :         {
    4188             :             T t = 0;
    4189             :             for (int n = 0; n < ncomp.n; ++n) {
    4190             :                 t += a(i,j,k,n+dcomp.i);
    4191             :             }
    4192             :             return { t };
    4193             :         });
    4194             :         ReduceTuple hv = reduce_data.value(reduce_op);
    4195             :         r = amrex::get<0>(hv);
    4196             :     } else
    4197             : #endif
    4198             :     {
    4199             :         amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) noexcept
    4200             :         {
    4201             :             r += a(i,j,k,n+dcomp.i);
    4202             :         });
    4203             :     }
    4204             : 
    4205             :     return r;
    4206             : }
    4207             : 
    4208             : template <class T>
    4209             : template <RunOn run_on>
    4210             : T
    4211             : BaseFab<T>::dot (const BaseFab<T>& src, const Box& bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) const noexcept
    4212             : {
    4213             :     AMREX_ASSERT(this->domain.sameType(src.domain));
    4214             :     AMREX_ASSERT(scomp.i >= 0 && scomp.i+ncomp.n <= src.nvar);
    4215             :     AMREX_ASSERT(dcomp.i >= 0 && dcomp.i+ncomp.n <= this->nvar);
    4216             : 
    4217             :     T r = 0;
    4218             :     Array4<T const> const& d = this->const_array();
    4219             :     Array4<T const> const& s = src.const_array();
    4220             : #ifdef AMREX_USE_GPU
    4221             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    4222             :         ReduceOps<ReduceOpSum> reduce_op;
    4223             :         ReduceData<T> reduce_data(reduce_op);
    4224             :         using ReduceTuple = typename decltype(reduce_data)::Type;
    4225             :         reduce_op.eval(bx, reduce_data,
    4226             :         [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    4227             :         {
    4228             :             T t = 0;
    4229             :             for (int n = 0; n < ncomp.n; ++n) {
    4230             :                 t += d(i,j,k,n+dcomp.i) * s(i,j,k,n+scomp.i);
    4231             :             }
    4232             :             return { t };
    4233             :         });
    4234             :         ReduceTuple hv = reduce_data.value(reduce_op);
    4235             :         r = amrex::get<0>(hv);
    4236             :     } else
    4237             : #endif
    4238             :     {
    4239             :         amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) noexcept
    4240             :         {
    4241             :             r += d(i,j,k,n+dcomp.i) * s(i,j,k,n+scomp.i);
    4242             :         });
    4243             :     }
    4244             : 
    4245             :     return r;
    4246             : }
    4247             : 
    4248             : template <class T>
    4249             : template <RunOn run_on>
    4250             : T
    4251             : BaseFab<T>::dot (const Box& bx, int destcomp, int numcomp) const noexcept
    4252             : {
    4253             :     return dot<run_on>(bx, DestComp{destcomp}, NumComps{numcomp});
    4254             : }
    4255             : 
    4256             : 
    4257             : template <class T>
    4258             : template <RunOn run_on>
    4259             : T
    4260             : BaseFab<T>::dot (const Box& bx, DestComp dcomp, NumComps ncomp) const noexcept
    4261             : {
    4262             :     AMREX_ASSERT(dcomp.i >= 0 && dcomp.i+ncomp.n <= this->nvar);
    4263             : 
    4264             :     T r = 0;
    4265             :     Array4<T const> const& a = this->const_array();
    4266             : #ifdef AMREX_USE_GPU
    4267             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    4268             :         ReduceOps<ReduceOpSum> reduce_op;
    4269             :         ReduceData<T> reduce_data(reduce_op);
    4270             :         using ReduceTuple = typename decltype(reduce_data)::Type;
    4271             :         reduce_op.eval(bx, reduce_data,
    4272             :         [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    4273             :         {
    4274             :             T t = 0;
    4275             :             for (int n = 0; n < ncomp.n; ++n) {
    4276             :                 t += a(i,j,k,n+dcomp.i)*a(i,j,k,n+dcomp.i);
    4277             :             }
    4278             :             return { t };
    4279             :         });
    4280             :         ReduceTuple hv = reduce_data.value(reduce_op);
    4281             :         r = amrex::get<0>(hv);
    4282             :     } else
    4283             : #endif
    4284             :     {
    4285             :         amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) noexcept
    4286             :         {
    4287             :             r += a(i,j,k,n+dcomp.i)*a(i,j,k,n+dcomp.i);
    4288             :         });
    4289             :     }
    4290             : 
    4291             :     return r;
    4292             : }
    4293             : 
    4294             : template <class T>
    4295             : template <RunOn run_on>
    4296             : T
    4297             : BaseFab<T>::dotmask (const BaseFab<T>& src, const Box& bx, const BaseFab<int>& mask,
    4298             :                      SrcComp scomp, DestComp dcomp, NumComps ncomp) const noexcept
    4299             : {
    4300             :     AMREX_ASSERT(this->domain.sameType(src.domain));
    4301             :     AMREX_ASSERT(this->domain.sameType(mask.domain));
    4302             :     AMREX_ASSERT(scomp.i >= 0 && scomp.i+ncomp.n <= src.nvar);
    4303             :     AMREX_ASSERT(dcomp.i >= 0 && dcomp.i+ncomp.n <= this->nvar);
    4304             : 
    4305             :     T r = 0;
    4306             :     Array4<T const> const& d = this->const_array();
    4307             :     Array4<T const> const& s = src.const_array();
    4308             :     Array4<int const> const& m = mask.const_array();
    4309             : #ifdef AMREX_USE_GPU
    4310             :     if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
    4311             :         ReduceOps<ReduceOpSum> reduce_op;
    4312             :         ReduceData<T> reduce_data(reduce_op);
    4313             :         using ReduceTuple = typename decltype(reduce_data)::Type;
    4314             :         reduce_op.eval(bx, reduce_data,
    4315             :         [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
    4316             :         {
    4317             :             T t = 0;
    4318             :             T mi = static_cast<T>(static_cast<int>(static_cast<bool>(m(i,j,k))));
    4319             :             for (int n = 0; n < ncomp.n; ++n) {
    4320             :                 t += d(i,j,k,n+dcomp.i)*s(i,j,k,n+scomp.i)*mi;
    4321             :             }
    4322             :             return { t };
    4323             :         });
    4324             :         ReduceTuple hv = reduce_data.value(reduce_op);
    4325             :         r = amrex::get<0>(hv);
    4326             :     } else
    4327             : #endif
    4328             :     {
    4329             :         amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) noexcept
    4330             :         {
    4331             :             int mi = static_cast<int>(static_cast<bool>(m(i,j,k)));
    4332             :             r += d(i,j,k,n+dcomp.i)*s(i,j,k,n+scomp.i)*mi;
    4333             :         });
    4334             :     }
    4335             : 
    4336             :     return r;
    4337             : }
    4338             : 
    4339             : }
    4340             : 
    4341             : #endif /*BL_BASEFAB_H*/

Generated by: LCOV version 1.14