#ifndef AMREX_BASEFAB_H_
#define AMREX_BASEFAB_H_
#include <AMReX_Config.H>

#include <AMReX_Algorithm.H>
#include <AMReX_Extension.H>
#include <AMReX_BLassert.H>
#include <AMReX_Array.H>
#include <AMReX_Box.H>
#include <AMReX_Loop.H>
#include <AMReX_BoxList.H>
#include <AMReX_BArena.H>
#include <AMReX_CArena.H>
#include <AMReX_DataAllocator.H>
#include <AMReX_REAL.H>
#include <AMReX_BLProfiler.H>
#include <AMReX_BoxIterator.H>
#include <AMReX_MakeType.H>
#include <AMReX_Utility.H>
#include <AMReX_Reduce.H>
#include <AMReX_Gpu.H>
#include <AMReX_Scan.H>
#include <AMReX_Math.H>
#include <AMReX_OpenMP.H>
#include <AMReX_MemPool.H>

#include <cmath>
#include <cstdlib>
#include <algorithm>
#include <limits>
#include <climits>
#include <array>
#include <type_traits>
#include <memory>
#include <atomic>
#include <utility>

namespace amrex
{

extern std::atomic<Long> atomic_total_bytes_allocated_in_fabs;
extern std::atomic<Long> atomic_total_bytes_allocated_in_fabs_hwm;
extern std::atomic<Long> atomic_total_cells_allocated_in_fabs;
extern std::atomic<Long> atomic_total_cells_allocated_in_fabs_hwm;
extern Long private_total_bytes_allocated_in_fabs; //!< total bytes at any given time
extern Long private_total_bytes_allocated_in_fabs_hwm; //!< high-water-mark over a given interval
extern Long private_total_cells_allocated_in_fabs; //!< total cells at any given time
extern Long private_total_cells_allocated_in_fabs_hwm; //!< high-water-mark over a given interval
#ifdef AMREX_USE_OMP
#pragma omp threadprivate(private_total_bytes_allocated_in_fabs)
#pragma omp threadprivate(private_total_bytes_allocated_in_fabs_hwm)
#pragma omp threadprivate(private_total_cells_allocated_in_fabs)
#pragma omp threadprivate(private_total_cells_allocated_in_fabs_hwm)
#endif

Long TotalBytesAllocatedInFabs () noexcept;
Long TotalBytesAllocatedInFabsHWM () noexcept;
Long TotalCellsAllocatedInFabs () noexcept;
Long TotalCellsAllocatedInFabsHWM () noexcept;
void ResetTotalBytesAllocatedInFabsHWM () noexcept;
void update_fab_stats (Long n, Long s, std::size_t szt) noexcept;

void BaseFab_Initialize ();
void BaseFab_Finalize ();

struct SrcComp {
    AMREX_GPU_HOST_DEVICE
    explicit SrcComp (int ai) noexcept : i(ai) {}
    int i;
};

struct DestComp {
    AMREX_GPU_HOST_DEVICE
    explicit DestComp (int ai) noexcept : i(ai) {}
    int i;
};

struct NumComps {
    AMREX_GPU_HOST_DEVICE
    explicit NumComps (int an) noexcept : n(an) {}
    int n;
};
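
// SrcComp, DestComp and NumComps are strong types that keep the integer
// component arguments of BaseFab's "new interface" methods below from being
// swapped accidentally. A minimal sketch of their use (dst, src and bx are
// hypothetical; on GPU builds these methods also take an explicit RunOn
// template argument):
//
//   dst.copy(src, bx, SrcComp(2), DestComp(0), NumComps(1));
//   dst.setVal(0.0, bx, DestComp(0), NumComps(2));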

template <typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
Array4<T>
makeArray4 (T* p, Box const& bx, int ncomp) noexcept
{
    return Array4<T>{p, amrex::begin(bx), amrex::end(bx), ncomp};
}
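
// For example (a sketch, assuming AMREX_SPACEDIM == 3), makeArray4 can wrap
// any suitably sized buffer in a Fortran-ordered cell/component view:
//
//   Box bx(IntVect(0,0,0), IntVect(7,7,7));
//   std::vector<Real> buf(bx.numPts()*2);           // 8^3 cells, 2 components
//   Array4<Real> a = makeArray4(buf.data(), bx, 2);
//   a(0,0,0,1) = 1.0;                               // component 1, cell (0,0,0)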

template <typename T>
std::enable_if_t<std::is_arithmetic_v<T>>
placementNew (T* const /*ptr*/, Long /*n*/)
{}

template <typename T>
std::enable_if_t<std::is_trivially_default_constructible_v<T>
                 && !std::is_arithmetic_v<T>>
placementNew (T* const ptr, Long n)
{
    for (Long i = 0; i < n; ++i) {
        new (ptr+i) T;
    }
}

template <typename T>
std::enable_if_t<!std::is_trivially_default_constructible_v<T>>
placementNew (T* const ptr, Long n)
{
    AMREX_HOST_DEVICE_FOR_1D ( n, i,
    {
        new (ptr+i) T;
    });
}

template <typename T>
std::enable_if_t<std::is_trivially_destructible_v<T>>
placementDelete (T* const /*ptr*/, Long /*n*/)
{}

template <typename T>
std::enable_if_t<!std::is_trivially_destructible_v<T>>
placementDelete (T* const ptr, Long n)
{
    AMREX_HOST_DEVICE_FOR_1D (n, i,
    {
        (ptr+i)->~T();
    });
}
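
// A minimal sketch of how these overloads pair up for a non-trivial T
// (BaseFab uses them internally when allocating and freeing its data; this
// standalone host-only example is illustrative, not part of the API):
//
//   Long n = 8;
//   auto* raw = static_cast<std::string*>(std::malloc(n*sizeof(std::string)));
//   placementNew(raw, n);     // default-construct n objects in place
//   placementDelete(raw, n);  // run destructors without freeing the memory
//   std::free(raw);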

/**
 * \brief A FortranArrayBox(FAB)-like object
 *
 * BaseFab emulates the Fortran array concept.
 * Useful operations can be performed upon
 * BaseFabs in C++, and they provide a convenient interface to
 * Fortran when it is necessary to retreat into that language.
 *
 * BaseFab is a template class. Through use of the
 * template, a BaseFab may be based upon any class. So far at least,
 * most applications have been based upon simple types like integers,
 * real*4s, or real*8s. Most applications do not use BaseFabs
 * directly, but utilize specialized classes derived from BaseFab.
 *
 * Classes derived from BaseFab include FArrayBox, IArrayBox, TagBox,
 * Mask, EBFArrayBox, EBCellFlag and CutFab.
 *
 * BaseFab objects depend on the dimensionality of space
 * (indirectly through the DOMAIN Box member). It is
 * typical to define the macro SPACEDIM to be 1, 2, or 3 to indicate
 * the dimension of space. See the discussion of class Box for more
 * information. A BaseFab contains a Box DOMAIN, which indicates the
 * integer indexing space over which the array is defined. A BaseFab
 * also has NVAR components. By components, we mean that for each
 * point in the rectangular indexing space, there are NVAR values
 * associated with that point. A Fortran array corresponding to a
 * BaseFab would have (SPACEDIM+1) dimensions.
 *
 * By design, the array layout in a BaseFab mirrors that of a
 * Fortran array. The first index (x direction for example) varies
 * most rapidly, the next index (y direction), if any, varies next
 * fastest. The component index varies last, after all the spatial
 * indices.
 *
 * It is sometimes convenient to be able to treat a sub-array within an
 * existing BaseFab as a BaseFab in its own right. This is often
 * referred to as aliasing the BaseFab. Note that when aliasing is
 * used, the aliasing BaseFab's domain will not, in general, be the same
 * as the parent BaseFab's domain, nor will the number of components.
 * BaseFab is a dimension-dependent class, so SPACEDIM must be
 * defined as either 1, 2, or 3 when compiling.
 *
 * This is NOT a polymorphic class.
 *
 * It does NOT provide a copy constructor or assignment operator.
 *
 * \tparam T MUST have a default constructor and an assignment operator.
 */
template <class T>
class BaseFab
    : public DataAllocator
{
public:
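
    // A minimal usage sketch (illustrative only; assumes AMReX has been
    // initialized and AMREX_SPACEDIM == 3):
    //
    //   Box bx(IntVect(0,0,0), IntVect(15,15,15));
    //   BaseFab<Real> fab(bx, 2);            // 16^3 cells, 2 components
    //   fab.setVal<RunOn::Host>(0.0);        // zero both components
    //   fab(IntVect(3,4,5), 1) = 1.0;        // component 1 at cell (3,4,5)
    //   auto a = fab.array();                // Fortran-ordered Array4 view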

    template <class U> friend class BaseFab;

    using value_type = T;

    //! Construct an empty BaseFab, which must be resized (see BaseFab::resize) before use.
    BaseFab () noexcept = default;

    explicit BaseFab (Arena* ar) noexcept;

    BaseFab (const Box& bx, int n, Arena* ar);

    //! Make BaseFab with desired domain (box) and number of components.
    explicit BaseFab (const Box& bx, int n = 1, bool alloc = true,
                      bool shared = false, Arena* ar = nullptr);

    BaseFab (const BaseFab<T>& rhs, MakeType make_type, int scomp, int ncomp);

    /**
     * \brief Create a NON-OWNING BaseFab. This BaseFab is thus not
     * responsible for memory management, and it is the caller's
     * responsibility to ensure that p points to a chunk of memory that is
     * large enough.
     */
    BaseFab (const Box& bx, int ncomp, T* p);
    BaseFab (const Box& bx, int ncomp, T const* p);

    explicit BaseFab (Array4<T> const& a) noexcept;

    explicit BaseFab (Array4<T> const& a, IndexType t) noexcept;

    explicit BaseFab (Array4<T const> const& a) noexcept;

    explicit BaseFab (Array4<T const> const& a, IndexType t) noexcept;

    //! The destructor deletes the array memory.
    virtual ~BaseFab () noexcept;

    BaseFab (const BaseFab<T>& rhs) = delete;
    BaseFab<T>& operator= (const BaseFab<T>& rhs) = delete;

    BaseFab (BaseFab<T>&& rhs) noexcept;
    BaseFab<T>& operator= (BaseFab<T>&& rhs) noexcept;

#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab& operator= (T const&) noexcept;

    static void Initialize();
    static void Finalize();

    /**
     * \brief This function resizes a BaseFab so it covers the Box b
     * with N components.
     *
     * The default action is that under resizing, the memory allocated for the
     * BaseFab only grows and never shrinks. This function is
     * particularly useful when a BaseFab is used as a temporary
     * space which must be a different size whenever it is used.
     * Resizing is typically faster than re-allocating a
     * BaseFab because memory allocation can often be avoided.
     * If a nullptr is provided as Arena*, the Arena already in the BaseFab
     * will be used. Otherwise, the Arena argument will be used.
     */
    void resize (const Box& b, int N = 1, Arena* ar = nullptr);
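
    // For example (a sketch), a scratch fab can be reused across boxes
    // without repeated allocation:
    //
    //   BaseFab<Real> tmp;
    //   for (const Box& b : boxes) {   // boxes: some hypothetical container
    //       tmp.resize(b, 4);          // grows memory only when needed
    //       // ... use tmp ...
    //   }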

    template <class U=T, std::enable_if_t<std::is_trivially_destructible_v<U>,int> = 0>
    [[nodiscard]] Elixir elixir () noexcept;

    /**
     * \brief The function returns the BaseFab to the invalid state.
     * The memory is freed.
     */
    void clear () noexcept;

    //! Release ownership of memory
    [[nodiscard]] std::unique_ptr<T,DataDeleter> release () noexcept;

    //! Returns how many bytes are used
    [[nodiscard]] std::size_t nBytes () const noexcept { return this->truesize*sizeof(T); }

    [[nodiscard]] std::size_t nBytesOwned () const noexcept {
        return (this->ptr_owner) ? nBytes() : 0;
    }

    //! Returns the number of bytes used in the Box for the given number of components
    [[nodiscard]] std::size_t nBytes (const Box& bx, int ncomps) const noexcept
    { return bx.numPts() * sizeof(T) * ncomps; }

    //! Returns the number of components
    [[nodiscard]] int nComp () const noexcept { return this->nvar; }

    //! For calls to Fortran.
    [[nodiscard]] const int* nCompPtr() const noexcept {
        return &(this->nvar);
    }

    //! Returns the number of points
    [[nodiscard]] Long numPts () const noexcept { return this->domain.numPts(); }

    //! Returns the total number of points of all components
    [[nodiscard]] Long size () const noexcept { return this->nvar*this->domain.numPts(); }

    //! Returns the domain (box) where the array is defined
    [[nodiscard]] const Box& box () const noexcept { return this->domain; }

    /**
     * \brief Returns an IntVect giving the length of the domain
     * in each direction.
     */
    [[nodiscard]] IntVect length () const noexcept { return this->domain.length(); }

    /**
     * \brief Returns the lower corner of the domain.
     * See class Box for analogue.
     */
    [[nodiscard]] const IntVect& smallEnd () const noexcept { return this->domain.smallEnd(); }

    //! Returns the upper corner of the domain. See class Box for analogue.
    [[nodiscard]] const IntVect& bigEnd () const noexcept { return this->domain.bigEnd(); }

    /**
     * \brief Returns the lower corner of the domain.
     *
     * Instead of returning it in the form of an IntVect, as smallEnd and
     * bigEnd do, it returns the values as a pointer to an array of
     * constant integers. This is useful when interfacing to
     * Fortran subroutines.
     */
    [[nodiscard]] const int* loVect () const noexcept { return this->domain.loVect(); }

    /**
     * \brief Returns the upper corner of the domain.
     *
     * Instead of returning it in the form of an IntVect, as smallEnd and
     * bigEnd do, it returns the values as a pointer to an array of
     * constant integers. This is useful when interfacing to
     * Fortran subroutines.
     */
    [[nodiscard]] const int* hiVect () const noexcept { return this->domain.hiVect(); }

    /**
     * \brief Returns true if the domain of fab is totally contained within
     * the domain of this BaseFab.
     */
    [[nodiscard]] bool contains (const BaseFab<T>& fab) const noexcept
    {
        return box().contains(fab.box()) && this->nvar <= fab.nvar;
    }

    /**
     * \brief Returns true if bx is totally contained
     * within the domain of this BaseFab.
     */
    [[nodiscard]] bool contains (const Box& bx) const noexcept { return box().contains(bx); }

    /**
     * \brief Returns a pointer to an object of type T that is the
     * value of the Nth component associated with the cell at the
     * low end of the domain. This is commonly used to get a pointer
     * to data in the array which is then handed off to a Fortran
     * subroutine. Remember that data is stored in Fortran array
     * order, with the component index coming last. In other words,
     * dataPtr returns a pointer to all the Nth components.
     */
    [[nodiscard]] T* dataPtr (int n = 0) noexcept {
        if (this->dptr) {
            return &(this->dptr[n*this->domain.numPts()]);
        } else {
            return nullptr;
        }
    }

    //! Same as above except works on const FABs.
    [[nodiscard]] const T* dataPtr (int n = 0) const noexcept {
        if (this->dptr) {
            return &(this->dptr[n*this->domain.numPts()]);
        } else {
            return nullptr;
        }
    }
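
    // For example (a sketch): because storage is Fortran-ordered with the
    // component index last, dataPtr(n) is simply the base address of
    // component n:
    //
    //   Real* p0 = fab.dataPtr(0);
    //   Real* p1 = fab.dataPtr(1);   // p1 == p0 + fab.numPts()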

    [[nodiscard]] T* dataPtr (const IntVect& p, int n = 0) noexcept;

    [[nodiscard]] const T* dataPtr (const IntVect& p, int n = 0) const noexcept;

    void setPtr (T* p, Long sz) noexcept { AMREX_ASSERT(this->dptr == nullptr && this->truesize == 0); this->dptr = p; this->truesize = sz; }

    void prefetchToHost () const noexcept;
    void prefetchToDevice () const noexcept;

    [[nodiscard]] AMREX_FORCE_INLINE
    Array4<T const> array () const noexcept
    {
        return makeArray4<T const>(this->dptr, this->domain, this->nvar);
    }

    [[nodiscard]] AMREX_FORCE_INLINE
    Array4<T const> array (int start_comp) const noexcept
    {
        return Array4<T const>(makeArray4<T const>(this->dptr, this->domain, this->nvar),start_comp);
    }

    [[nodiscard]] AMREX_FORCE_INLINE
    Array4<T const> array (int start_comp, int num_comps) const noexcept
    {
        return Array4<T const>(makeArray4<T const>(this->dptr, this->domain, this->nvar), start_comp, num_comps);
    }

    [[nodiscard]] AMREX_FORCE_INLINE
    Array4<T> array () noexcept
    {
        return makeArray4<T>(this->dptr, this->domain, this->nvar);
    }

    [[nodiscard]] AMREX_FORCE_INLINE
    Array4<T> array (int start_comp) noexcept
    {
        return Array4<T>(makeArray4<T>(this->dptr, this->domain, this->nvar),start_comp);
    }

    [[nodiscard]] AMREX_FORCE_INLINE
    Array4<T> array (int start_comp, int num_comps) noexcept
    {
        return Array4<T>(makeArray4<T>(this->dptr, this->domain, this->nvar), start_comp, num_comps);
    }

    [[nodiscard]] AMREX_FORCE_INLINE
    Array4<T const> const_array () const noexcept
    {
        return makeArray4<T const>(this->dptr, this->domain, this->nvar);
    }

    [[nodiscard]] AMREX_FORCE_INLINE
    Array4<T const> const_array (int start_comp) const noexcept
    {
        return Array4<T const>(makeArray4<T const>(this->dptr, this->domain, this->nvar),start_comp);
    }

    [[nodiscard]] AMREX_FORCE_INLINE
    Array4<T const> const_array (int start_comp, int num_comps) const noexcept
    {
        return Array4<T const>(makeArray4<T const>(this->dptr, this->domain, this->nvar), start_comp, num_comps);
    }

    //! Returns true if the data for the FAB has been allocated.
    [[nodiscard]] bool isAllocated () const noexcept { return this->dptr != nullptr; }

    /**
     * \brief Returns a reference to the Nth component value
     * defined at position p in the domain. This operator may be
     * inefficient if the C++ compiler is unable to optimize the
     * C++ code.
     */
    [[nodiscard]] T& operator() (const IntVect& p, int N) noexcept;

    //! Same as above, except returns component 0.
    [[nodiscard]] T& operator() (const IntVect& p) noexcept;

    //! Same as above except works on const FABs.
    [[nodiscard]] const T& operator() (const IntVect& p, int N) const noexcept;

    //! Same as above, except returns component 0.
    [[nodiscard]] const T& operator() (const IntVect& p) const noexcept;

    /**
     * \brief This function puts numcomp component values, starting at
     * component N, from position pos in the domain into the array data,
     * which must be allocated by the user.
     */
    void getVal (T* data, const IntVect& pos, int N, int numcomp) const noexcept;
    //! Same as above, except that it starts at component 0 and copies all comps.
    void getVal (T* data, const IntVect& pos) const noexcept;

#if defined(AMREX_USE_GPU)
    template <RunOn run_on,
#else
    template <RunOn run_on=RunOn::Host,
#endif
              class U=T, std::enable_if_t<std::is_same_v<U,float> || std::is_same_v<U,double>,int> FOO = 0>
    void fill_snan () noexcept;

    /**
     * \brief The setVal functions set sub-regions in the BaseFab to a
     * constant value. This most general form specifies the sub-box,
     * the starting component number, and the number of components
     * to be set.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    void setVal (T const& x, const Box& bx, int dcomp, int ncomp) noexcept;
    //! Same as above, except the number of modified components is one. N is the component to be modified.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    void setVal (T const& x, const Box& bx, int N = 0) noexcept;
    //! Same as above, except the sub-box defaults to the entire domain.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    void setVal (T const& x, int N) noexcept;

#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    void setValIfNot (T const& val, const Box& bx, const BaseFab<int>& mask, int nstart, int num) noexcept;

    /**
     * \brief This function is analogous to the fourth form of
     * setVal above, except that instead of setting values on the
     * Box b, values are set on the complement of b in the domain.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    void setComplement (T const& x, const Box& b, int ns, int num) noexcept;

    /**
     * \brief The copy functions copy the contents of one BaseFab into
     * another. The destination BaseFab is always the object which
     * invokes the function. This, the most general form of copy,
     * specifies that the contents of any sub-box srcbox in BaseFab src
     * may be copied into a (possibly different) destbox in the
     * destination BaseFab. Note that although the srcbox and the
     * destbox may be disjoint, they must be the same size and shape.
     * If the sizes differ, the copy is undefined and a runtime error
     * results. This copy function is the only one of the copy
     * functions to allow a copy between differing boxes. The user
     * also specifies how many components are copied, starting at
     * component srccomp in src and stored starting at component
     * destcomp. The results are UNDEFINED if the src and dest are the
     * same and the srcbox and destbox overlap.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& copy (const BaseFab<T>& src, const Box& srcbox, int srccomp,
                      const Box& destbox, int destcomp, int numcomp) noexcept;
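
    // For example (a sketch, assuming AMREX_SPACEDIM == 3), copying between
    // disjoint boxes of the same size and shape (dst and src hypothetical):
    //
    //   Box sbx(IntVect(0,0,0), IntVect(7,7,7));
    //   Box dbx = sbx + IntVect(8,0,0);                // shifted copy target
    //   dst.copy<RunOn::Host>(src, sbx, 0, dbx, 0, 1);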

    /**
     * \brief As above, except the destination Box and the source Box
     * are taken to be the entire domain of the destination. A copy
     * of the intersecting region is performed.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& copy (const BaseFab<T>& src, int srccomp, int destcomp,
                      int numcomp = 1) noexcept;
    /**
     * \brief As above, except that the destination Box is specified,
     * but the source Box is taken to be equal to the destination
     * Box, and all components of the destination BaseFab are
     * copied.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& copy (const BaseFab<T>& src, const Box& destbox) noexcept;

    //! Copy from the srcbox of this Fab to raw memory and return the number of bytes copied
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    std::size_t copyToMem (const Box& srcbox, int srccomp,
                           int numcomp, void* dst) const noexcept;

    //! Copy from raw memory to the dstbox of this Fab and return the number of bytes copied
#if defined(AMREX_USE_GPU)
    template <RunOn run_on, typename BUF = T>
#else
    template <RunOn run_on=RunOn::Host, typename BUF = T>
#endif
    std::size_t copyFromMem (const Box& dstbox, int dstcomp,
                             int numcomp, const void* src) noexcept;

    //! Add from raw memory to the dstbox of this Fab and return the number of bytes copied
#if defined(AMREX_USE_GPU)
    template <RunOn run_on, typename BUF = T>
#else
    template <RunOn run_on=RunOn::Host, typename BUF = T>
#endif
    std::size_t addFromMem (const Box& dstbox, int dstcomp,
                            int numcomp, const void* src) noexcept;

    /**
     * \brief Perform shifts upon the domain of the BaseFab. They are
     * completely analogous to the corresponding Box functions.
     * There is no effect upon the array memory.
     */
    BaseFab<T>& shift (const IntVect& v) noexcept;
    //! Same as above, with the shift given as a direction and a number of cells.
    BaseFab<T>& shift (int idir, int n_cell) noexcept;
    //! Half-index analogue of shift. See the corresponding Box function.
    BaseFab<T>& shiftHalf (int dir, int n_cell) noexcept;
    //! Half-index analogue of shift. See the corresponding Box function.
    BaseFab<T>& shiftHalf (const IntVect& v) noexcept;

#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    [[nodiscard]] Real norminfmask (const Box& subbox, const BaseFab<int>& mask, int scomp=0, int ncomp=1) const noexcept;

    /**
     * \brief Compute the Lp-norm of this FAB using components (scomp : scomp+numcomp-1).
     * p < 0 -> ERROR
     * p = 0 -> infinity norm (max norm)
     * p = 1 -> sum of ABS(FAB)
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    [[nodiscard]] Real norm (int p, int scomp = 0, int numcomp = 1) const noexcept;

    //! Same as above except only on given subbox.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    [[nodiscard]] Real norm (const Box& subbox, int p, int scomp = 0, int numcomp = 1) const noexcept;
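
    // For example (a sketch), with the p-convention above:
    //
    //   Real nmax = fab.norm<RunOn::Host>(0);  // p = 0: max norm of comp 0
    //   Real nsum = fab.norm<RunOn::Host>(1);  // p = 1: sum of |fab|, comp 0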
    //! Compute absolute value for all components of this FAB.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    void abs () noexcept;
    //! Same as above except only for components (comp : comp+numcomp-1)
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    void abs (int comp, int numcomp=1) noexcept;
    /**
     * \brief Calculate abs() on subbox for given component range.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    void abs (const Box& subbox, int comp = 0, int numcomp=1) noexcept;
    /**
     * \return Minimum value of given component.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    [[nodiscard]] T min (int comp = 0) const noexcept;
    /**
     * \return Minimum value of given component in given subbox.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    [[nodiscard]] T min (const Box& subbox, int comp = 0) const noexcept;
    /**
     * \return Maximum value of given component.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    [[nodiscard]] T max (int comp = 0) const noexcept;
    /**
     * \return Maximum value of given component in given subbox.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    [[nodiscard]] T max (const Box& subbox, int comp = 0) const noexcept;
    /**
     * \return Minimum and maximum of given component.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    [[nodiscard]] std::pair<T,T> minmax (int comp = 0) const noexcept;
    /**
     * \return Minimum and maximum of given component in given subbox.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    [[nodiscard]] std::pair<T,T> minmax (const Box& subbox, int comp = 0) const noexcept;
    /**
     * \return Maximum of the absolute value of given component.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    [[nodiscard]] T maxabs (int comp = 0) const noexcept;
    /**
     * \return Maximum of the absolute value of given component in given subbox.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    [[nodiscard]] T maxabs (const Box& subbox, int comp = 0) const noexcept;

    /**
     * \return Location of given value.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    [[nodiscard]] IntVect indexFromValue (const Box& subbox, int comp, T const& value) const noexcept;

    /**
     * \return Location of minimum value in given component.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    [[nodiscard]] IntVect minIndex (int comp = 0) const noexcept;
    /**
     * \return Location of minimum value in given component in
     * given subbox.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    [[nodiscard]] IntVect minIndex (const Box& subbox, int comp = 0) const noexcept;
    /**
     * \brief Return the minimum value and its location via arguments, to
     * allow efficient looping over multiple boxes.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    void minIndex (const Box& subbox, Real& min_val, IntVect& min_idx, int comp = 0) const noexcept;

    /**
     * \return Location of maximum value in given component.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    [[nodiscard]] IntVect maxIndex (int comp = 0) const noexcept;
    /**
     * \return Location of maximum value in given component in given
     * subbox.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    [[nodiscard]] IntVect maxIndex (const Box& subbox, int comp = 0) const noexcept;
    /**
     * \brief Return the maximum value and its location via arguments, to
     * allow efficient looping over multiple boxes.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    void maxIndex (const Box& subbox, Real& max_value, IntVect& max_idx, int comp = 0) const noexcept;

    /**
     * \brief Compute mask array with value of 1 in cells where
     * BaseFab has value less than val, 0 otherwise.
     * mask is resized by this function.
     * The number of cells marked with 1 is returned.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    int maskLT (BaseFab<int>& mask, T const& val, int comp = 0) const noexcept;
    //! Same as above except mark cells with value less than or equal to val.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    int maskLE (BaseFab<int>& mask, T const& val, int comp = 0) const noexcept;

    //! Same as above except mark cells with value equal to val.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    int maskEQ (BaseFab<int>& mask, T const& val, int comp = 0) const noexcept;
    //! Same as above except mark cells with value greater than val.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    int maskGT (BaseFab<int>& mask, T const& val, int comp = 0) const noexcept;
    //! Same as above except mark cells with value greater than or equal to val.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    int maskGE (BaseFab<int>& mask, T const& val, int comp = 0) const noexcept;

    //! Returns sum of given component of FAB state vector.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    [[nodiscard]] T sum (int comp, int numcomp = 1) const noexcept;
    //! Compute sum of given component of FAB state vector in given subbox.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    [[nodiscard]] T sum (const Box& subbox, int comp, int numcomp = 1) const noexcept;

    //! Most general version, specify subbox and which components.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& invert (T const& r, const Box& b, int comp=0, int numcomp=1) noexcept;
    //! As above except on entire domain.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& invert (T const& r, int comp, int numcomp=1) noexcept;

    //! Negate BaseFab, most general.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& negate (const Box& b, int comp=0, int numcomp=1) noexcept;
    //! As above, except on entire domain.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& negate (int comp, int numcomp=1) noexcept;

    //! Scalar addition (a[i] <- a[i] + r), most general.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& plus (T const& r, const Box& b, int comp=0, int numcomp=1) noexcept;

    //! As above, except on entire domain.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& plus (T const& r, int comp, int numcomp=1) noexcept;

    /**
     * \brief Add src components (srccomp:srccomp+numcomp-1) to
     * this FAB's components (destcomp:destcomp+numcomp-1)
     * where the two FABs intersect.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& plus (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp=1) noexcept;
    /**
     * \brief Same as above except addition is restricted to the intersection
     * of subbox and the src FAB. NOTE: subbox must be contained in this
     * FAB.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& plus (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp, int numcomp=1) noexcept;
    /**
     * \brief Add srcbox region of src FAB to destbox region of this FAB.
     * The srcbox and destbox must be same size.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& plus (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
                      int srccomp, int destcomp, int numcomp=1) noexcept;

    //! Atomic FAB addition (a[i] <- a[i] + b[i]).
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& atomicAdd (const BaseFab<T>& x) noexcept;

    /**
     * \brief Atomically add src components (srccomp:srccomp+numcomp-1) to
     * this FAB's components (destcomp:destcomp+numcomp-1)
     * where the two FABs intersect.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& atomicAdd (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp=1) noexcept;
    /**
     * \brief Same as above except addition is restricted to the intersection
     * of subbox and the src FAB. NOTE: subbox must be contained in this
     * FAB.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& atomicAdd (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp,
                           int numcomp=1) noexcept;
    /**
     * \brief Atomically add srcbox region of src FAB to destbox region of this FAB.
     * The srcbox and destbox must be same size.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& atomicAdd (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
                           int srccomp, int destcomp, int numcomp=1) noexcept;

    /**
     * \brief Atomically add srcbox region of src FAB to destbox region of
     * this FAB. The srcbox and destbox must be same size. When OMP is on,
     * this uses OMP locks in the implementation, and it is usually faster
     * than atomicAdd.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& lockAdd (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
                         int srccomp, int destcomp, int numcomp) noexcept;

    //! FAB SAXPY (y[i] <- y[i] + a * x[i]), in place.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& saxpy (T a, const BaseFab<T>& x, const Box& srcbox, const Box& destbox,
                       int srccomp, int destcomp, int numcomp=1) noexcept;
    //! FAB SAXPY (y[i] <- y[i] + a * x[i]), in place. All components.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& saxpy (T a, const BaseFab<T>& x) noexcept;
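
    // For example (a sketch; y and x are hypothetical fabs), FAB SAXPY
    // accumulates a scaled fab in place:
    //
    //   y.saxpy<RunOn::Host>(0.5, x);   // y <- y + 0.5*x, all components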

    //! FAB XPAY (y[i] <- x[i] + a * y[i])
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& xpay (T a, const BaseFab<T>& x, const Box& srcbox, const Box& destbox,
                      int srccomp, int destcomp, int numcomp=1) noexcept;

    //! y[i] <- y[i] + x1[i] * x2[i]
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& addproduct (const Box& destbox, int destcomp, int numcomp,
                            const BaseFab<T>& src1, int comp1,
                            const BaseFab<T>& src2, int comp2) noexcept;

    /**
     * \brief Subtract src components (srccomp:srccomp+numcomp-1) from
     * this FAB's components (destcomp:destcomp+numcomp-1) where
     * the two FABs intersect.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& minus (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp=1) noexcept;
    /**
     * \brief Same as above except subtraction is restricted to the
     * intersection of subbox and the src FAB. NOTE: subbox must be
     * contained in this FAB.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& minus (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp,
                       int numcomp=1) noexcept;
    /**
     * \brief Subtract srcbox region of src FAB from destbox region
     * of this FAB. srcbox and destbox must be same size.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& minus (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
                       int srccomp, int destcomp, int numcomp=1) noexcept;

    //! Scalar multiplication, except control which components are multiplied.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& mult (T const& r, int comp, int numcomp=1) noexcept;
    /**
     * \brief As above, except specify sub-box.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& mult (T const& r, const Box& b, int comp=0, int numcomp=1) noexcept;

    /**
     * \brief Multiply src components (srccomp:srccomp+numcomp-1) with
     * this FAB's components (destcomp:destcomp+numcomp-1) where
     * the two FABs intersect.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& mult (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp=1) noexcept;

    /**
     * \brief Same as above except multiplication is restricted to the
     * intersection of subbox and the src FAB. NOTE: subbox must be
     * contained in this FAB.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& mult (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp,
                      int numcomp=1) noexcept;

    /**
     * \brief Multiply srcbox region of src FAB with destbox region
     * of this FAB. The srcbox and destbox must be same size.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& mult (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
                      int srccomp, int destcomp, int numcomp=1) noexcept;

    //! Scalar division, except control which components are divided.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& divide (T const& r, int comp, int numcomp=1) noexcept;

    //! As above except specify sub-box.
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& divide (T const& r, const Box& b, int comp=0, int numcomp=1) noexcept;

    /**
     * \brief This FAB is the numerator; the src FAB is the denominator.
     * Divide src components (srccomp:srccomp+numcomp-1) into
     * this FAB's components (destcomp:destcomp+numcomp-1)
     * where the two FABs intersect.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& divide (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp=1) noexcept;
    /**
     * \brief Same as above except division is restricted to the
     * intersection of subbox and the src FAB. NOTE: subbox must be
     * contained in this FAB.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& divide (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp,
                        int numcomp=1) noexcept;
    /**
     * \brief The destbox region of this FAB is the numerator; the srcbox
     * region of the src FAB is the denominator. srcbox and destbox must
     * be same size.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& divide (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
                        int srccomp, int destcomp, int numcomp=1) noexcept;
    /**
     * \brief Divide wherever "src" is "true" or "non-zero".
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& protected_divide (const BaseFab<T>& src) noexcept;

    /**
     * \brief Divide wherever "src" is "true" or "non-zero".
     * This FAB is the numerator; the src FAB is the denominator.
     * Divide src components (srccomp:srccomp+numcomp-1) into
     * this FAB's components (destcomp:destcomp+numcomp-1)
     * where the two FABs intersect.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& protected_divide (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp=1) noexcept;

    /**
     * \brief Divide wherever "src" is "true" or "non-zero".
     * Same as above except division is restricted to the
     * intersection of subbox and the src FAB. NOTE: subbox must be
     * contained in this FAB.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& protected_divide (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp,
                                  int numcomp=1) noexcept;

    /**
     * \brief Divide wherever "src" is "true" or "non-zero".
     * The destbox region of this FAB is the numerator; the srcbox region
     * of the src FAB is the denominator. srcbox and destbox must be same
     * size.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& protected_divide (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
                                  int srccomp, int destcomp, int numcomp=1) noexcept;

    /**
     * \brief Linear interpolation / extrapolation.
     * The result is (t2-t)/(t2-t1)*f1 + (t-t1)/(t2-t1)*f2.
     * Data is taken from the b1 region of f1 and the b2 region of f2
     * and stored in the b region of this FAB.
     * Boxes b, b1 and b2 must be the same size.
     * Data is taken from component comp1 of f1 and comp2 of f2,
     * and stored in component comp of this FAB.
     * This FAB is returned as a reference for chaining.
     */
#if defined(AMREX_USE_GPU)
    template <RunOn run_on>
#else
    template <RunOn run_on=RunOn::Host>
#endif
    BaseFab<T>& linInterp (const BaseFab<T>& f1, const Box& b1, int comp1,
                           const BaseFab<T>& f2, const Box& b2, int comp2,
                           Real t1, Real t2, Real t,
                           const Box& b, int comp, int numcomp = 1) noexcept;
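
    // For example (a sketch; dst, fold, fnew, bx and the time values are
    // hypothetical), interpolating two time levels told < t < tnew:
    //
    //   dst.linInterp<RunOn::Host>(fold, bx, 0, fnew, bx, 0,
    //                              told, tnew, t, bx, 0, 1);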
1248 :
1249 : //! Version of linInterp() in which b, b1, & b2 are the same.
1250 : #if defined(AMREX_USE_GPU)
1251 : template <RunOn run_on>
1252 : #else
1253 : template <RunOn run_on=RunOn::Host>
1254 : #endif
1255 : BaseFab<T>& linInterp (const BaseFab<T>& f1, int comp1,
1256 : const BaseFab<T>& f2, int comp2,
1257 : Real t1, Real t2, Real t,
1258 : const Box& b, int comp, int numcomp = 1) noexcept;
1259 :
1260 : /**
1261 : * \brief Linear combination. Result is alpha*f1 + beta*f2.
1262 : * Data is taken from b1 region of f1, b2 region of f2
1263 : * and stored in b region of this FAB.
1264 : * Boxes b, b1 and b2 must be the same size.
1265 : * Data is taken from component comp1 of f1, comp2 of f2,
1266 : * and stored in component comp of this FAB.
1267 : * This FAB is returned as a reference for chaining.
1268 : */
1269 : #if defined(AMREX_USE_GPU)
1270 : template <RunOn run_on>
1271 : #else
1272 : template <RunOn run_on=RunOn::Host>
1273 : #endif
1274 : BaseFab<T>& linComb (const BaseFab<T>& f1, const Box& b1, int comp1,
1275 : const BaseFab<T>& f2, const Box& b2, int comp2,
1276 : Real alpha, Real beta, const Box& b,
1277 : int comp, int numcomp = 1) noexcept;
1278 :
1279 : //! Dot product of x (i.e.,this) and y
1280 : #if defined(AMREX_USE_GPU)
1281 : template <RunOn run_on>
1282 : #else
1283 : template <RunOn run_on=RunOn::Host>
1284 : #endif
1285 : [[nodiscard]] T dot (const Box& xbx, int xcomp, const BaseFab<T>& y, const Box& ybx, int ycomp,
1286 : int numcomp = 1) const noexcept;
1287 :
1288 : #if defined(AMREX_USE_GPU)
1289 : template <RunOn run_on>
1290 : #else
1291 : template <RunOn run_on=RunOn::Host>
1292 : #endif
1293 : [[nodiscard]] T dotmask (const BaseFab<int>& mask, const Box& xbx, int xcomp,
1294 : const BaseFab<T>& y, const Box& ybx, int ycomp,
1295 : int numcomp) const noexcept;
1296 :
1297 : //! Change the Box type without change the length
1298 : void SetBoxType (const IndexType& typ) noexcept { this->domain.setType(typ); }
1299 :
1300 : //
1301 : // New interfaces
1302 : //
1303 :
1304 : //! Set value on the whole domain and all components
1305 : #if defined(AMREX_USE_GPU)
1306 : template <RunOn run_on>
1307 : #else
1308 : template <RunOn run_on=RunOn::Host>
1309 : #endif
1310 : void setVal (T const& val) noexcept;
1311 : //
1312 : //! Do nothing if bx is empty.
1313 : #if defined(AMREX_USE_GPU)
1314 : template <RunOn run_on>
1315 : #else
1316 : template <RunOn run_on=RunOn::Host>
1317 : #endif
1318 : void setVal (T const& x, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept;
1319 :
1320 : #if defined(AMREX_USE_GPU)
1321 : template <RunOn run_on>
1322 : #else
1323 : template <RunOn run_on=RunOn::Host>
1324 : #endif
1325 : void setValIf (T const& val, const BaseFab<int>& mask) noexcept;
1326 : //
1327 : //! Do nothing if bx is empty.
1328 : #if defined(AMREX_USE_GPU)
1329 : template <RunOn run_on>
1330 : #else
1331 : template <RunOn run_on=RunOn::Host>
1332 : #endif
1333 : void setValIf (T const& val, Box const& bx, const BaseFab<int>& mask, DestComp dcomp, NumComps ncomp) noexcept;
1334 :
1335 : #if defined(AMREX_USE_GPU)
1336 : template <RunOn run_on>
1337 : #else
1338 : template <RunOn run_on=RunOn::Host>
1339 : #endif
1340 : void setValIfNot (T const& val, const BaseFab<int>& mask) noexcept;
1341 : //
1342 : //! Do nothing if bx is empty.
1343 : #if defined(AMREX_USE_GPU)
1344 : template <RunOn run_on>
1345 : #else
1346 : template <RunOn run_on=RunOn::Host>
1347 : #endif
1348 : void setValIfNot (T const& val, Box const& bx, const BaseFab<int>& mask, DestComp dcomp, NumComps ncomp) noexcept;
1349 :
1350 : //! setVal on the complement of bx in the fab's domain
1351 : #if defined(AMREX_USE_GPU)
1352 : template <RunOn run_on>
1353 : #else
1354 : template <RunOn run_on=RunOn::Host>
1355 : #endif
1356 : void setComplement (T const& x, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept;
1357 :
1358 : /**
1359 : * copy is performed on the intersection of dest and src fabs.
1360 : * All components of dest fab are copied. src fab must have enough
1361 : * components (more is OK).
1362 : */
1363 : #if defined(AMREX_USE_GPU)
1364 : template <RunOn run_on>
1365 : #else
1366 : template <RunOn run_on=RunOn::Host>
1367 : #endif
1368 : BaseFab<T>& copy (const BaseFab<T>& src) noexcept;
1369 : //
1370 : //! Do nothing if bx does not intersect with src fab.
1371 : #if defined(AMREX_USE_GPU)
1372 : template <RunOn run_on>
1373 : #else
1374 : template <RunOn run_on=RunOn::Host>
1375 : #endif
1376 : BaseFab<T>& copy (const BaseFab<T>& src, Box bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept;
1377 :
1378 : //! Scalar addition on the whole domain and all components
1379 : #if defined(AMREX_USE_GPU)
1380 : template <RunOn run_on>
1381 : #else
1382 : template <RunOn run_on=RunOn::Host>
1383 : #endif
1384 : BaseFab<T>& plus (T const& val) noexcept;
1385 : //
1386 : #if defined(AMREX_USE_GPU)
1387 : template <RunOn run_on>
1388 : #else
1389 : template <RunOn run_on=RunOn::Host>
1390 : #endif
1391 : BaseFab<T>& operator+= (T const& val) noexcept;
1392 : //
1393 : //! Do nothing if bx is empty.
1394 : #if defined(AMREX_USE_GPU)
1395 : template <RunOn run_on>
1396 : #else
1397 : template <RunOn run_on=RunOn::Host>
1398 : #endif
1399 : BaseFab<T>& plus (T const& val, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept;
1400 : /**
1401 : * Fab addition is performed on the intersection of dest and src fabs.
1402 : * All components of dest fab are copied. src fab must have enough
1403 : * components (more is OK).
1404 : */
1405 : #if defined(AMREX_USE_GPU)
1406 : template <RunOn run_on>
1407 : #else
1408 : template <RunOn run_on=RunOn::Host>
1409 : #endif
1410 : BaseFab<T>& plus (const BaseFab<T>& src) noexcept;
1411 : //
1412 : #if defined(AMREX_USE_GPU)
1413 : template <RunOn run_on>
1414 : #else
1415 : template <RunOn run_on=RunOn::Host>
1416 : #endif
1417 : BaseFab<T>& operator+= (const BaseFab<T>& src) noexcept;
1418 : //
1419 : //! Do nothing if bx does not intersect with src fab.
1420 : #if defined(AMREX_USE_GPU)
1421 : template <RunOn run_on>
1422 : #else
1423 : template <RunOn run_on=RunOn::Host>
1424 : #endif
1425 : BaseFab<T>& plus (const BaseFab<T>& src, Box bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept;
1426 :
1427 : //! Scalar subtraction on the whole domain and all components
1428 : #if defined(AMREX_USE_GPU)
1429 : template <RunOn run_on>
1430 : #else
1431 : template <RunOn run_on=RunOn::Host>
1432 : #endif
1433 : BaseFab<T>& minus (T const& val) noexcept;
1434 : //
1435 : #if defined(AMREX_USE_GPU)
1436 : template <RunOn run_on>
1437 : #else
1438 : template <RunOn run_on=RunOn::Host>
1439 : #endif
1440 : BaseFab<T>& operator-= (T const& val) noexcept;
1441 : //
1442 : //! Do nothing if bx is empty.
1443 : #if defined(AMREX_USE_GPU)
1444 : template <RunOn run_on>
1445 : #else
1446 : template <RunOn run_on=RunOn::Host>
1447 : #endif
1448 : BaseFab<T>& minus (T const& val, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept;
1449 : /**
1450 : * Fab subtraction is performed on the intersection of dest and src fabs.
1451 : * All components of dest fab are copied. src fab must have enough
1452 : * components (more is OK).
1453 : */
1454 : #if defined(AMREX_USE_GPU)
1455 : template <RunOn run_on>
1456 : #else
1457 : template <RunOn run_on=RunOn::Host>
1458 : #endif
1459 : BaseFab<T>& minus (const BaseFab<T>& src) noexcept;
1460 : //
1461 : #if defined(AMREX_USE_GPU)
1462 : template <RunOn run_on>
1463 : #else
1464 : template <RunOn run_on=RunOn::Host>
1465 : #endif
1466 : BaseFab<T>& operator-= (const BaseFab<T>& src) noexcept;
1467 : //
1468 : //! Do nothing if bx does not intersect with src fab.
1469 : #if defined(AMREX_USE_GPU)
1470 : template <RunOn run_on>
1471 : #else
1472 : template <RunOn run_on=RunOn::Host>
1473 : #endif
1474 : BaseFab<T>& minus (const BaseFab<T>& src, Box bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept;
1475 :
1476 : //! Scalar multiplication on the whole domain and all components
1477 : #if defined(AMREX_USE_GPU)
1478 : template <RunOn run_on>
1479 : #else
1480 : template <RunOn run_on=RunOn::Host>
1481 : #endif
1482 : BaseFab<T>& mult (T const& val) noexcept;
1483 : //
1484 : #if defined(AMREX_USE_GPU)
1485 : template <RunOn run_on>
1486 : #else
1487 : template <RunOn run_on=RunOn::Host>
1488 : #endif
1489 : BaseFab<T>& operator*= (T const& val) noexcept;
1490 : //
1491 : //! Do nothing if bx is empty.
1492 : #if defined(AMREX_USE_GPU)
1493 : template <RunOn run_on>
1494 : #else
1495 : template <RunOn run_on=RunOn::Host>
1496 : #endif
1497 : BaseFab<T>& mult (T const& val, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept;
1498 : /**
1499 : * Fab multiplication is performed on the intersection of dest and src fabs.
1500 : * All components of dest fab are copied. src fab must have enough
1501 : * components (more is OK).
1502 : */
1503 : #if defined(AMREX_USE_GPU)
1504 : template <RunOn run_on>
1505 : #else
1506 : template <RunOn run_on=RunOn::Host>
1507 : #endif
1508 : BaseFab<T>& mult (const BaseFab<T>& src) noexcept;
1509 : //
1510 : #if defined(AMREX_USE_GPU)
1511 : template <RunOn run_on>
1512 : #else
1513 : template <RunOn run_on=RunOn::Host>
1514 : #endif
1515 : BaseFab<T>& operator*= (const BaseFab<T>& src) noexcept;
1516 : //
1517 : //! Do nothing if bx does not intersect with src fab.
1518 : #if defined(AMREX_USE_GPU)
1519 : template <RunOn run_on>
1520 : #else
1521 : template <RunOn run_on=RunOn::Host>
1522 : #endif
1523 : BaseFab<T>& mult (const BaseFab<T>& src, Box bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept;
1524 :
1525 : //! Scalar division on the whole domain and all components
1526 : #if defined(AMREX_USE_GPU)
1527 : template <RunOn run_on>
1528 : #else
1529 : template <RunOn run_on=RunOn::Host>
1530 : #endif
1531 : BaseFab<T>& divide (T const& val) noexcept;
1532 : //
1533 : #if defined(AMREX_USE_GPU)
1534 : template <RunOn run_on>
1535 : #else
1536 : template <RunOn run_on=RunOn::Host>
1537 : #endif
1538 : BaseFab<T>& operator/= (T const& val) noexcept;
1539 : //
1540 : //! Do nothing if bx is empty.
1541 : #if defined(AMREX_USE_GPU)
1542 : template <RunOn run_on>
1543 : #else
1544 : template <RunOn run_on=RunOn::Host>
1545 : #endif
1546 : BaseFab<T>& divide (T const& val, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept;
1547 : /**
1548 : * Fab division is performed on the intersection of dest and src fabs.
1549 : * All components of dest fab are copied. src fab must have enough
1550 : * components (more is OK).
1551 : */
1552 : #if defined(AMREX_USE_GPU)
1553 : template <RunOn run_on>
1554 : #else
1555 : template <RunOn run_on=RunOn::Host>
1556 : #endif
1557 : BaseFab<T>& divide (const BaseFab<T>& src) noexcept;
1558 : //
1559 : #if defined(AMREX_USE_GPU)
1560 : template <RunOn run_on>
1561 : #else
1562 : template <RunOn run_on=RunOn::Host>
1563 : #endif
1564 : BaseFab<T>& operator/= (const BaseFab<T>& src) noexcept;
1565 : //
1566 : //! Do nothing if bx does not intersect with src fab.
1567 : #if defined(AMREX_USE_GPU)
1568 : template <RunOn run_on>
1569 : #else
1570 : template <RunOn run_on=RunOn::Host>
1571 : #endif
1572 : BaseFab<T>& divide (const BaseFab<T>& src, Box bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept;
1573 :
1574 : //! Negation on the whole domain and all components
1575 : #if defined(AMREX_USE_GPU)
1576 : template <RunOn run_on>
1577 : #else
1578 : template <RunOn run_on=RunOn::Host>
1579 : #endif
1580 : BaseFab<T>& negate () noexcept;
1581 : //
1582 : #if defined(AMREX_USE_GPU)
1583 : template <RunOn run_on>
1584 : #else
1585 : template <RunOn run_on=RunOn::Host>
1586 : #endif
1587 : BaseFab<T>& negate (const Box& bx, DestComp dcomp, NumComps ncomp) noexcept;
1588 :
1589 : //! Fab <- r/Fab on the whole domain and all components
1590 : #if defined(AMREX_USE_GPU)
1591 : template <RunOn run_on>
1592 : #else
1593 : template <RunOn run_on=RunOn::Host>
1594 : #endif
1595 : BaseFab<T>& invert (T const& r) noexcept;
1596 : //
1597 : #if defined(AMREX_USE_GPU)
1598 : template <RunOn run_on>
1599 : #else
1600 : template <RunOn run_on=RunOn::Host>
1601 : #endif
1602 : BaseFab<T>& invert (T const& r, const Box& bx, DestComp dcomp, NumComps ncomp) noexcept;
1603 :
1604 : //! Sum over the given box and components
1605 : #if defined(AMREX_USE_GPU)
1606 : template <RunOn run_on>
1607 : #else
1608 : template <RunOn run_on=RunOn::Host>
1609 : #endif
1610 : [[nodiscard]] T sum (const Box& bx, DestComp dcomp, NumComps ncomp) const noexcept;
1611 :
1612 : //! Dot product of two Fabs
1613 : #if defined(AMREX_USE_GPU)
1614 : template <RunOn run_on>
1615 : #else
1616 : template <RunOn run_on=RunOn::Host>
1617 : #endif
1618 : [[nodiscard]] T dot (const BaseFab<T>& src, const Box& bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) const noexcept;
1619 :
1620 : //! Wrapper for dot taking plain int component arguments.
1621 : #if defined(AMREX_USE_GPU)
1622 : template <RunOn run_on>
1623 : #else
1624 : template <RunOn run_on=RunOn::Host>
1625 : #endif
1626 : [[nodiscard]] T dot (const Box& bx, int destcomp, int numcomp) const noexcept;
1627 :
1628 : //! Dot product of this Fab with itself
1629 : #if defined(AMREX_USE_GPU)
1630 : template <RunOn run_on>
1631 : #else
1632 : template <RunOn run_on=RunOn::Host>
1633 : #endif
1634 : [[nodiscard]] T dot (const Box& bx, DestComp dcomp, NumComps ncomp) const noexcept;
1635 :
1636 : //! Dot product of two Fabs with mask
1637 : #if defined(AMREX_USE_GPU)
1638 : template <RunOn run_on>
1639 : #else
1640 : template <RunOn run_on=RunOn::Host>
1641 : #endif
1642 : [[nodiscard]] T dotmask (const BaseFab<T>& src, const Box& bx, const BaseFab<int>& mask,
1643 : SrcComp scomp, DestComp dcomp, NumComps ncomp) const noexcept;
1644 :
1645 : protected:
1646 : //! Allocates memory for the BaseFab<T>.
1647 : void define ();
1648 :
1649 : T* dptr = nullptr; //!< The data pointer.
1650 : Box domain; //!< My index space.
1651 : int nvar = 0; //!< Number of components.
1652 : Long truesize = 0L; //!< nvar*numPts() that was allocated on the heap.
1653 : bool ptr_owner = false; //!< Owner of T*?
1654 : bool shared_memory = false; //!< Is the memory allocated in shared memory?
1655 : #ifdef AMREX_USE_GPU
1656 : gpuStream_t alloc_stream{};
1657 : #endif
1658 : };
1659 :
1660 : template <class T>
1661 : AMREX_FORCE_INLINE
1662 : T*
1663 : BaseFab<T>::dataPtr (const IntVect& p, int n) noexcept
1664 : {
1665 : AMREX_ASSERT(n >= 0);
1666 : AMREX_ASSERT(n < this->nvar);
1667 : AMREX_ASSERT(!(this->dptr == nullptr));
1668 : AMREX_ASSERT(this->domain.contains(p));
1669 :
1670 : return this->dptr + (this->domain.index(p)+n*this->domain.numPts());
1671 : }
1672 :
1673 : template <class T>
1674 : AMREX_FORCE_INLINE
1675 : const T*
1676 : BaseFab<T>::dataPtr (const IntVect& p, int n) const noexcept
1677 : {
1678 : AMREX_ASSERT(n >= 0);
1679 : AMREX_ASSERT(n < this->nvar);
1680 : AMREX_ASSERT(!(this->dptr == nullptr));
1681 : AMREX_ASSERT(this->domain.contains(p));
1682 :
1683 : return this->dptr + (this->domain.index(p)+n*this->domain.numPts());
1684 : }
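//
// Layout note: components are stored one after another (struct-of-arrays),
// so the address of cell p in component n is
// dptr + domain.index(p) + n*domain.numPts(), as computed above.
// Usage sketch (box extents and indices below are illustrative assumptions):
//
//     amrex::Box bx(amrex::IntVect(0), amrex::IntVect(15));
//     amrex::BaseFab<amrex::Real> fab(bx, 2);              // 2 components
//     amrex::Real* p = fab.dataPtr(amrex::IntVect(3), 1);  // cell (3,...), comp 1
//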
1685 :
1686 : template <class T>
1687 : void
1688 : BaseFab<T>::prefetchToHost () const noexcept
1689 : {
1690 : #ifdef AMREX_USE_GPU
1691 : if (this->arena()->isManaged()) {
1692 : #if defined(AMREX_USE_SYCL)
1693 : // xxxxx SYCL todo: prefetchToHost
1694 : // std::size_t s = sizeof(T)*this->nvar*this->domain.numPts();
1695 : // auto& q = Gpu::Device::streamQueue();
1696 : // q.submit([&] (sycl::handler& h) { h.prefetch(this->dptr, s); });
1697 : #elif defined(AMREX_USE_CUDA) && !defined(_WIN32)
1698 : if (Gpu::Device::devicePropMajor() >= 6) {
1699 : std::size_t s = sizeof(T)*this->nvar*this->domain.numPts();
1700 : AMREX_CUDA_SAFE_CALL(cudaMemPrefetchAsync(this->dptr, s,
1701 : cudaCpuDeviceId,
1702 : Gpu::gpuStream()));
1703 : }
1704 : #elif defined(AMREX_USE_HIP)
1705 : // xxxxx HIP FIX HERE after managed memory is supported
1706 : #endif
1707 : }
1708 : #endif
1709 : }
1710 :
1711 : template <class T>
1712 : void
1713 : BaseFab<T>::prefetchToDevice () const noexcept
1714 : {
1715 : #ifdef AMREX_USE_GPU
1716 : if (this->arena()->isManaged()) {
1717 : #if defined(AMREX_USE_SYCL)
1718 : std::size_t s = sizeof(T)*this->nvar*this->domain.numPts();
1719 : auto& q = Gpu::Device::streamQueue();
1720 : q.submit([&] (sycl::handler& h) { h.prefetch(this->dptr, s); });
1721 : #elif defined(AMREX_USE_CUDA) && !defined(_WIN32)
1722 : if (Gpu::Device::devicePropMajor() >= 6) {
1723 : std::size_t s = sizeof(T)*this->nvar*this->domain.numPts();
1724 : AMREX_CUDA_SAFE_CALL(cudaMemPrefetchAsync(this->dptr, s,
1725 : Gpu::Device::deviceId(),
1726 : Gpu::gpuStream()));
1727 : }
1728 : #elif defined(AMREX_USE_HIP)
1729 : // xxxxx HIP FIX HERE after managed memory is supported
1730 : #endif
1731 : }
1732 : #endif
1733 : }
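//
// Prefetch note: these are hints that take effect only when the fab's arena
// is managed (unified) memory; otherwise they are no-ops. Illustrative use,
// assuming a managed allocation:
//
//     amrex::BaseFab<amrex::Real> fab(bx, 1, amrex::The_Managed_Arena());
//     fab.prefetchToDevice();   // migrate pages before launching kernels
//     // ... device work ...
//     fab.prefetchToHost();     // migrate pages back before host access
//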
1734 :
1735 : template <class T>
1736 : AMREX_FORCE_INLINE
1737 : T&
1738 : BaseFab<T>::operator() (const IntVect& p, int n) noexcept
1739 : {
1740 : AMREX_ASSERT(n >= 0);
1741 : AMREX_ASSERT(n < this->nvar);
1742 : AMREX_ASSERT(!(this->dptr == nullptr));
1743 : AMREX_ASSERT(this->domain.contains(p));
1744 :
1745 61983700 : return this->dptr[this->domain.index(p)+n*this->domain.numPts()];
1746 : }
1747 :
1748 : template <class T>
1749 : AMREX_FORCE_INLINE
1750 : T&
1751 : BaseFab<T>::operator() (const IntVect& p) noexcept
1752 : {
1753 : AMREX_ASSERT(!(this->dptr == nullptr));
1754 : AMREX_ASSERT(this->domain.contains(p));
1755 :
1756 : return this->dptr[this->domain.index(p)];
1757 : }
1758 :
1759 : template <class T>
1760 : AMREX_FORCE_INLINE
1761 : const T&
1762 : BaseFab<T>::operator() (const IntVect& p, int n) const noexcept
1763 : {
1764 : AMREX_ASSERT(n >= 0);
1765 : AMREX_ASSERT(n < this->nvar);
1766 : AMREX_ASSERT(!(this->dptr == nullptr));
1767 : AMREX_ASSERT(this->domain.contains(p));
1768 :
1769 85867800 : return this->dptr[this->domain.index(p)+n*this->domain.numPts()];
1770 : }
1771 :
1772 : template <class T>
1773 : AMREX_FORCE_INLINE
1774 : const T&
1775 : BaseFab<T>::operator() (const IntVect& p) const noexcept
1776 : {
1777 : AMREX_ASSERT(!(this->dptr == nullptr));
1778 : AMREX_ASSERT(this->domain.contains(p));
1779 :
1780 : return this->dptr[this->domain.index(p)];
1781 : }
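//
// Indexing sketch (cell and values are illustrative assumptions):
//
//     amrex::IntVect iv(AMREX_D_DECL(4,5,6));
//     fab(iv)    = 1.0;   // component 0
//     fab(iv, 1) = 2.0;   // component 1
//
// In hot loops prefer Array4 (fab.array()): operator() recomputes the
// linear index from the Box on every call.
//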
1782 :
1783 : template <class T>
1784 : void
1785 : BaseFab<T>::getVal (T* data,
1786 : const IntVect& pos,
1787 : int n,
1788 : int numcomp) const noexcept
1789 : {
1790 : const int loc = this->domain.index(pos);
1791 : const Long sz = this->domain.numPts();
1792 :
1793 : AMREX_ASSERT(!(this->dptr == nullptr));
1794 : AMREX_ASSERT(n >= 0 && n + numcomp <= this->nvar);
1795 :
1796 : for (int k = 0; k < numcomp; k++) {
1797 : data[k] = this->dptr[loc+(n+k)*sz];
1798 : }
1799 : }
1800 :
1801 : template <class T>
1802 : void
1803 : BaseFab<T>::getVal (T* data,
1804 : const IntVect& pos) const noexcept
1805 : {
1806 : getVal(data,pos,0,this->nvar);
1807 : }
1808 :
1809 : template <class T>
1810 : BaseFab<T>&
1811 : BaseFab<T>::shift (const IntVect& v) noexcept
1812 : {
1813 : this->domain += v;
1814 : return *this;
1815 : }
1816 :
1817 : template <class T>
1818 : BaseFab<T>&
1819 : BaseFab<T>::shift (int idir, int n_cell) noexcept
1820 : {
1821 : this->domain.shift(idir,n_cell);
1822 : return *this;
1823 : }
1824 :
1825 : template <class T>
1826 : BaseFab<T> &
1827 : BaseFab<T>::shiftHalf (const IntVect& v) noexcept
1828 : {
1829 : this->domain.shiftHalf(v);
1830 : return *this;
1831 : }
1832 :
1833 : template <class T>
1834 : BaseFab<T> &
1835 : BaseFab<T>::shiftHalf (int idir, int n_cell) noexcept
1836 : {
1837 : this->domain.shiftHalf(idir,n_cell);
1838 : return *this;
1839 : }
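//
// Shift sketch: shifting moves the fab's index space only; no data move.
// Illustrative amounts:
//
//     fab.shift(amrex::IntVect(AMREX_D_DECL(1,0,0)));  // +1 in x
//     fab.shift(0, -1);                                // back again
//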
1840 :
1841 : template <class T>
1842 : template <RunOn run_on, class U,
1843 : std::enable_if_t<std::is_same_v<U,float> || std::is_same_v<U,double>, int> FOO>
1844 : void
1845 : BaseFab<T>::fill_snan () noexcept
1846 : {
1847 : amrex::fill_snan<run_on>(this->dptr, this->truesize);
1848 : }
1849 :
1850 : template <class T>
1851 : template <RunOn run_on>
1852 : void
1853 : BaseFab<T>::setVal (T const& x, const Box& bx, int n) noexcept
1854 : {
1855 : this->setVal<run_on>(x, bx, DestComp{n}, NumComps{1});
1856 : }
1857 :
1858 : template <class T>
1859 : template <RunOn run_on>
1860 : void
1861 : BaseFab<T>::setVal (T const& x, int n) noexcept
1862 : {
1863 : this->setVal<run_on>(x, this->domain, DestComp{n}, NumComps{1});
1864 : }
1865 :
1866 : template <class T>
1867 : template <RunOn run_on>
1868 : void
1869 0 : BaseFab<T>::setVal (T const& x, const Box& bx, int dcomp, int ncomp) noexcept
1870 : {
1871 0 : this->setVal<run_on>(x, bx, DestComp{dcomp}, NumComps{ncomp});
1872 0 : }
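//
// setVal sketch, using the wrappers above (box and values are assumptions):
//
//     fab.setVal<amrex::RunOn::Host>(0.0, 0);         // comp 0, whole domain
//     fab.setVal<amrex::RunOn::Host>(1.0, bx, 0, 2);  // comps 0-1 on bx
//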
1873 :
1874 : template <class T>
1875 : template <RunOn run_on>
1876 : void
1877 : BaseFab<T>::setValIfNot (T const& val, const Box& bx, const BaseFab<int>& mask, int ns, int num) noexcept
1878 : {
1879 : this->setValIfNot<run_on>(val, bx, mask, DestComp{ns}, NumComps{num});
1880 : }
1881 :
1882 : template <class T>
1883 : template <RunOn run_on>
1884 : BaseFab<T>&
1885 13574 : BaseFab<T>::copy (const BaseFab<T>& src, const Box& srcbox, int srccomp,
1886 : const Box& destbox, int destcomp, int numcomp) noexcept
1887 : {
1888 : AMREX_ASSERT(destbox.ok());
1889 : AMREX_ASSERT(srcbox.sameSize(destbox));
1890 : AMREX_ASSERT(src.box().contains(srcbox));
1891 : AMREX_ASSERT(this->domain.contains(destbox));
1892 : AMREX_ASSERT(srccomp >= 0 && srccomp+numcomp <= src.nComp());
1893 : AMREX_ASSERT(destcomp >= 0 && destcomp+numcomp <= this->nvar);
1894 :
1895 13574 : Array4<T> const& d = this->array();
1896 13574 : Array4<T const> const& s = src.const_array();
1897 : const auto dlo = amrex::lbound(destbox);
1898 : const auto slo = amrex::lbound(srcbox);
1899 13574 : const Dim3 offset{slo.x-dlo.x,slo.y-dlo.y,slo.z-dlo.z};
1900 :
1901 8241854 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
1902 : {
1903 : d(i,j,k,n+destcomp) = s(i+offset.x,j+offset.y,k+offset.z,n+srccomp);
1904 : });
1905 :
1906 13574 : return *this;
1907 : }
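//
// Copy sketch: srcbox and destbox must be the same size but may sit at
// different positions; the offset computed above maps dest cells to src
// cells. Illustrative call:
//
//     dst.copy<amrex::RunOn::Host>(src, srcbox, 0, destbox, 0, 1);
//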
1908 :
1909 : template <class T>
1910 : template <RunOn run_on>
1911 : BaseFab<T>&
1912 : BaseFab<T>::copy (const BaseFab<T>& src, const Box& destbox) noexcept
1913 : {
1914 : return this->copy<run_on>(src, destbox, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
1915 : }
1916 :
1917 : template <class T>
1918 : template <RunOn run_on>
1919 : BaseFab<T>&
1920 : BaseFab<T>::copy (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp) noexcept
1921 : {
1922 : return copy<run_on>(src, this->domain, SrcComp{srccomp}, DestComp{destcomp}, NumComps{numcomp});
1923 : }
1924 :
1925 : template <class T>
1926 : void
1927 3315 : BaseFab<T>::define ()
1928 : {
1929 : AMREX_ASSERT(this->dptr == nullptr);
1930 : AMREX_ASSERT(this->domain.numPts() > 0);
1931 : AMREX_ASSERT(this->nvar >= 0);
1932 3315 : if (this->nvar == 0) { return; }
1933 : AMREX_ASSERT(std::numeric_limits<Long>::max()/this->nvar > this->domain.numPts());
1934 :
1935 3315 : this->truesize = this->nvar*this->domain.numPts();
1936 3315 : this->ptr_owner = true;
1937 3315 : this->dptr = static_cast<T*>(this->alloc(this->truesize*sizeof(T)));
1938 : #ifdef AMREX_USE_GPU
1939 : this->alloc_stream = Gpu::gpuStream();
1940 : #endif
1941 :
1942 3315 : placementNew(this->dptr, this->truesize);
1943 :
1944 3315 : amrex::update_fab_stats(this->domain.numPts(), this->truesize, sizeof(T));
1945 :
1946 : if constexpr (std::is_same_v<T,float> || std::is_same_v<T,double>) {
1947 : if (amrex::InitSNaN() && this->truesize > 0) {
1948 : #ifdef AMREX_USE_GPU
1949 : if (Gpu::inLaunchRegion() && arena()->isDeviceAccessible()) {
1950 : this->template fill_snan<RunOn::Device>();
1951 : Gpu::streamSynchronize();
1952 : } else
1953 : #endif
1954 : {
1955 : this->template fill_snan<RunOn::Host>();
1956 : }
1957 : }
1958 : }
1959 : }
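//
// Allocation note: for T = float/double, when signaling-NaN initialization
// is enabled at runtime (amrex::InitSNaN()), fresh allocations are filled
// with sNaNs so that reads of uninitialized cells can trap under FPE
// trapping. (The runtime switch is presumably amrex.init_snan.)
//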
1960 :
1961 : template <class T>
1962 : BaseFab<T>::BaseFab (Arena* ar) noexcept
1963 : : DataAllocator{ar}
1964 : {}
1965 :
1966 : template <class T>
1967 : BaseFab<T>::BaseFab (const Box& bx, int n, Arena* ar)
1968 : : DataAllocator{ar}, domain(bx), nvar(n)
1969 : {
1970 : define();
1971 : }
1972 :
1973 : template <class T>
1974 3315 : BaseFab<T>::BaseFab (const Box& bx, int n, bool alloc, bool shared, Arena* ar)
1975 3315 : : DataAllocator{ar}, domain(bx), nvar(n), shared_memory(shared)
1976 : {
1977 3315 : if (!this->shared_memory && alloc) { define(); }
1978 3315 : }
1979 :
1980 : template <class T>
1981 0 : BaseFab<T>::BaseFab (const BaseFab<T>& rhs, MakeType make_type, int scomp, int ncomp)
1982 : : DataAllocator{rhs.arena()},
1983 0 : dptr(const_cast<T*>(rhs.dataPtr(scomp))),
1984 : domain(rhs.domain), nvar(ncomp),
1985 0 : truesize(ncomp*rhs.domain.numPts())
1986 : {
1987 : AMREX_ASSERT(scomp+ncomp <= rhs.nComp());
1988 0 : if (make_type == amrex::make_deep_copy)
1989 : {
1990 0 : this->dptr = nullptr;
1991 0 : define();
1992 0 : this->copy<RunOn::Device>(rhs, this->domain, scomp, this->domain, 0, ncomp);
1993 0 : } else if (make_type == amrex::make_alias) {
1994 : ; // nothing to do
1995 : } else {
1996 : amrex::Abort("BaseFab: unknown MakeType");
1997 : }
1998 0 : }
1999 :
2000 : template<class T>
2001 : BaseFab<T>::BaseFab (const Box& bx, int ncomp, T* p)
2002 : : dptr(p), domain(bx), nvar(ncomp), truesize(bx.numPts()*ncomp)
2003 : {
2004 : }
2005 :
2006 : template<class T>
2007 : BaseFab<T>::BaseFab (const Box& bx, int ncomp, T const* p)
2008 : : dptr(const_cast<T*>(p)), domain(bx), nvar(ncomp), truesize(bx.numPts()*ncomp)
2009 : {
2010 : }
2011 :
2012 : template<class T>
2013 : BaseFab<T>::BaseFab (Array4<T> const& a) noexcept
2014 : : dptr(a.p),
2015 : domain(IntVect(AMREX_D_DECL(a.begin.x,a.begin.y,a.begin.z)),
2016 : IntVect(AMREX_D_DECL(a.end.x-1,a.end.y-1,a.end.z-1))),
2017 : nvar(a.ncomp), truesize(a.ncomp*a.nstride)
2018 : {}
2019 :
2020 : template<class T>
2021 : BaseFab<T>::BaseFab (Array4<T> const& a, IndexType t) noexcept
2022 : : dptr(a.p),
2023 : domain(IntVect(AMREX_D_DECL(a.begin.x,a.begin.y,a.begin.z)),
2024 : IntVect(AMREX_D_DECL(a.end.x-1,a.end.y-1,a.end.z-1)), t),
2025 : nvar(a.ncomp), truesize(a.ncomp*a.nstride)
2026 : {}
2027 :
2028 : template<class T>
2029 : BaseFab<T>::BaseFab (Array4<T const> const& a) noexcept
2030 : : dptr(const_cast<T*>(a.p)),
2031 : domain(IntVect(AMREX_D_DECL(a.begin.x,a.begin.y,a.begin.z)),
2032 : IntVect(AMREX_D_DECL(a.end.x-1,a.end.y-1,a.end.z-1))),
2033 : nvar(a.ncomp), truesize(a.ncomp*a.nstride)
2034 : {}
2035 :
2036 : template<class T>
2037 : BaseFab<T>::BaseFab (Array4<T const> const& a, IndexType t) noexcept
2038 : : dptr(const_cast<T*>(a.p)),
2039 : domain(IntVect(AMREX_D_DECL(a.begin.x,a.begin.y,a.begin.z)),
2040 : IntVect(AMREX_D_DECL(a.end.x-1,a.end.y-1,a.end.z-1)), t),
2041 : nvar(a.ncomp), truesize(a.ncomp*a.nstride)
2042 : {}
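//
// Aliasing sketch: constructing from an Array4 yields a non-owning view of
// existing memory (ptr_owner stays false); the memory must outlive the
// alias. Illustrative:
//
//     amrex::Array4<amrex::Real> const& a = fab.array();
//     amrex::BaseFab<amrex::Real> alias(a, fab.box().ixType());
//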
2043 :
2044 : template <class T>
2045 413071 : BaseFab<T>::~BaseFab () noexcept
2046 : {
2047 409756 : clear();
2048 413071 : }
2049 :
2050 : template <class T>
2051 : BaseFab<T>::BaseFab (BaseFab<T>&& rhs) noexcept
2052 : : DataAllocator{rhs.arena()},
2053 : dptr(rhs.dptr), domain(rhs.domain),
2054 : nvar(rhs.nvar), truesize(rhs.truesize),
2055 : ptr_owner(rhs.ptr_owner), shared_memory(rhs.shared_memory)
2056 : #ifdef AMREX_USE_GPU
2057 : , alloc_stream(rhs.alloc_stream)
2058 : #endif
2059 : {
2060 : rhs.dptr = nullptr;
2061 : rhs.ptr_owner = false;
2062 : }
2063 :
2064 : template <class T>
2065 : BaseFab<T>&
2066 : BaseFab<T>::operator= (BaseFab<T>&& rhs) noexcept
2067 : {
2068 : if (this != &rhs) {
2069 : clear();
2070 : DataAllocator::operator=(rhs);
2071 : dptr = rhs.dptr;
2072 : domain = rhs.domain;
2073 : nvar = rhs.nvar;
2074 : truesize = rhs.truesize;
2075 : ptr_owner = rhs.ptr_owner;
2076 : shared_memory = rhs.shared_memory;
2077 : #ifdef AMREX_USE_GPU
2078 : alloc_stream = rhs.alloc_stream;
2079 : #endif
2080 :
2081 : rhs.dptr = nullptr;
2082 : rhs.ptr_owner = false;
2083 : }
2084 : return *this;
2085 : }
2086 :
2087 : template <class T>
2088 : template <RunOn run_on>
2089 : BaseFab<T>&
2090 : BaseFab<T>::operator= (T const& t) noexcept
2091 : {
2092 : setVal<run_on>(t);
2093 : return *this;
2094 : }
2095 :
2096 : template <class T>
2097 : void
2098 : BaseFab<T>::resize (const Box& b, int n, Arena* ar)
2099 : {
2100 : this->nvar = n;
2101 : this->domain = b;
2102 :
2103 : if (ar == nullptr) {
2104 : ar = m_arena;
2105 : }
2106 :
2107 : if (arena() != DataAllocator(ar).arena()) {
2108 : clear();
2109 : m_arena = ar;
2110 : define();
2111 : }
2112 : else if (this->dptr == nullptr || !this->ptr_owner)
2113 : {
2114 : if (this->shared_memory) {
2115 : amrex::Abort("BaseFab::resize: BaseFab in shared memory cannot increase size");
2116 : }
2117 :
2118 : this->dptr = nullptr;
2119 : define();
2120 : }
2121 : else if (this->nvar*this->domain.numPts() > this->truesize
2122 : #ifdef AMREX_USE_GPU
2123 : || (arena()->isStreamOrderedArena() && alloc_stream != Gpu::gpuStream())
2124 : #endif
2125 : )
2126 : {
2127 : if (this->shared_memory) {
2128 : amrex::Abort("BaseFab::resize: BaseFab in shared memory cannot increase size");
2129 : }
2130 :
2131 : clear();
2132 :
2133 : define();
2134 : }
2135 : }
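//
// Resize note: per the branches above, the existing allocation is reused
// only when it is owned, comes from the same arena, and truesize is large
// enough (and, for stream-ordered arenas, the stream matches); otherwise
// the fab is cleared and redefined. Old contents are not preserved across
// a reallocation. Illustrative:
//
//     fab.resize(biggerBox, 3);  // may reallocate; treat contents as undefined
//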
2136 :
2137 : template <class T>
2138 : template <class U, std::enable_if_t<std::is_trivially_destructible_v<U>,int>>
2139 : Elixir
2140 : BaseFab<T>::elixir () noexcept
2141 : {
2142 : bool o;
2143 : if (Gpu::inLaunchRegion()) {
2144 : o = this->ptr_owner;
2145 : this->ptr_owner = false;
2146 : if (o && this->dptr) {
2147 : if (this->nvar > 1) {
2148 : amrex::update_fab_stats(-this->truesize/this->nvar, -this->truesize, sizeof(T));
2149 : } else {
2150 : amrex::update_fab_stats(0, -this->truesize, sizeof(T));
2151 : }
2152 : }
2153 : } else {
2154 : o = false;
2155 : }
2156 : return Elixir((o ? this->dptr : nullptr), this->arena());
2157 : }
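//
// Elixir sketch: inside a GPU launch region, elixir() moves ownership of
// the allocation into an Elixir so a temporary fab may go out of scope
// before asynchronous kernels reading it have completed. Typical pattern
// (illustrative):
//
//     amrex::FArrayBox tmp(bx, 1);
//     amrex::Elixir eli = tmp.elixir();  // memory freed after stream sync
//     // ... launch kernels using tmp ...
//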
2158 :
2159 : template <class T>
2160 : void
2161 409827 : BaseFab<T>::clear () noexcept
2162 : {
2163 409827 : if (this->dptr)
2164 : {
2165 : //
2166 : // Call T::~T() on the to-be-destroyed memory.
2167 : //
2168 409756 : if (this->ptr_owner)
2169 : {
2170 409456 : if (this->shared_memory)
2171 : {
2172 : amrex::Abort("BaseFab::clear: BaseFab cannot be owner of shared memory");
2173 : }
2174 :
2175 409456 : placementDelete(this->dptr, this->truesize);
2176 :
2177 : #ifdef AMREX_USE_GPU
2178 : auto current_stream = Gpu::Device::gpuStream();
2179 : Gpu::Device::setStream(alloc_stream);
2180 : #endif
2181 409456 : this->free(this->dptr);
2182 : #ifdef AMREX_USE_GPU
2183 : Gpu::Device::setStream(current_stream);
2184 : #endif
2185 :
2186 409456 : if (this->nvar > 1) {
2187 402374 : amrex::update_fab_stats(-this->truesize/this->nvar, -this->truesize, sizeof(T));
2188 : } else {
2189 7082 : amrex::update_fab_stats(0, -this->truesize, sizeof(T));
2190 : }
2191 : }
2192 :
2193 409756 : this->dptr = nullptr;
2194 409756 : this->truesize = 0;
2195 : }
2196 409827 : }
2197 :
2198 : template <class T>
2199 : std::unique_ptr<T,DataDeleter>
2200 : BaseFab<T>::release () noexcept
2201 : {
2202 : std::unique_ptr<T,DataDeleter> r(nullptr, DataDeleter{this->arena()});
2203 : if (this->dptr && this->ptr_owner) {
2204 : r.reset(this->dptr);
2205 : this->ptr_owner = false;
2206 : if (this->nvar > 1) {
2207 : amrex::update_fab_stats(-this->truesize/this->nvar, -this->truesize, sizeof(T));
2208 : } else {
2209 : amrex::update_fab_stats(0, -this->truesize, sizeof(T));
2210 : }
2211 : }
2212 : return r;
2213 : }
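//
// Release sketch: release() hands the heap pointer (if owned) to the caller
// in a unique_ptr whose DataDeleter frees through the fab's arena:
//
//     std::unique_ptr<amrex::Real, amrex::DataDeleter> p = fab.release();
//     // fab no longer owns its data; p frees it on destruction
//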
2214 :
2215 : template <class T>
2216 : template <RunOn run_on>
2217 : std::size_t
2218 : BaseFab<T>::copyToMem (const Box& srcbox,
2219 : int srccomp,
2220 : int numcomp,
2221 : void* dst) const noexcept
2222 : {
2223 : BL_ASSERT(box().contains(srcbox));
2224 : BL_ASSERT(srccomp >= 0 && srccomp+numcomp <= nComp());
2225 :
2226 : if (srcbox.ok())
2227 : {
2228 : Array4<T> d(static_cast<T*>(dst),amrex::begin(srcbox),amrex::end(srcbox),numcomp);
2229 : Array4<T const> const& s = this->const_array();
2230 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, srcbox, numcomp, i, j, k, n,
2231 : {
2232 : d(i,j,k,n) = s(i,j,k,n+srccomp);
2233 : });
2234 : return sizeof(T)*d.size();
2235 : }
2236 : else
2237 : {
2238 : return 0;
2239 : }
2240 : }
2241 :
2242 : template <class T>
2243 : template <RunOn run_on, typename BUF>
2244 : std::size_t
2245 0 : BaseFab<T>::copyFromMem (const Box& dstbox,
2246 : int dstcomp,
2247 : int numcomp,
2248 : const void* src) noexcept
2249 : {
2250 : BL_ASSERT(box().contains(dstbox));
2251 : BL_ASSERT(dstcomp >= 0 && dstcomp+numcomp <= nComp());
2252 :
2253 0 : if (dstbox.ok())
2254 : {
2255 0 : Array4<BUF const> s(static_cast<BUF const*>(src), amrex::begin(dstbox),
2256 0 : amrex::end(dstbox), numcomp);
2257 0 : Array4<T> const& d = this->array();
2258 0 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, dstbox, numcomp, i, j, k, n,
2259 : {
2260 : d(i,j,k,n+dstcomp) = static_cast<T>(s(i,j,k,n));
2261 : });
2262 0 : return sizeof(BUF)*s.size();
2263 : }
2264 : else
2265 : {
2266 0 : return 0;
2267 : }
2268 : }
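//
// Serialization sketch: copyToMem/copyFromMem pack and unpack a box worth
// of components through a flat buffer. Illustrative host round trip
// (buffer sizing assumes ncomp components on bx):
//
//     std::vector<amrex::Real> buf(bx.numPts()*ncomp);
//     a.copyToMem<amrex::RunOn::Host>(bx, 0, ncomp, buf.data());
//     b.copyFromMem<amrex::RunOn::Host>(bx, 0, ncomp, buf.data());
//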
2269 :
2270 : template <class T>
2271 : template <RunOn run_on, typename BUF>
2272 : std::size_t
2273 0 : BaseFab<T>::addFromMem (const Box& dstbox,
2274 : int dstcomp,
2275 : int numcomp,
2276 : const void* src) noexcept
2277 : {
2278 : BL_ASSERT(box().contains(dstbox));
2279 : BL_ASSERT(dstcomp >= 0 && dstcomp+numcomp <= nComp());
2280 :
2281 0 : if (dstbox.ok())
2282 : {
2283 0 : Array4<BUF const> s(static_cast<BUF const*>(src), amrex::begin(dstbox),
2284 0 : amrex::end(dstbox), numcomp);
2285 0 : Array4<T> const& d = this->array();
2286 0 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, dstbox, numcomp, i, j, k, n,
2287 : {
2288 : d(i,j,k,n+dstcomp) += static_cast<T>(s(i,j,k,n));
2289 : });
2290 0 : return sizeof(BUF)*s.size();
2291 : }
2292 : else
2293 : {
2294 0 : return 0;
2295 : }
2296 : }
2297 :
2298 : template <class T>
2299 : template <RunOn run_on>
2300 : void
2301 216 : BaseFab<T>::setComplement (T const& x, const Box& b, int ns, int num) noexcept
2302 : {
2303 216 : this->setComplement<run_on>(x, b, DestComp{ns}, NumComps{num});
2304 216 : }
2305 :
2306 : template <class T>
2307 : template <RunOn run_on>
2308 : void
2309 : BaseFab<T>::abs () noexcept
2310 : {
2311 : this->abs<run_on>(this->domain,0,this->nvar);
2312 : }
2313 :
2314 : template <class T>
2315 : template <RunOn run_on>
2316 : void
2317 : BaseFab<T>::abs (int comp, int numcomp) noexcept
2318 : {
2319 : this->abs<run_on>(this->domain,comp,numcomp);
2320 : }
2321 :
2322 : template <class T>
2323 : template <RunOn run_on>
2324 : void
2325 : BaseFab<T>::abs (const Box& subbox, int comp, int numcomp) noexcept
2326 : {
2327 : Array4<T> const& a = this->array();
2328 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG (run_on, subbox, numcomp, i, j, k, n,
2329 : {
2330 : a(i,j,k,n+comp) = std::abs(a(i,j,k,n+comp));
2331 : });
2332 : }
2333 :
2334 : template <class T>
2335 : template <RunOn run_on>
2336 : Real
2337 : BaseFab<T>::norminfmask (const Box& subbox, const BaseFab<int>& mask,
2338 : int scomp, int ncomp) const noexcept
2339 : {
2340 : BL_ASSERT(this->domain.contains(subbox));
2341 : BL_ASSERT(scomp >= 0 && scomp + ncomp <= this->nvar);
2342 :
2343 : Array4<T const> const& a = this->const_array();
2344 : Array4<int const> const& m = mask.const_array();
2345 : Real r = 0.0;
2346 : #ifdef AMREX_USE_GPU
2347 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
2348 : ReduceOps<ReduceOpMax> reduce_op;
2349 : ReduceData<Real> reduce_data(reduce_op);
2350 : using ReduceTuple = ReduceData<Real>::Type;
2351 : reduce_op.eval(subbox, reduce_data,
2352 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
2353 : {
2354 : Real t = 0.0;
2355 : if (m(i,j,k)) {
2356 : for (int n = 0; n < ncomp; ++n) {
2357 : t = amrex::max(t,static_cast<Real>(std::abs(a(i,j,k,n+scomp))));
2358 : }
2359 : }
2360 : return {t};
2361 : });
2362 : ReduceTuple hv = reduce_data.value(reduce_op);
2363 : r = amrex::get<0>(hv);
2364 : } else
2365 : #endif
2366 : {
2367 : amrex::LoopOnCpu(subbox, ncomp, [=,&r] (int i, int j, int k, int n) noexcept
2368 : {
2369 : if (m(i,j,k)) {
2370 : Real t = static_cast<Real>(std::abs(a(i,j,k,n+scomp)));
2371 : r = amrex::max(r,t);
2372 : }
2373 : });
2374 : }
2375 : return r;
2376 : }
2377 :
2378 : template <class T>
2379 : template <RunOn run_on>
2380 : Real
2381 : BaseFab<T>::norm (int p, int comp, int numcomp) const noexcept
2382 : {
2383 : return norm<run_on>(this->domain,p,comp,numcomp);
2384 : }
2385 :
2386 : template <class T>
2387 : template <RunOn run_on>
2388 : Real
2389 : BaseFab<T>::norm (const Box& subbox, int p, int comp, int numcomp) const noexcept
2390 : {
2391 : BL_ASSERT(this->domain.contains(subbox));
2392 : BL_ASSERT(comp >= 0 && comp + numcomp <= this->nvar);
2393 :
2394 : Array4<T const> const& a = this->const_array();
2395 : Real nrm = 0.;
2396 : #ifdef AMREX_USE_GPU
2397 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
2398 : if (p == 0) {
2399 : ReduceOps<ReduceOpMax> reduce_op;
2400 : ReduceData<Real> reduce_data(reduce_op);
2401 : using ReduceTuple = ReduceData<Real>::Type;
2402 : reduce_op.eval(subbox, reduce_data,
2403 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
2404 : {
2405 : Real t = 0.0;
2406 : for (int n = 0; n < numcomp; ++n) {
2407 : t = amrex::max(t, static_cast<Real>(std::abs(a(i,j,k,n+comp))));
2408 : }
2409 : return {t};
2410 : });
2411 : ReduceTuple hv = reduce_data.value(reduce_op);
2412 : nrm = amrex::get<0>(hv);
2413 : } else if (p == 1) {
2414 : ReduceOps<ReduceOpSum> reduce_op;
2415 : ReduceData<Real> reduce_data(reduce_op);
2416 : using ReduceTuple = ReduceData<Real>::Type;
2417 : reduce_op.eval(subbox, reduce_data,
2418 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
2419 : {
2420 : Real t = 0.0;
2421 : for (int n = 0; n < numcomp; ++n) {
2422 : t += static_cast<Real>(std::abs(a(i,j,k,n+comp)));
2423 : }
2424 : return {t};
2425 : });
2426 : ReduceTuple hv = reduce_data.value(reduce_op);
2427 : nrm = amrex::get<0>(hv);
2428 : } else if (p == 2) {
2429 : ReduceOps<ReduceOpSum> reduce_op;
2430 : ReduceData<Real> reduce_data(reduce_op);
2431 : using ReduceTuple = ReduceData<Real>::Type;
2432 : reduce_op.eval(subbox, reduce_data,
2433 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
2434 : {
2435 : Real t = 0.0;
2436 : for (int n = 0; n < numcomp; ++n) {
2437 : t += static_cast<Real>(a(i,j,k,n+comp)*a(i,j,k,n+comp));
2438 : }
2439 : return {t};
2440 : });
2441 : ReduceTuple hv = reduce_data.value(reduce_op);
2442 : nrm = amrex::get<0>(hv);
2443 : } else {
2444 : amrex::Error("BaseFab<T>::norm: wrong p");
2445 : }
2446 : } else
2447 : #endif
2448 : {
2449 : if (p == 0) {
2450 : amrex::LoopOnCpu(subbox, numcomp, [=,&nrm] (int i, int j, int k, int n) noexcept
2451 : {
2452 : Real t = static_cast<Real>(std::abs(a(i,j,k,n+comp)));
2453 : nrm = amrex::max(nrm,t);
2454 : });
2455 : } else if (p == 1) {
2456 : amrex::LoopOnCpu(subbox, numcomp, [=,&nrm] (int i, int j, int k, int n) noexcept
2457 : {
2458 : nrm += std::abs(a(i,j,k,n+comp));
2459 : });
2460 : } else if (p == 2) {
2461 : amrex::LoopOnCpu(subbox, numcomp, [=,&nrm] (int i, int j, int k, int n) noexcept
2462 : {
2463 : nrm += a(i,j,k,n+comp)*a(i,j,k,n+comp);
2464 : });
2465 : } else {
2466 : amrex::Error("BaseFab<T>::norm: wrong p");
2467 : }
2468 : }
2469 :
2470 : return nrm;
2471 : }
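//
// Norm note: p==0 yields the max-norm, p==1 the sum of absolute values,
// and p==2 the sum of squares -- no square root is taken, so the L2 norm
// requires std::sqrt at the call site. Illustrative:
//
//     amrex::Real linf = fab.norm<amrex::RunOn::Host>(0, 0, 1);
//     amrex::Real l2   = std::sqrt(fab.norm<amrex::RunOn::Host>(2, 0, 1));
//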
2472 :
2473 : template <class T>
2474 : template <RunOn run_on>
2475 : T
2476 : BaseFab<T>::min (int comp) const noexcept
2477 : {
2478 : return this->min<run_on>(this->domain,comp);
2479 : }
2480 :
2481 : template <class T>
2482 : template <RunOn run_on>
2483 : T
2484 : BaseFab<T>::min (const Box& subbox, int comp) const noexcept
2485 : {
2486 : Array4<T const> const& a = this->const_array(comp);
2487 : #ifdef AMREX_USE_GPU
2488 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
2489 : ReduceOps<ReduceOpMin> reduce_op;
2490 : ReduceData<T> reduce_data(reduce_op);
2491 : using ReduceTuple = typename decltype(reduce_data)::Type;
2492 : reduce_op.eval(subbox, reduce_data,
2493 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
2494 : {
2495 : return { a(i,j,k) };
2496 : });
2497 : ReduceTuple hv = reduce_data.value(reduce_op);
2498 : return amrex::get<0>(hv);
2499 : } else
2500 : #endif
2501 : {
2502 : T r = std::numeric_limits<T>::max();
2503 : amrex::LoopOnCpu(subbox, [=,&r] (int i, int j, int k) noexcept
2504 : {
2505 : r = amrex::min(r, a(i,j,k));
2506 : });
2507 : return r;
2508 : }
2509 : }
2510 :
2511 : template <class T>
2512 : template <RunOn run_on>
2513 : T
2514 : BaseFab<T>::max (int comp) const noexcept
2515 : {
2516 : return this->max<run_on>(this->domain,comp);
2517 : }
2518 :
2519 : template <class T>
2520 : template <RunOn run_on>
2521 : T
2522 : BaseFab<T>::max (const Box& subbox, int comp) const noexcept
2523 : {
2524 : Array4<T const> const& a = this->const_array(comp);
2525 : #ifdef AMREX_USE_GPU
2526 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
2527 : ReduceOps<ReduceOpMax> reduce_op;
2528 : ReduceData<T> reduce_data(reduce_op);
2529 : using ReduceTuple = typename decltype(reduce_data)::Type;
2530 : reduce_op.eval(subbox, reduce_data,
2531 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
2532 : {
2533 : return { a(i,j,k) };
2534 : });
2535 : ReduceTuple hv = reduce_data.value(reduce_op);
2536 : return amrex::get<0>(hv);
2537 : } else
2538 : #endif
2539 : {
2540 : T r = std::numeric_limits<T>::lowest();
2541 : amrex::LoopOnCpu(subbox, [=,&r] (int i, int j, int k) noexcept
2542 : {
2543 : r = amrex::max(r, a(i,j,k));
2544 : });
2545 : return r;
2546 : }
2547 : }
2548 :
2549 : template <class T>
2550 : template <RunOn run_on>
2551 : std::pair<T,T>
2552 : BaseFab<T>::minmax (int comp) const noexcept
2553 : {
2554 : return this->minmax<run_on>(this->domain,comp);
2555 : }
2556 :
2557 : template <class T>
2558 : template <RunOn run_on>
2559 : std::pair<T,T>
2560 : BaseFab<T>::minmax (const Box& subbox, int comp) const noexcept
2561 : {
2562 : Array4<T const> const& a = this->const_array(comp);
2563 : #ifdef AMREX_USE_GPU
2564 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
2565 : ReduceOps<ReduceOpMin,ReduceOpMax> reduce_op;
2566 : ReduceData<T,T> reduce_data(reduce_op);
2567 : using ReduceTuple = typename decltype(reduce_data)::Type;
2568 : reduce_op.eval(subbox, reduce_data,
2569 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
2570 : {
2571 : auto const x = a(i,j,k);
2572 : return { x, x };
2573 : });
2574 : ReduceTuple hv = reduce_data.value(reduce_op);
2575 : return std::make_pair(amrex::get<0>(hv), amrex::get<1>(hv));
2576 : } else
2577 : #endif
2578 : {
2579 : T rmax = std::numeric_limits<T>::lowest();
2580 : T rmin = std::numeric_limits<T>::max();
2581 : amrex::LoopOnCpu(subbox, [=,&rmin,&rmax] (int i, int j, int k) noexcept
2582 : {
2583 : auto const x = a(i,j,k);
2584 : rmin = amrex::min(rmin, x);
2585 : rmax = amrex::max(rmax, x);
2586 : });
2587 : return std::make_pair(rmin,rmax);
2588 : }
2589 : }
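//
// Min/max sketch: minmax() fuses both reductions into a single pass, which
// is cheaper than separate min() and max() calls, especially on GPU:
//
//     std::pair<amrex::Real,amrex::Real> mm = fab.minmax<amrex::RunOn::Host>(0);
//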
2590 :
2591 : template <class T>
2592 : template <RunOn run_on>
2593 : T
2594 : BaseFab<T>::maxabs (int comp) const noexcept
2595 : {
2596 : return this->maxabs<run_on>(this->domain,comp);
2597 : }
2598 :
2599 : template <class T>
2600 : template <RunOn run_on>
2601 : T
2602 : BaseFab<T>::maxabs (const Box& subbox, int comp) const noexcept
2603 : {
2604 : Array4<T const> const& a = this->const_array(comp);
2605 : #ifdef AMREX_USE_GPU
2606 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
2607 : ReduceOps<ReduceOpMax> reduce_op;
2608 : ReduceData<T> reduce_data(reduce_op);
2609 : using ReduceTuple = typename decltype(reduce_data)::Type;
2610 : reduce_op.eval(subbox, reduce_data,
2611 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
2612 : {
2613 : return { std::abs(a(i,j,k)) };
2614 : });
2615 : ReduceTuple hv = reduce_data.value(reduce_op);
2616 : return amrex::get<0>(hv);
2617 : } else
2618 : #endif
2619 : {
2620 : T r = 0;
2621 : amrex::LoopOnCpu(subbox, [=,&r] (int i, int j, int k) noexcept
2622 : {
2623 : r = amrex::max(r, std::abs(a(i,j,k)));
2624 : });
2625 : return r;
2626 : }
2627 : }
2628 :
2629 :
2630 : template <class T>
2631 : template <RunOn run_on>
2632 : IntVect
2633 : BaseFab<T>::indexFromValue (Box const& subbox, int comp, T const& value) const noexcept
2634 : {
2635 : Array4<T const> const& a = this->const_array(comp);
2636 : #ifdef AMREX_USE_GPU
2637 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
2638 : Array<int,1+AMREX_SPACEDIM> ha{0,AMREX_D_DECL(std::numeric_limits<int>::lowest(),
2639 : std::numeric_limits<int>::lowest(),
2640 : std::numeric_limits<int>::lowest())};
2641 : Gpu::DeviceVector<int> dv(1+AMREX_SPACEDIM);
2642 : int* p = dv.data();
2643 : Gpu::htod_memcpy_async(p, ha.data(), sizeof(int)*ha.size());
2644 : amrex::ParallelFor(subbox, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
2645 : {
2646 : int* flag = p;
2647 : if ((*flag == 0) && (a(i,j,k) == value)) {
2648 : if (Gpu::Atomic::Exch(flag,1) == 0) {
2649 : AMREX_D_TERM(p[1] = i;,
2650 : p[2] = j;,
2651 : p[3] = k;);
2652 : }
2653 : }
2654 : });
2655 : Gpu::dtoh_memcpy_async(ha.data(), p, sizeof(int)*ha.size());
2656 : Gpu::streamSynchronize();
2657 : return IntVect(AMREX_D_DECL(ha[1],ha[2],ha[3]));
2658 : } else
2659 : #endif
2660 : {
2661 : AMREX_LOOP_3D(subbox, i, j, k,
2662 : {
2663 : if (a(i,j,k) == value) { return IntVect(AMREX_D_DECL(i,j,k)); }
2664 : });
2665 : return IntVect::TheMinVector();
2666 : }
2667 : }
2668 :
2669 : template <class T>
2670 : template <RunOn run_on>
2671 : IntVect
2672 : BaseFab<T>::minIndex (int comp) const noexcept
2673 : {
2674 : return this->minIndex<run_on>(this->domain,comp);
2675 : }
2676 :
2677 : template <class T>
2678 : template <RunOn run_on>
2679 : IntVect
2680 : BaseFab<T>::minIndex (const Box& subbox, int comp) const noexcept
2681 : {
2682 : T min_val = this->min<run_on>(subbox, comp);
2683 : return this->indexFromValue<run_on>(subbox, comp, min_val);
2684 : }
2685 :
2686 : template <class T>
2687 : template <RunOn run_on>
2688 : void
2689 : BaseFab<T>::minIndex (const Box& subbox, Real& min_val, IntVect& min_idx, int comp) const noexcept
2690 : {
2691 : min_val = this->min<run_on>(subbox, comp);
2692 : min_idx = this->indexFromValue<run_on>(subbox, comp, min_val);
2693 : }
2694 :
2695 : template <class T>
2696 : template <RunOn run_on>
2697 : IntVect
2698 : BaseFab<T>::maxIndex (int comp) const noexcept
2699 : {
2700 : return this->maxIndex<run_on>(this->domain,comp);
2701 : }
2702 :
2703 : template <class T>
2704 : template <RunOn run_on>
2705 : IntVect
2706 : BaseFab<T>::maxIndex (const Box& subbox, int comp) const noexcept
2707 : {
2708 : T max_val = this->max<run_on>(subbox, comp);
2709 : return this->indexFromValue<run_on>(subbox, comp, max_val);
2710 : }
2711 :
2712 : template <class T>
2713 : template <RunOn run_on>
2714 : void
2715 : BaseFab<T>::maxIndex (const Box& subbox, Real& max_val, IntVect& max_idx, int comp) const noexcept
2716 : {
2717 : max_val = this->max<run_on>(subbox, comp);
2718 : max_idx = this->indexFromValue<run_on>(subbox, comp, max_val);
2719 : }
2720 :
2721 : template <class T>
2722 : template <RunOn run_on>
2723 : int
2724 : BaseFab<T>::maskLT (BaseFab<int>& mask, T const& val, int comp) const noexcept
2725 : {
2726 : mask.resize(this->domain,1);
2727 : int cnt = 0;
2728 : Array4<int> const& m = mask.array();
2729 : Array4<T const> const& a = this->const_array(comp);
2730 : #ifdef AMREX_USE_GPU
2731 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
2732 : ReduceOps<ReduceOpSum> reduce_op;
2733 : ReduceData<int> reduce_data(reduce_op);
2734 : using ReduceTuple = typename decltype(reduce_data)::Type;
2735 : reduce_op.eval(this->domain, reduce_data,
2736 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
2737 : {
2738 : int t;
2739 : if (a(i,j,k) < val) {
2740 : m(i,j,k) = 1;
2741 : t = 1;
2742 : } else {
2743 : m(i,j,k) = 0;
2744 : t = 0;
2745 : }
2746 : return {t};
2747 : });
2748 : ReduceTuple hv = reduce_data.value(reduce_op);
2749 : cnt = amrex::get<0>(hv);
2750 : } else
2751 : #endif
2752 : {
2753 : AMREX_LOOP_3D(this->domain, i, j, k,
2754 : {
2755 : if (a(i,j,k) < val) {
2756 : m(i,j,k) = 1;
2757 : ++cnt;
2758 : } else {
2759 : m(i,j,k) = 0;
2760 : }
2761 : });
2762 : }
2763 :
2764 : return cnt;
2765 : }
2766 :
2767 : template <class T>
2768 : template <RunOn run_on>
2769 : int
2770 : BaseFab<T>::maskLE (BaseFab<int>& mask, T const& val, int comp) const noexcept
2771 : {
2772 : mask.resize(this->domain,1);
2773 : int cnt = 0;
2774 : Array4<int> const& m = mask.array();
2775 : Array4<T const> const& a = this->const_array(comp);
2776 : #ifdef AMREX_USE_GPU
2777 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
2778 : ReduceOps<ReduceOpSum> reduce_op;
2779 : ReduceData<int> reduce_data(reduce_op);
2780 : using ReduceTuple = typename decltype(reduce_data)::Type;
2781 : reduce_op.eval(this->domain, reduce_data,
2782 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
2783 : {
2784 : int t;
2785 : if (a(i,j,k) <= val) {
2786 : m(i,j,k) = 1;
2787 : t = 1;
2788 : } else {
2789 : m(i,j,k) = 0;
2790 : t = 0;
2791 : }
2792 : return {t};
2793 : });
2794 : ReduceTuple hv = reduce_data.value(reduce_op);
2795 : cnt = amrex::get<0>(hv);
2796 : } else
2797 : #endif
2798 : {
2799 : AMREX_LOOP_3D(this->domain, i, j, k,
2800 : {
2801 : if (a(i,j,k) <= val) {
2802 : m(i,j,k) = 1;
2803 : ++cnt;
2804 : } else {
2805 : m(i,j,k) = 0;
2806 : }
2807 : });
2808 : }
2809 :
2810 : return cnt;
2811 : }
2812 :
2813 : template <class T>
2814 : template <RunOn run_on>
2815 : int
2816 : BaseFab<T>::maskEQ (BaseFab<int>& mask, T const& val, int comp) const noexcept
2817 : {
2818 : mask.resize(this->domain,1);
2819 : int cnt = 0;
2820 : Array4<int> const& m = mask.array();
2821 : Array4<T const> const& a = this->const_array(comp);
2822 : #ifdef AMREX_USE_GPU
2823 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
2824 : ReduceOps<ReduceOpSum> reduce_op;
2825 : ReduceData<int> reduce_data(reduce_op);
2826 : using ReduceTuple = typename decltype(reduce_data)::Type;
2827 : reduce_op.eval(this->domain, reduce_data,
2828 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
2829 : {
2830 : int t;
2831 : if (a(i,j,k) == val) {
2832 : m(i,j,k) = 1;
2833 : t = 1;
2834 : } else {
2835 : m(i,j,k) = 0;
2836 : t = 0;
2837 : }
2838 : return {t};
2839 : });
2840 : ReduceTuple hv = reduce_data.value(reduce_op);
2841 : cnt = amrex::get<0>(hv);
2842 : } else
2843 : #endif
2844 : {
2845 : AMREX_LOOP_3D(this->domain, i, j, k,
2846 : {
2847 : if (a(i,j,k) == val) {
2848 : m(i,j,k) = 1;
2849 : ++cnt;
2850 : } else {
2851 : m(i,j,k) = 0;
2852 : }
2853 : });
2854 : }
2855 :
2856 : return cnt;
2857 : }
2858 :
2859 : template <class T>
2860 : template <RunOn run_on>
2861 : int
2862 : BaseFab<T>::maskGT (BaseFab<int>& mask, T const& val, int comp) const noexcept
2863 : {
2864 : mask.resize(this->domain,1);
2865 : int cnt = 0;
2866 : Array4<int> const& m = mask.array();
2867 : Array4<T const> const& a = this->const_array(comp);
2868 : #ifdef AMREX_USE_GPU
2869 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
2870 : ReduceOps<ReduceOpSum> reduce_op;
2871 : ReduceData<int> reduce_data(reduce_op);
2872 : using ReduceTuple = typename decltype(reduce_data)::Type;
2873 : reduce_op.eval(this->domain, reduce_data,
2874 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
2875 : {
2876 : int t;
2877 : if (a(i,j,k) > val) {
2878 : m(i,j,k) = 1;
2879 : t = 1;
2880 : } else {
2881 : m(i,j,k) = 0;
2882 : t = 0;
2883 : }
2884 : return {t};
2885 : });
2886 : ReduceTuple hv = reduce_data.value(reduce_op);
2887 : cnt = amrex::get<0>(hv);
2888 : } else
2889 : #endif
2890 : {
2891 : AMREX_LOOP_3D(this->domain, i, j, k,
2892 : {
2893 : if (a(i,j,k) > val) {
2894 : m(i,j,k) = 1;
2895 : ++cnt;
2896 : } else {
2897 : m(i,j,k) = 0;
2898 : }
2899 : });
2900 : }
2901 :
2902 : return cnt;
2903 : }
2904 :
2905 : template <class T>
2906 : template <RunOn run_on>
2907 : int
2908 : BaseFab<T>::maskGE (BaseFab<int>& mask, T const& val, int comp) const noexcept
2909 : {
2910 : mask.resize(this->domain,1);
2911 : int cnt = 0;
2912 : Array4<int> const& m = mask.array();
2913 : Array4<T const> const& a = this->const_array(comp);
2914 : #ifdef AMREX_USE_GPU
2915 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
2916 : ReduceOps<ReduceOpSum> reduce_op;
2917 : ReduceData<int> reduce_data(reduce_op);
2918 : using ReduceTuple = typename decltype(reduce_data)::Type;
2919 : reduce_op.eval(this->domain, reduce_data,
2920 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
2921 : {
2922 : int t;
2923 : if (a(i,j,k) >= val) {
2924 : m(i,j,k) = 1;
2925 : t = 1;
2926 : } else {
2927 : m(i,j,k) = 0;
2928 : t = 0;
2929 : }
2930 : return {t};
2931 : });
2932 : ReduceTuple hv = reduce_data.value(reduce_op);
2933 : cnt = amrex::get<0>(hv);
2934 : } else
2935 : #endif
2936 : {
2937 : AMREX_LOOP_3D(this->domain, i, j, k,
2938 : {
2939 : if (a(i,j,k) >= val) {
2940 : m(i,j,k) = 1;
2941 : ++cnt;
2942 : } else {
2943 : m(i,j,k) = 0;
2944 : }
2945 : });
2946 : }
2947 :
2948 : return cnt;
2949 : }
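//
// Mask sketch: each mask* function resizes `mask` to this fab's box, writes
// 1 where the predicate holds and 0 elsewhere, and returns the count of
// cells set. Illustrative:
//
//     amrex::BaseFab<int> mask;
//     int nneg = fab.maskLT<amrex::RunOn::Host>(mask, 0.0, 0);
//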
2950 :
2951 : template <class T>
2952 : template <RunOn run_on>
2953 : BaseFab<T>&
2954 : BaseFab<T>::atomicAdd (const BaseFab<T>& x) noexcept
2955 : {
2956 : Box ovlp(this->domain);
2957 : ovlp &= x.domain;
2958 : return ovlp.ok() ? this->atomicAdd<run_on>(x,ovlp,ovlp,0,0,this->nvar) : *this;
2959 : }
2960 :
2961 : template <class T>
2962 : template <RunOn run_on>
2963 : BaseFab<T>&
2964 : BaseFab<T>::saxpy (T a, const BaseFab<T>& x, const Box& srcbox, const Box& destbox,
2965 : int srccomp, int destcomp, int numcomp) noexcept
2966 : {
2967 : BL_ASSERT(srcbox.ok());
2968 : BL_ASSERT(x.box().contains(srcbox));
2969 : BL_ASSERT(destbox.ok());
2970 : BL_ASSERT(box().contains(destbox));
2971 : BL_ASSERT(destbox.sameSize(srcbox));
2972 : BL_ASSERT( srccomp >= 0 && srccomp+numcomp <= x.nComp());
2973 : BL_ASSERT(destcomp >= 0 && destcomp+numcomp <= nComp());
2974 :
2975 : Array4<T> const& d = this->array();
2976 : Array4<T const> const& s = x.const_array();
2977 : const auto dlo = amrex::lbound(destbox);
2978 : const auto slo = amrex::lbound(srcbox);
2979 : const Dim3 offset{slo.x-dlo.x,slo.y-dlo.y,slo.z-dlo.z};
2980 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
2981 : {
2982 : d(i,j,k,n+destcomp) += a * s(i+offset.x,j+offset.y,k+offset.z,n+srccomp);
2983 : });
2984 :
2985 : return *this;
2986 : }
2987 :
2988 : template <class T>
2989 : template <RunOn run_on>
2990 : BaseFab<T>&
2991 : BaseFab<T>::saxpy (T a, const BaseFab<T>& x) noexcept
2992 : {
2993 : Box ovlp(this->domain);
2994 : ovlp &= x.domain;
2995 : return ovlp.ok() ? saxpy<run_on>(a,x,ovlp,ovlp,0,0,this->nvar) : *this;
2996 : }
2997 :
2998 : template <class T>
2999 : template <RunOn run_on>
3000 : BaseFab<T>&
3001 : BaseFab<T>::xpay (T a, const BaseFab<T>& x,
3002 : const Box& srcbox, const Box& destbox,
3003 : int srccomp, int destcomp, int numcomp) noexcept
3004 : {
3005 : BL_ASSERT(srcbox.ok());
3006 : BL_ASSERT(x.box().contains(srcbox));
3007 : BL_ASSERT(destbox.ok());
3008 : BL_ASSERT(box().contains(destbox));
3009 : BL_ASSERT(destbox.sameSize(srcbox));
3010 : BL_ASSERT( srccomp >= 0 && srccomp+numcomp <= x.nComp());
3011 : BL_ASSERT(destcomp >= 0 && destcomp+numcomp <= nComp());
3012 :
3013 : Array4<T> const& d = this->array();
3014 : Array4<T const> const& s = x.const_array();
3015 : const auto dlo = amrex::lbound(destbox);
3016 : const auto slo = amrex::lbound(srcbox);
3017 : const Dim3 offset{slo.x-dlo.x,slo.y-dlo.y,slo.z-dlo.z};
3018 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
3019 : {
3020 : d(i,j,k,n+destcomp) = s(i+offset.x,j+offset.y,k+offset.z,n+srccomp) + a*d(i,j,k,n+destcomp);
3021 : });
3022 :
3023 : return *this;
3024 : }
3025 :
3026 : template <class T>
3027 : template <RunOn run_on>
3028 : BaseFab<T>&
3029 : BaseFab<T>::addproduct (const Box& destbox, int destcomp, int numcomp,
3030 : const BaseFab<T>& src1, int comp1,
3031 : const BaseFab<T>& src2, int comp2) noexcept
3032 : {
3033 : BL_ASSERT(destbox.ok());
3034 : BL_ASSERT(box().contains(destbox));
3035 : BL_ASSERT( comp1 >= 0 && comp1+numcomp <= src1.nComp());
3036 : BL_ASSERT( comp2 >= 0 && comp2+numcomp <= src2.nComp());
3037 : BL_ASSERT(destcomp >= 0 && destcomp+numcomp <= nComp());
3038 :
3039 : Array4<T> const& d = this->array();
3040 : Array4<T const> const& s1 = src1.const_array();
3041 : Array4<T const> const& s2 = src2.const_array();
3042 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
3043 : {
3044 : d(i,j,k,n+destcomp) += s1(i,j,k,n+comp1) * s2(i,j,k,n+comp2);
3045 : });
3046 :
3047 : return *this;
3048 : }
3049 :
3050 : template <class T>
3051 : template <RunOn run_on>
3052 : BaseFab<T>&
3053 : BaseFab<T>::linComb (const BaseFab<T>& f1, const Box& b1, int comp1,
3054 : const BaseFab<T>& f2, const Box& b2, int comp2,
3055 : Real alpha, Real beta, const Box& b,
3056 : int comp, int numcomp) noexcept
3057 : {
3058 : BL_ASSERT(b1.ok());
3059 : BL_ASSERT(f1.box().contains(b1));
3060 : BL_ASSERT(b2.ok());
3061 : BL_ASSERT(f2.box().contains(b2));
3062 : BL_ASSERT(b.ok());
3063 : BL_ASSERT(box().contains(b));
3064 : BL_ASSERT(b.sameSize(b1));
3065 : BL_ASSERT(b.sameSize(b2));
3066 : BL_ASSERT(comp1 >= 0 && comp1+numcomp <= f1.nComp());
3067 : BL_ASSERT(comp2 >= 0 && comp2+numcomp <= f2.nComp());
3068 : BL_ASSERT(comp >= 0 && comp +numcomp <= nComp());
3069 :
3070 : Array4<T> const& d = this->array();
3071 : Array4<T const> const& s1 = f1.const_array();
3072 : Array4<T const> const& s2 = f2.const_array();
3073 : const auto dlo = amrex::lbound(b);
3074 : const auto slo1 = amrex::lbound(b1);
3075 : const auto slo2 = amrex::lbound(b2);
3076 : const Dim3 off1{slo1.x-dlo.x,slo1.y-dlo.y,slo1.z-dlo.z};
3077 : const Dim3 off2{slo2.x-dlo.x,slo2.y-dlo.y,slo2.z-dlo.z};
3078 :
3079 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, b, numcomp, i, j, k, n,
3080 : {
3081 : d(i,j,k,n+comp) = alpha*s1(i+off1.x,j+off1.y,k+off1.z,n+comp1)
3082 : + beta*s2(i+off2.x,j+off2.y,k+off2.z,n+comp2);
3083 : });
3084 : return *this;
3085 : }
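//
// Linear-combination sketch (boxes, components, and scalars below are
// illustrative assumptions):
//
//     y.saxpy<amrex::RunOn::Host>(2.0, x);                  // y += 2*x on overlap
//     y.xpay<amrex::RunOn::Host>(2.0, x, bx, bx, 0, 0, 1);  // y = x + 2*y
//     d.linComb<amrex::RunOn::Host>(f1, bx, 0, f2, bx, 0,
//                                   0.5, 0.5, bx, 0, 1);    // d = 0.5*f1 + 0.5*f2
//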
3086 :
3087 : template <class T>
3088 : template <RunOn run_on>
3089 : T
3090 : BaseFab<T>::dot (const Box& xbx, int xcomp,
3091 : const BaseFab<T>& y, const Box& ybx, int ycomp,
3092 : int numcomp) const noexcept
3093 : {
3094 : BL_ASSERT(xbx.ok());
3095 : BL_ASSERT(box().contains(xbx));
3096 : BL_ASSERT(y.box().contains(ybx));
3097 : BL_ASSERT(xbx.sameSize(ybx));
3098 : BL_ASSERT(xcomp >= 0 && xcomp+numcomp <= nComp());
3099 : BL_ASSERT(ycomp >= 0 && ycomp+numcomp <= y.nComp());
3100 :
3101 : T r = 0;
3102 :
3103 : const auto xlo = amrex::lbound(xbx);
3104 : const auto ylo = amrex::lbound(ybx);
3105 : const Dim3 offset{ylo.x-xlo.x,ylo.y-xlo.y,ylo.z-xlo.z};
3106 : Array4<T const> const& xa = this->const_array();
3107 : Array4<T const> const& ya = y.const_array();
3108 :
3109 : #ifdef AMREX_USE_GPU
3110 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
3111 : ReduceOps<ReduceOpSum> reduce_op;
3112 : ReduceData<T> reduce_data(reduce_op);
3113 : using ReduceTuple = typename decltype(reduce_data)::Type;
3114 : reduce_op.eval(xbx, reduce_data,
3115 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
3116 : {
3117 : T t = 0;
3118 : for (int n = 0; n < numcomp; ++n) {
3119 : t += xa(i,j,k,n+xcomp) * ya(i+offset.x,j+offset.y,k+offset.z,n+ycomp);
3120 : }
3121 : return {t};
3122 : });
3123 : ReduceTuple hv = reduce_data.value(reduce_op);
3124 : r = amrex::get<0>(hv);
3125 : } else
3126 : #endif
3127 : {
3128 : AMREX_LOOP_4D(xbx, numcomp, i, j, k, n,
3129 : {
3130 : r += xa(i,j,k,n+xcomp) * ya(i+offset.x,j+offset.y,k+offset.z,n+ycomp);
3131 : });
3132 : }
3133 :
3134 : return r;
3135 : }
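//
// Dot sketch: reduces the sum over xbx of this*y with the component and
// box offsets given; the two boxes must be the same size:
//
//     amrex::Real d = x.dot<amrex::RunOn::Host>(bx, 0, y, bx, 0, 1);
//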
3136 :
3137 : template <class T>
3138 : template <RunOn run_on>
3139 : T
3140 : BaseFab<T>::dotmask (const BaseFab<int>& mask, const Box& xbx, int xcomp,
3141 : const BaseFab<T>& y, const Box& ybx, int ycomp,
3142 : int numcomp) const noexcept
3143 : {
3144 : BL_ASSERT(xbx.ok());
3145 : BL_ASSERT(box().contains(xbx));
3146 : BL_ASSERT(y.box().contains(ybx));
3147 : BL_ASSERT(xbx.sameSize(ybx));
3148 : BL_ASSERT(xcomp >= 0 && xcomp+numcomp <= nComp());
3149 : BL_ASSERT(ycomp >= 0 && ycomp+numcomp <= y.nComp());
3150 :
3151 : T r = 0;
3152 :
3153 : const auto xlo = amrex::lbound(xbx);
3154 : const auto ylo = amrex::lbound(ybx);
3155 : const Dim3 offset{ylo.x-xlo.x,ylo.y-xlo.y,ylo.z-xlo.z};
3156 :
3157 : Array4<T const> const& xa = this->const_array();
3158 : Array4<T const> const& ya = y.const_array();
3159 : Array4<int const> const& ma = mask.const_array();
3160 :
3161 : #ifdef AMREX_USE_GPU
3162 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
3163 : ReduceOps<ReduceOpSum> reduce_op;
3164 : ReduceData<T> reduce_data(reduce_op);
3165 : using ReduceTuple = typename decltype(reduce_data)::Type;
3166 : reduce_op.eval(xbx, reduce_data,
3167 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
3168 : {
3169 : int m = static_cast<int>(static_cast<bool>(ma(i,j,k)));
3170 : T t = 0;
3171 : for (int n = 0; n < numcomp; ++n) {
3172 : t += xa(i,j,k,n+xcomp) * ya(i+offset.x,j+offset.y,k+offset.z,n+ycomp) * m;
3173 : }
3174 : return {t};
3175 : });
3176 : ReduceTuple hv = reduce_data.value(reduce_op);
3177 : r = amrex::get<0>(hv);
3178 : } else
3179 : #endif
3180 : {
3181 : AMREX_LOOP_4D(xbx, numcomp, i, j, k, n,
3182 : {
3183 : int m = static_cast<int>(static_cast<bool>(ma(i,j,k)));
3184 : r += xa(i,j,k,n+xcomp) * ya(i+offset.x,j+offset.y,k+offset.z,n+ycomp) * m;
3185 : });
3186 : }
3187 :
3188 : return r;
3189 : }
3190 :
3191 : template <class T>
3192 : template <RunOn run_on>
3193 : T
3194 : BaseFab<T>::sum (int comp, int numcomp) const noexcept
3195 : {
3196 : return this->sum<run_on>(this->domain, DestComp{comp}, NumComps{numcomp});
3197 : }
3198 :
3199 : template <class T>
3200 : template <RunOn run_on>
3201 : T
3202 : BaseFab<T>::sum (const Box& subbox, int comp, int numcomp) const noexcept
3203 : {
3204 : return this->sum<run_on>(subbox, DestComp{comp}, NumComps{numcomp});
3205 : }
3206 :
3207 : template <class T>
3208 : template <RunOn run_on>
3209 : BaseFab<T>&
3210 : BaseFab<T>::negate (int comp, int numcomp) noexcept
3211 : {
3212 : return this->negate<run_on>(this->domain, DestComp{comp}, NumComps{numcomp});
3213 : }
3214 :
3215 : template <class T>
3216 : template <RunOn run_on>
3217 : BaseFab<T>&
3218 : BaseFab<T>::negate (const Box& b, int comp, int numcomp) noexcept
3219 : {
3220 : return this->negate<run_on>(b, DestComp{comp}, NumComps{numcomp});
3221 : }
3222 :
3223 : template <class T>
3224 : template <RunOn run_on>
3225 : BaseFab<T>&
3226 : BaseFab<T>::invert (T const& r, int comp, int numcomp) noexcept
3227 : {
3228 : return this->invert<run_on>(r, this->domain, DestComp{comp}, NumComps{numcomp});
3229 : }
3230 :
3231 : template <class T>
3232 : template <RunOn run_on>
3233 : BaseFab<T>&
3234 : BaseFab<T>::invert (T const& r, const Box& b, int comp, int numcomp) noexcept
3235 : {
3236 : return this->invert<run_on>(r, b, DestComp{comp}, NumComps{numcomp});
3237 : }
3238 :
3239 : template <class T>
3240 : template <RunOn run_on>
3241 : BaseFab<T>&
3242 : BaseFab<T>::plus (T const& r, int comp, int numcomp) noexcept
3243 : {
3244 : return this->plus<run_on>(r, this->domain, DestComp{comp}, NumComps{numcomp});
3245 : }
3246 :
3247 : template <class T>
3248 : template <RunOn run_on>
3249 : BaseFab<T>&
3250 : BaseFab<T>::plus (T const& r, const Box& b, int comp, int numcomp) noexcept
3251 : {
3252 : return this->plus<run_on>(r, b, DestComp{comp}, NumComps{numcomp});
3253 : }
3254 :
3255 : template <class T>
3256 : template <RunOn run_on>
3257 : BaseFab<T>&
3258 0 : BaseFab<T>::plus (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp) noexcept
3259 : {
3260 0 : return this->plus<run_on>(src, this->domain, SrcComp{srccomp}, DestComp{destcomp}, NumComps{numcomp});
3261 : }
3262 :
3263 : template <class T>
3264 : template <RunOn run_on>
3265 : BaseFab<T>&
3266 : BaseFab<T>::atomicAdd (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp) noexcept
3267 : {
3268 : Box ovlp(this->domain);
3269 : ovlp &= src.domain;
3270 : return ovlp.ok() ? this->atomicAdd<run_on>(src,ovlp,ovlp,srccomp,destcomp,numcomp) : *this;
3271 : }
3272 :
3273 : template <class T>
3274 : template <RunOn run_on>
3275 : BaseFab<T>&
3276 : BaseFab<T>::plus (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp,
3277 : int numcomp) noexcept
3278 : {
3279 : return this->plus<run_on>(src, subbox, SrcComp{srccomp}, DestComp{destcomp}, NumComps{numcomp});
3280 : }
3281 :
3282 : template <class T>
3283 : template <RunOn run_on>
3284 : BaseFab<T>&
3285 : BaseFab<T>::atomicAdd (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp,
3286 : int numcomp) noexcept
3287 : {
3288 : Box ovlp(this->domain);
3289 : ovlp &= src.domain;
3290 : ovlp &= subbox;
3291 : return ovlp.ok() ? this->atomicAdd<run_on>(src,ovlp,ovlp,srccomp,destcomp,numcomp) : *this;
3292 : }
3293 :
3294 : template <class T>
3295 : template <RunOn run_on>
3296 : BaseFab<T>&
3297 4770 : BaseFab<T>::plus (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
3298 : int srccomp, int destcomp, int numcomp) noexcept
3299 : {
3300 : BL_ASSERT(destbox.ok());
3301 : BL_ASSERT(src.box().contains(srcbox));
3302 : BL_ASSERT(box().contains(destbox));
3303 : BL_ASSERT(destbox.sameSize(srcbox));
3304 : BL_ASSERT(srccomp >= 0 && srccomp+numcomp <= src.nComp());
3305 : BL_ASSERT(destcomp >= 0 && destcomp+numcomp <= nComp());
3306 :
3307 4770 : Array4<T> const& d = this->array();
3308 4770 : Array4<T const> const& s = src.const_array();
3309 : const auto dlo = amrex::lbound(destbox);
3310 : const auto slo = amrex::lbound(srcbox);
3311 4770 : const Dim3 offset{slo.x-dlo.x,slo.y-dlo.y,slo.z-dlo.z};
3312 3582270 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
3313 : {
3314 : d(i,j,k,n+destcomp) += s(i+offset.x,j+offset.y,k+offset.z,n+srccomp);
3315 : });
3316 :
3317 4770 : return *this;
3318 : }
3319 :
3320 : template <class T>
3321 : template <RunOn run_on>
3322 : BaseFab<T>&
3323 : BaseFab<T>::atomicAdd (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
3324 : int srccomp, int destcomp, int numcomp) noexcept
3325 : {
3326 : BL_ASSERT(destbox.ok());
3327 : BL_ASSERT(src.box().contains(srcbox));
3328 : BL_ASSERT(box().contains(destbox));
3329 : BL_ASSERT(destbox.sameSize(srcbox));
3330 : BL_ASSERT(srccomp >= 0 && srccomp+numcomp <= src.nComp());
3331 : BL_ASSERT(destcomp >= 0 && destcomp+numcomp <= nComp());
3332 :
3333 : Array4<T> const& d = this->array();
3334 : Array4<T const> const& s = src.const_array();
3335 : const auto dlo = amrex::lbound(destbox);
3336 : const auto slo = amrex::lbound(srcbox);
3337 : const Dim3 offset{slo.x-dlo.x,slo.y-dlo.y,slo.z-dlo.z};
3338 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
3339 : {
3340 : T* p = d.ptr(i,j,k,n+destcomp);
3341 : HostDevice::Atomic::Add(p, s(i+offset.x,j+offset.y,k+offset.z,n+srccomp));
3342 : });
3343 :
3344 : return *this;
3345 : }
3346 :
3347 : template <class T>
3348 : template <RunOn run_on>
3349 : BaseFab<T>&
3350 : BaseFab<T>::lockAdd (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
3351 : int srccomp, int destcomp, int numcomp) noexcept
3352 : {
3353 : #if defined(AMREX_USE_OMP) && (AMREX_SPACEDIM > 1)
3354 : #if defined(AMREX_USE_GPU)
3355 : if (run_on == RunOn::Host || Gpu::notInLaunchRegion()) {
3356 : #endif
3357 : BL_ASSERT(destbox.ok());
3358 : BL_ASSERT(src.box().contains(srcbox));
3359 : BL_ASSERT(box().contains(destbox));
3360 : BL_ASSERT(destbox.sameSize(srcbox));
3361 : BL_ASSERT(srccomp >= 0 && srccomp+numcomp <= src.nComp());
3362 : BL_ASSERT(destcomp >= 0 && destcomp+numcomp <= nComp());
3363 :
3364 : Array4<T> const& d = this->array();
3365 : Array4<T const> const& s = src.const_array();
3366 : auto const& dlo = amrex::lbound(destbox);
3367 : auto const& dhi = amrex::ubound(destbox);
3368 : auto const& len = amrex::length(destbox);
3369 : auto const& slo = amrex::lbound(srcbox);
3370 : Dim3 const offset{slo.x-dlo.x, slo.y-dlo.y, slo.z-dlo.z};
3371 :
3372 : int planedim;
3373 : int nplanes;
3374 : int plo;
3375 : if (len.z == 1) {
3376 : planedim = 1;
3377 : nplanes = len.y;
3378 : plo = dlo.y;
3379 : } else {
3380 : planedim = 2;
3381 : nplanes = len.z;
3382 : plo = dlo.z;
3383 : }
3384 :
3385 : auto* mask = static_cast<bool*>(amrex_mempool_alloc(sizeof(bool)*nplanes));
3386 : for (int ip = 0; ip < nplanes; ++ip) {
3387 : mask[ip] = false;
3388 : }
3389 :
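// Try-lock sweep over the planes of destbox (j-slabs when destbox is flat in
// z, k-slabs otherwise). Each global plane index m is guarded by a shared
// OpenMP lock from OpenMP::get_lock(m), so several threads lockAdd-ing into
// the same destination can make progress in parallel: a thread that fails
// omp_test_lock does not block; it skips ahead to the next plane whose mask[]
// entry is still false and retries later.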
3390 : int mm = 0;
3391 : int planes_left = nplanes;
3392 : while (planes_left > 0) {
3393 : AMREX_ASSERT(mm < nplanes);
3394 : auto const m = mm + plo;
3395 : auto* lock = OpenMP::get_lock(m);
3396 : if (omp_test_lock(lock))
3397 : {
3398 : auto lo = dlo;
3399 : auto hi = dhi;
3400 : if (planedim == 1) {
3401 : lo.y = m;
3402 : hi.y = m;
3403 : } else {
3404 : lo.z = m;
3405 : hi.z = m;
3406 : }
3407 :
3408 : for (int n = 0; n < numcomp; ++n) {
3409 : for (int k = lo.z; k <= hi.z; ++k) {
3410 : for (int j = lo.y; j <= hi.y; ++j) {
3411 : auto * pdst = d.ptr(dlo.x,j ,k ,n+destcomp);
3412 : auto const* psrc = s.ptr(slo.x,j+offset.y,k+offset.z,n+ srccomp);
3413 : #pragma omp simd
3414 : for (int ii = 0; ii < len.x; ++ii) {
3415 : pdst[ii] += psrc[ii];
3416 : }
3417 : }
3418 : }
3419 : }
3420 :
3421 : mask[mm] = true;
3422 : --planes_left;
3423 : omp_unset_lock(lock);
3424 : if (planes_left == 0) { break; }
3425 : }
3426 :
3427 : ++mm;
3428 : for (int ip = 0; ip < nplanes; ++ip) {
3429 : int new_mm = (mm+ip) % nplanes;
3430 : if ( ! mask[new_mm] ) {
3431 : mm = new_mm;
3432 : break;
3433 : }
3434 : }
3435 : }
3436 :
3437 : amrex_mempool_free(mask);
3438 :
3439 : return *this;
3440 :
3441 : #if defined(AMREX_USE_GPU)
3442 : } else {
3443 : return this->template atomicAdd<run_on>(src, srcbox, destbox, srccomp, destcomp, numcomp);
3444 : }
3445 : #endif
3446 : #else
3447 : return this->template atomicAdd<run_on>(src, srcbox, destbox, srccomp, destcomp, numcomp);
3448 : #endif
3449 : }
3450 :
3451 : template <class T>
3452 : template <RunOn run_on>
3453 : BaseFab<T>&
3454 : BaseFab<T>::minus (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp) noexcept
3455 : {
3456 : return this->minus<run_on>(src, this->domain, SrcComp{srccomp}, DestComp{destcomp}, NumComps{numcomp});
3457 : }
3458 :
3459 : template <class T>
3460 : template <RunOn run_on>
3461 : BaseFab<T>&
3462 : BaseFab<T>::minus (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp, int numcomp) noexcept
3463 : {
3464 : return this->minus<run_on>(src, subbox, SrcComp{srccomp}, DestComp{destcomp}, NumComps{numcomp});
3465 : }
3466 :
3467 : template <class T>
3468 : template <RunOn run_on>
3469 : BaseFab<T>&
3470 : BaseFab<T>::minus (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
3471 : int srccomp, int destcomp, int numcomp) noexcept
3472 : {
3473 : BL_ASSERT(destbox.ok());
3474 : BL_ASSERT(src.box().contains(srcbox));
3475 : BL_ASSERT(box().contains(destbox));
3476 : BL_ASSERT(destbox.sameSize(srcbox));
3477 : BL_ASSERT(srccomp >= 0 && srccomp+numcomp <= src.nComp());
3478 : BL_ASSERT(destcomp >= 0 && destcomp+numcomp <= nComp());
3479 :
3480 : Array4<T> const& d = this->array();
3481 : Array4<T const> const& s = src.const_array();
3482 : const auto dlo = amrex::lbound(destbox);
3483 : const auto slo = amrex::lbound(srcbox);
3484 : const Dim3 offset{slo.x-dlo.x,slo.y-dlo.y,slo.z-dlo.z};
3485 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
3486 : {
3487 : d(i,j,k,n+destcomp) -= s(i+offset.x,j+offset.y,k+offset.z,n+srccomp);
3488 : });
3489 :
3490 : return *this;
3491 : }
3492 :
3493 : template <class T>
3494 : template <RunOn run_on>
3495 : BaseFab<T>&
3496 : BaseFab<T>::mult (T const& r, int comp, int numcomp) noexcept
3497 : {
3498 : return this->mult<run_on>(r, this->domain, DestComp{comp}, NumComps{numcomp});
3499 : }
3500 :
3501 : template <class T>
3502 : template <RunOn run_on>
3503 : BaseFab<T>&
3504 : BaseFab<T>::mult (T const& r, const Box& b, int comp, int numcomp) noexcept
3505 : {
3506 : return this->mult<run_on>(r, b, DestComp{comp}, NumComps{numcomp});
3507 : }
3508 :
3509 : template <class T>
3510 : template <RunOn run_on>
3511 : BaseFab<T>&
3512 0 : BaseFab<T>::mult (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp) noexcept
3513 : {
3514 0 : return this->mult<run_on>(src, this->domain, SrcComp{srccomp}, DestComp{destcomp}, NumComps{numcomp});
3515 : }
3516 :
3517 : template <class T>
3518 : template <RunOn run_on>
3519 : BaseFab<T>&
3520 : BaseFab<T>::mult (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp, int numcomp) noexcept
3521 : {
3522 : return this->mult<run_on>(src, subbox, SrcComp{srccomp}, DestComp{destcomp}, NumComps{numcomp});
3523 : }
3524 :
3525 : template <class T>
3526 : template <RunOn run_on>
3527 : BaseFab<T>&
3528 : BaseFab<T>::mult (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
3529 : int srccomp, int destcomp, int numcomp) noexcept
3530 : {
3531 : BL_ASSERT(destbox.ok());
3532 : BL_ASSERT(src.box().contains(srcbox));
3533 : BL_ASSERT(box().contains(destbox));
3534 : BL_ASSERT(destbox.sameSize(srcbox));
3535 : BL_ASSERT(srccomp >= 0 && srccomp+numcomp <= src.nComp());
3536 : BL_ASSERT(destcomp >= 0 && destcomp+numcomp <= nComp());
3537 :
3538 : Array4<T> const& d = this->array();
3539 : Array4<T const> const& s = src.const_array();
3540 : const auto dlo = amrex::lbound(destbox);
3541 : const auto slo = amrex::lbound(srcbox);
3542 : const Dim3 offset{slo.x-dlo.x,slo.y-dlo.y,slo.z-dlo.z};
3543 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
3544 : {
3545 : d(i,j,k,n+destcomp) *= s(i+offset.x,j+offset.y,k+offset.z,n+srccomp);
3546 : });
3547 :
3548 : return *this;
3549 : }
3550 :
3551 : template <class T>
3552 : template <RunOn run_on>
3553 : BaseFab<T>&
3554 : BaseFab<T>::divide (T const& r, int comp, int numcomp) noexcept
3555 : {
3556 : return this->divide<run_on>(r, this->domain, DestComp{comp}, NumComps{numcomp});
3557 : }
3558 :
3559 : template <class T>
3560 : template <RunOn run_on>
3561 : BaseFab<T>&
3562 : BaseFab<T>::divide (T const& r, const Box& b, int comp, int numcomp) noexcept
3563 : {
3564 : return this->divide<run_on>(r, b, DestComp{comp}, NumComps{numcomp});
3565 : }
3566 :
3567 : template <class T>
3568 : template <RunOn run_on>
3569 : BaseFab<T>&
3570 : BaseFab<T>::divide (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp) noexcept
3571 : {
3572 : return this->divide<run_on>(src, this->domain, SrcComp{srccomp}, DestComp{destcomp}, NumComps{numcomp});
3573 : }
3574 :
3575 : template <class T>
3576 : template <RunOn run_on>
3577 : BaseFab<T>&
3578 : BaseFab<T>::divide (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp, int numcomp) noexcept
3579 : {
3580 : return this->divide<run_on>(src, subbox, SrcComp{srccomp}, DestComp{destcomp}, NumComps{numcomp});
3581 : }
3582 :
3583 : template <class T>
3584 : template <RunOn run_on>
3585 : BaseFab<T>&
3586 : BaseFab<T>::divide (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
3587 : int srccomp, int destcomp, int numcomp) noexcept
3588 : {
3589 : BL_ASSERT(destbox.ok());
3590 : BL_ASSERT(src.box().contains(srcbox));
3591 : BL_ASSERT(box().contains(destbox));
3592 : BL_ASSERT(destbox.sameSize(srcbox));
3593 : BL_ASSERT(srccomp >= 0 && srccomp+numcomp <= src.nComp());
3594 : BL_ASSERT(destcomp >= 0 && destcomp+numcomp <= nComp());
3595 :
3596 : Array4<T> const& d = this->array();
3597 : Array4<T const> const& s = src.const_array();
3598 : const auto dlo = amrex::lbound(destbox);
3599 : const auto slo = amrex::lbound(srcbox);
3600 : const Dim3 offset{slo.x-dlo.x,slo.y-dlo.y,slo.z-dlo.z};
3601 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
3602 : {
3603 : d(i,j,k,n+destcomp) /= s(i+offset.x,j+offset.y,k+offset.z,n+srccomp);
3604 : });
3605 :
3606 : return *this;
3607 : }
3608 :
3609 : template <class T>
3610 : template <RunOn run_on>
3611 : BaseFab<T>&
3612 : BaseFab<T>::protected_divide (const BaseFab<T>& src) noexcept
3613 : {
3614 : Box ovlp(this->domain);
3615 : ovlp &= src.domain;
3616 : return ovlp.ok() ? this->protected_divide<run_on>(src,ovlp,ovlp,0,0,this->nvar) : *this;
3617 : }
3618 :
3619 : template <class T>
3620 : template <RunOn run_on>
3621 : BaseFab<T>&
3622 : BaseFab<T>::protected_divide (const BaseFab<T>& src, int srccomp, int destcomp, int numcomp) noexcept
3623 : {
3624 : Box ovlp(this->domain);
3625 : ovlp &= src.domain;
3626 : return ovlp.ok() ? this->protected_divide<run_on>(src,ovlp,ovlp,srccomp,destcomp,numcomp) : *this;
3627 : }
3628 :
3629 : template <class T>
3630 : template <RunOn run_on>
3631 : BaseFab<T>&
3632 : BaseFab<T>::protected_divide (const BaseFab<T>& src, const Box& subbox, int srccomp, int destcomp,
3633 : int numcomp) noexcept
3634 : {
3635 : Box ovlp(this->domain);
3636 : ovlp &= src.domain;
3637 : ovlp &= subbox;
3638 : return ovlp.ok() ? this->protected_divide<run_on>(src,ovlp,ovlp,srccomp,destcomp,numcomp) : *this;
3639 : }
3640 :
3641 : template <class T>
3642 : template <RunOn run_on>
3643 : BaseFab<T>&
3644 : BaseFab<T>::protected_divide (const BaseFab<T>& src, const Box& srcbox, const Box& destbox,
3645 : int srccomp, int destcomp, int numcomp) noexcept
3646 : {
3647 : BL_ASSERT(destbox.ok());
3648 : BL_ASSERT(src.box().contains(srcbox));
3649 : BL_ASSERT(box().contains(destbox));
3650 : BL_ASSERT(destbox.sameSize(srcbox));
3651 : BL_ASSERT(srccomp >= 0 && srccomp+numcomp <= src.nComp());
3652 : BL_ASSERT(destcomp >= 0 && destcomp+numcomp <= nComp());
3653 :
3654 : Array4<T> const& d = this->array();
3655 : Array4<T const> const& s = src.const_array();
3656 : const auto dlo = amrex::lbound(destbox);
3657 : const auto slo = amrex::lbound(srcbox);
3658 : const Dim3 offset{slo.x-dlo.x,slo.y-dlo.y,slo.z-dlo.z};
3659 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, destbox, numcomp, i, j, k, n,
3660 : {
3661 : if (s(i+offset.x,j+offset.y,k+offset.z,n+srccomp) != 0) {
3662 : d(i,j,k,n+destcomp) /= s(i+offset.x,j+offset.y,k+offset.z,n+srccomp);
3663 : }
3664 : });
3665 :
3666 : return *this;
3667 : }
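// Usage sketch: protected_divide is the zero-safe variant of divide(); cells
// where the divisor is exactly zero keep their current value instead of
// producing an inf or nan. Names below are illustrative assumptions.
//
//     BaseFab<Real> num(bx, 1), den(bx, 1);    // den may contain zeros
//     num.protected_divide<RunOn::Host>(den);  // num /= den only where den != 0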
3668 :
3669 : /**
3670 : * Linear Interpolation / Extrapolation
3671 : * The result is (t2-t)/(t2-t1)*f1 + (t-t1)/(t2-t1)*f2.
3672 : * Data is taken from the b1 region of f1 and the b2 region of f2,
3673 : * and stored in the b region of this FAB.
3674 : * Boxes b, b1 and b2 must be the same size.
3675 : * Data is taken from component comp1 of f1 and comp2 of f2,
3676 : * and stored in component comp of this FAB.
3677 : * This FAB is returned as a reference for chaining.
3678 : */
3679 : template <class T>
3680 : template <RunOn run_on>
3681 : BaseFab<T>&
3682 : BaseFab<T>::linInterp (const BaseFab<T>& f1, const Box& b1, int comp1,
3683 : const BaseFab<T>& f2, const Box& b2, int comp2,
3684 : Real t1, Real t2, Real t,
3685 : const Box& b, int comp, int numcomp) noexcept
3686 : {
3687 : if (amrex::almostEqual(t1,t2) || amrex::almostEqual(t1,t)) {
3688 : return copy<run_on>(f1,b1,comp1,b,comp,numcomp);
3689 : } else if (amrex::almostEqual(t2,t)) {
3690 : return copy<run_on>(f2,b2,comp2,b,comp,numcomp);
3691 : } else {
3692 : Real alpha = (t2-t)/(t2-t1);
3693 : Real beta = (t-t1)/(t2-t1);
3694 : return linComb<run_on>(f1,b1,comp1,f2,b2,comp2,alpha,beta,b,comp,numcomp);
3695 : }
3696 : }
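// Worked example of the weights above: with t1 = 0.0, t2 = 1.0 and t = 0.25
// the else-branch forms alpha = (t2-t)/(t2-t1) = 0.75 and beta =
// (t-t1)/(t2-t1) = 0.25, i.e. dest = 0.75*f1 + 0.25*f2 on box b. When t
// (almost) equals t1 or t2 the matching fab is copied outright, which also
// avoids dividing by t2-t1 when the two times coincide.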
3697 :
3698 : template <class T>
3699 : template <RunOn run_on>
3700 : BaseFab<T>&
3701 : BaseFab<T>::linInterp (const BaseFab<T>& f1, int comp1,
3702 : const BaseFab<T>& f2, int comp2,
3703 : Real t1, Real t2, Real t,
3704 : const Box& b, int comp, int numcomp) noexcept
3705 : {
3706 : if (amrex::almostEqual(t1,t2) || amrex::almostEqual(t1,t)) {
3707 : return copy<run_on>(f1,b,comp1,b,comp,numcomp);
3708 : } else if (amrex::almostEqual(t2,t)) {
3709 : return copy<run_on>(f2,b,comp2,b,comp,numcomp);
3710 : } else {
3711 : Real alpha = (t2-t)/(t2-t1);
3712 : Real beta = (t-t1)/(t2-t1);
3713 : return linComb<run_on>(f1,b,comp1,f2,b,comp2,alpha,beta,b,comp,numcomp);
3714 : }
3715 : }
3716 :
3717 : //
3718 : // New interfaces: overloads taking the type-safe SrcComp/DestComp/NumComps tags
3719 : //
3720 :
3721 : template <class T>
3722 : template <RunOn run_on>
3723 : void
3724 4770 : BaseFab<T>::setVal (T const& val) noexcept
3725 : {
3726 4770 : this->setVal<run_on>(val, this->domain, DestComp{0}, NumComps{this->nvar});
3727 4770 : }
3728 :
3729 : template <class T>
3730 : template <RunOn run_on>
3731 : void
3732 5184 : BaseFab<T>::setVal (T const& x, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept
3733 : {
3734 : AMREX_ASSERT(dcomp.i >= 0 && dcomp.i + ncomp.n <= this->nvar);
3735 5184 : Array4<T> const& a = this->array();
3736 1822280 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG (run_on, bx, ncomp.n, i, j, k, n,
3737 : {
3738 : a(i,j,k,n+dcomp.i) = x;
3739 : });
3740 5184 : }
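// Usage sketch for the tag-based setVal above: zero components 1 and 2 of an
// illustrative 3-component fab on its box (names are assumptions, not code
// from this header).
//
//     BaseFab<Real> fab(bx, 3);
//     fab.setVal<RunOn::Host>(Real(0.0), bx, DestComp{1}, NumComps{2});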
3741 :
3742 : template <class T>
3743 : template <RunOn run_on>
3744 : void
3745 : BaseFab<T>::setValIf (T const& val, const BaseFab<int>& mask) noexcept
3746 : {
3747 : this->setValIf<run_on>(val, this->domain, mask, DestComp{0}, NumComps{this->nvar});
3748 : }
3749 :
3750 : template <class T>
3751 : template <RunOn run_on>
3752 : void
3753 : BaseFab<T>::setValIf (T const& val, Box const& bx, const BaseFab<int>& mask, DestComp dcomp, NumComps ncomp) noexcept
3754 : {
3755 : AMREX_ASSERT(dcomp.i >= 0 && dcomp.i + ncomp.n <= this->nvar);
3756 : Array4<T> const& a = this->array();
3757 : Array4<int const> const& m = mask.const_array();
3758 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG (run_on, bx, ncomp.n, i, j, k, n,
3759 : {
3760 : if (m(i,j,k)) { a(i,j,k,n+dcomp.i) = val; }
3761 : });
3762 : }
3763 :
3764 : template <class T>
3765 : template <RunOn run_on>
3766 : void
3767 : BaseFab<T>::setValIfNot (T const& val, const BaseFab<int>& mask) noexcept
3768 : {
3769 : this->setValIfNot<run_on>(val, this->domain, mask, DestComp{0}, NumComps{this->nvar});
3770 : }
3771 :
3772 : template <class T>
3773 : template <RunOn run_on>
3774 : void
3775 : BaseFab<T>::setValIfNot (T const& val, Box const& bx, const BaseFab<int>& mask, DestComp dcomp, NumComps ncomp) noexcept
3776 : {
3777 : AMREX_ASSERT(dcomp.i >= 0 && dcomp.i + ncomp.n <= this->nvar);
3778 : Array4<T> const& a = this->array();
3779 : Array4<int const> const& m = mask.const_array();
3780 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG (run_on, bx, ncomp.n, i, j, k, n,
3781 : {
3782 : if (!m(i,j,k)) { a(i,j,k,n+dcomp.i) = val; }
3783 : });
3784 : }
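// Usage sketch: setValIf writes val where the int mask is nonzero and
// setValIfNot where it is zero, so the pair below overwrites every cell of an
// illustrative fab with one of two values. The mask fab must cover bx; names
// are assumptions.
//
//     BaseFab<int> mask(bx, 1);
//     // ...fill mask with 0/1 flags...
//     fab.setValIf<RunOn::Host>(Real(1.0), bx, mask, DestComp{0}, NumComps{1});
//     fab.setValIfNot<RunOn::Host>(Real(-1.0), bx, mask, DestComp{0}, NumComps{1});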
3785 :
3786 : template <class T>
3787 : template <RunOn run_on>
3788 : void
3789 216 : BaseFab<T>::setComplement (T const& x, const Box& bx, DestComp dcomp, NumComps ncomp) noexcept
3790 : {
3791 : #ifdef AMREX_USE_GPU
3792 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
3793 : Array4<T> const& a = this->array();
3794 : amrex::ParallelFor(this->domain, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
3795 : {
3796 : if (! bx.contains(IntVect(AMREX_D_DECL(i,j,k)))) {
3797 : for (int n = dcomp.i; n < ncomp.n+dcomp.i; ++n) {
3798 : a(i,j,k,n) = x;
3799 : }
3800 : }
3801 : });
3802 : } else
3803 : #endif
3804 : {
3805 432 : const BoxList b_lst = amrex::boxDiff(this->domain,bx);
3806 630 : for (auto const& b : b_lst) {
3807 414 : this->setVal<RunOn::Host>(x, b, dcomp, ncomp);
3808 : }
3809 : }
3810 216 : }
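// Usage sketch: setComplement above assigns x on this->domain minus bx, which
// is convenient for resetting, say, ghost cells while leaving an interior box
// untouched. The one-cell grow() is an illustrative assumption.
//
//     Box interior(IntVect(AMREX_D_DECL(0,0,0)), IntVect(AMREX_D_DECL(15,15,15)));
//     BaseFab<Real> fab(amrex::grow(interior,1), 1);  // interior + 1 ghost layer
//     fab.setComplement<RunOn::Host>(Real(0.0), interior, DestComp{0}, NumComps{1});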
3811 :
3812 : template <class T>
3813 : template <RunOn run_on>
3814 : BaseFab<T>&
3815 : BaseFab<T>::copy (const BaseFab<T>& src) noexcept
3816 : {
3817 : this->copy<run_on>(src, this->domain, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
3818 : return *this;
3819 : }
3820 :
3821 : template <class T>
3822 : template <RunOn run_on>
3823 : BaseFab<T>&
3824 : BaseFab<T>::copy (const BaseFab<T>& src, Box bx,
3825 : SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept
3826 : {
3827 : AMREX_ASSERT(this->domain.sameType(src.domain));
3828 : AMREX_ASSERT(scomp.i >= 0 && scomp.i+ncomp.n <= src.nvar);
3829 : AMREX_ASSERT(dcomp.i >= 0 && dcomp.i+ncomp.n <= this->nvar);
3830 :
3831 : bx &= src.domain;
3832 :
3833 : Array4<T> const& d = this->array();
3834 : Array4<T const> const& s = src.const_array();
3835 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
3836 : {
3837 : d(i,j,k,n+dcomp.i) = s(i,j,k,n+scomp.i);
3838 : });
3839 :
3840 : return *this;
3841 : }
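// Usage sketch: the tag-based copy above can shuffle components while copying;
// here component 2 of an illustrative src fab lands in component 0 of dst.
// Note that bx is clipped against src.domain before the kernel runs.
//
//     BaseFab<Real> dst(bx, 1), src(bx, 3);
//     dst.copy<RunOn::Host>(src, bx, SrcComp{2}, DestComp{0}, NumComps{1});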
3842 :
3843 : template <class T>
3844 : template <RunOn run_on>
3845 : BaseFab<T>&
3846 : BaseFab<T>::plus (T const& val) noexcept
3847 : {
3848 : return this->plus<run_on>(val, this->domain, DestComp{0}, NumComps{this->nvar});
3849 : }
3850 :
3851 : template <class T>
3852 : template <RunOn run_on>
3853 : BaseFab<T>&
3854 : BaseFab<T>::operator+= (T const& val) noexcept
3855 : {
3856 : return this->plus<run_on>(val, this->domain, DestComp{0}, NumComps{this->nvar});
3857 : }
3858 :
3859 : template <class T>
3860 : template <RunOn run_on>
3861 : BaseFab<T>&
3862 : BaseFab<T>::plus (T const& val, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept
3863 : {
3864 : BL_ASSERT(dcomp.i >= 0 && dcomp.i + ncomp.n <= this->nvar);
3865 :
3866 : Array4<T> const& a = this->array();
3867 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
3868 : {
3869 : a(i,j,k,n+dcomp.i) += val;
3870 : });
3871 :
3872 : return *this;
3873 : }
3874 :
3875 : template <class T>
3876 : template <RunOn run_on>
3877 : BaseFab<T>&
3878 : BaseFab<T>::plus (const BaseFab<T>& src) noexcept
3879 : {
3880 : return this->plus<run_on>(src, this->domain, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
3881 : }
3882 :
3883 : template <class T>
3884 : template <RunOn run_on>
3885 : BaseFab<T>&
3886 : BaseFab<T>::operator+= (const BaseFab<T>& src) noexcept
3887 : {
3888 : return this->plus<run_on>(src, this->domain, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
3889 : }
3890 :
3891 : template <class T>
3892 : template <RunOn run_on>
3893 : BaseFab<T>&
3894 0 : BaseFab<T>::plus (const BaseFab<T>& src, Box bx,
3895 : SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept
3896 : {
3897 : AMREX_ASSERT(this->domain.sameType(src.domain));
3898 : AMREX_ASSERT(scomp.i >= 0 && scomp.i+ncomp.n <= src.nvar);
3899 : AMREX_ASSERT(dcomp.i >= 0 && dcomp.i+ncomp.n <= this->nvar);
3900 :
3901 0 : bx &= src.domain;
3902 :
3903 0 : Array4<T> const& d = this->array();
3904 0 : Array4<T const> const& s = src.const_array();
3905 0 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
3906 : {
3907 : d(i,j,k,n+dcomp.i) += s(i,j,k,n+scomp.i);
3908 : });
3909 :
3910 0 : return *this;
3911 : }
3912 :
3913 : template <class T>
3914 : template <RunOn run_on>
3915 : BaseFab<T>&
3916 : BaseFab<T>::minus (T const& val) noexcept
3917 : {
3918 : return this->minus<run_on>(val, this->domain, DestComp{0}, NumComps{this->nvar});
3919 : }
3920 :
3921 : template <class T>
3922 : template <RunOn run_on>
3923 : BaseFab<T>&
3924 : BaseFab<T>::operator-= (T const& val) noexcept
3925 : {
3926 : return this->minus<run_on>(val, this->domain, DestComp{0}, NumComps{this->nvar});
3927 : }
3928 :
3929 : template <class T>
3930 : template <RunOn run_on>
3931 : BaseFab<T>&
3932 : BaseFab<T>::minus (T const& val, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept
3933 : {
3934 : BL_ASSERT(dcomp.i >= 0 && dcomp.i + ncomp.n <= this->nvar);
3935 :
3936 : Array4<T> const& a = this->array();
3937 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
3938 : {
3939 : a(i,j,k,n+dcomp.i) -= val;
3940 : });
3941 :
3942 : return *this;
3943 : }
3944 :
3945 : template <class T>
3946 : template <RunOn run_on>
3947 : BaseFab<T>&
3948 : BaseFab<T>::minus (const BaseFab<T>& src) noexcept
3949 : {
3950 : return this->minus<run_on>(src, this->domain, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
3951 : }
3952 :
3953 : template <class T>
3954 : template <RunOn run_on>
3955 : BaseFab<T>&
3956 : BaseFab<T>::operator-= (const BaseFab<T>& src) noexcept
3957 : {
3958 : return this->minus<run_on>(src, this->domain, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
3959 : }
3960 :
3961 : template <class T>
3962 : template <RunOn run_on>
3963 : BaseFab<T>&
3964 : BaseFab<T>::minus (const BaseFab<T>& src, Box bx,
3965 : SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept
3966 : {
3967 : AMREX_ASSERT(this->domain.sameType(src.domain));
3968 : AMREX_ASSERT(scomp.i >= 0 && scomp.i+ncomp.n <= src.nvar);
3969 : AMREX_ASSERT(dcomp.i >= 0 && dcomp.i+ncomp.n <= this->nvar);
3970 :
3971 : bx &= src.domain;
3972 :
3973 : Array4<T> const& d = this->array();
3974 : Array4<T const> const& s = src.const_array();
3975 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
3976 : {
3977 : d(i,j,k,n+dcomp.i) -= s(i,j,k,n+scomp.i);
3978 : });
3979 :
3980 : return *this;
3981 : }
3982 :
3983 : template <class T>
3984 : template <RunOn run_on>
3985 : BaseFab<T>&
3986 : BaseFab<T>::mult (T const& val) noexcept
3987 : {
3988 : return this->mult<run_on>(val, this->domain, DestComp{0}, NumComps{this->nvar});
3989 : }
3990 :
3991 : template <class T>
3992 : template <RunOn run_on>
3993 : BaseFab<T>&
3994 : BaseFab<T>::operator*= (T const& val) noexcept
3995 : {
3996 : return this->mult<run_on>(val, this->domain, DestComp{0}, NumComps{this->nvar});
3997 : }
3998 :
3999 : template <class T>
4000 : template <RunOn run_on>
4001 : BaseFab<T>&
4002 : BaseFab<T>::mult (T const& val, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept
4003 : {
4004 : BL_ASSERT(dcomp.i >= 0 && dcomp.i + ncomp.n <= this->nvar);
4005 :
4006 : Array4<T> const& a = this->array();
4007 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
4008 : {
4009 : a(i,j,k,n+dcomp.i) *= val;
4010 : });
4011 :
4012 : return *this;
4013 : }
4014 :
4015 : template <class T>
4016 : template <RunOn run_on>
4017 : BaseFab<T>&
4018 : BaseFab<T>::mult (const BaseFab<T>& src) noexcept
4019 : {
4020 : return this->mult<run_on>(src, this->domain, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
4021 : }
4022 :
4023 : template <class T>
4024 : template <RunOn run_on>
4025 : BaseFab<T>&
4026 : BaseFab<T>::operator*= (const BaseFab<T>& src) noexcept
4027 : {
4028 : return this->mult<run_on>(src, this->domain, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
4029 : }
4030 :
4031 : template <class T>
4032 : template <RunOn run_on>
4033 : BaseFab<T>&
4034 0 : BaseFab<T>::mult (const BaseFab<T>& src, Box bx,
4035 : SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept
4036 : {
4037 : AMREX_ASSERT(this->domain.sameType(src.domain));
4038 : AMREX_ASSERT(scomp.i >= 0 && scomp.i+ncomp.n <= src.nvar);
4039 : AMREX_ASSERT(dcomp.i >= 0 && dcomp.i+ncomp.n <= this->nvar);
4040 :
4041 0 : bx &= src.domain;
4042 :
4043 0 : Array4<T> const& d = this->array();
4044 0 : Array4<T const> const& s = src.const_array();
4045 0 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
4046 : {
4047 : d(i,j,k,n+dcomp.i) *= s(i,j,k,n+scomp.i);
4048 : });
4049 :
4050 0 : return *this;
4051 : }
4052 :
4053 : template <class T>
4054 : template <RunOn run_on>
4055 : BaseFab<T>&
4056 : BaseFab<T>::divide (T const& val) noexcept
4057 : {
4058 : return this->divide<run_on>(val, this->domain, DestComp{0}, NumComps{this->nvar});
4059 : }
4060 :
4061 : template <class T>
4062 : template <RunOn run_on>
4063 : BaseFab<T>&
4064 : BaseFab<T>::operator/= (T const& val) noexcept
4065 : {
4066 : return this->divide<run_on>(val, this->domain, DestComp{0}, NumComps{this->nvar});
4067 : }
4068 :
4069 : template <class T>
4070 : template <RunOn run_on>
4071 : BaseFab<T>&
4072 : BaseFab<T>::divide (T const& val, Box const& bx, DestComp dcomp, NumComps ncomp) noexcept
4073 : {
4074 : BL_ASSERT(dcomp.i >= 0 && dcomp.i + ncomp.n <= this->nvar);
4075 :
4076 : Array4<T> const& a = this->array();
4077 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
4078 : {
4079 : a(i,j,k,n+dcomp.i) /= val;
4080 : });
4081 :
4082 : return *this;
4083 : }
4084 :
4085 : template <class T>
4086 : template <RunOn run_on>
4087 : BaseFab<T>&
4088 : BaseFab<T>::divide (const BaseFab<T>& src) noexcept
4089 : {
4090 : return this->divide<run_on>(src, this->domain, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
4091 : }
4092 :
4093 : template <class T>
4094 : template <RunOn run_on>
4095 : BaseFab<T>&
4096 : BaseFab<T>::operator/= (const BaseFab<T>& src) noexcept
4097 : {
4098 : return this->divide<run_on>(src, this->domain, SrcComp{0}, DestComp{0}, NumComps{this->nvar});
4099 : }
4100 :
4101 : template <class T>
4102 : template <RunOn run_on>
4103 : BaseFab<T>&
4104 : BaseFab<T>::divide (const BaseFab<T>& src, Box bx,
4105 : SrcComp scomp, DestComp dcomp, NumComps ncomp) noexcept
4106 : {
4107 : AMREX_ASSERT(this->domain.sameType(src.domain));
4108 : AMREX_ASSERT(scomp.i >= 0 && scomp.i+ncomp.n <= src.nvar);
4109 : AMREX_ASSERT(dcomp.i >= 0 && dcomp.i+ncomp.n <= this->nvar);
4110 :
4111 : bx &= src.domain;
4112 :
4113 : Array4<T> const& d = this->array();
4114 : Array4<T const> const& s = src.const_array();
4115 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
4116 : {
4117 : d(i,j,k,n+dcomp.i) /= s(i,j,k,n+scomp.i);
4118 : });
4119 :
4120 : return *this;
4121 : }
4122 :
4123 : template <class T>
4124 : template <RunOn run_on>
4125 : BaseFab<T>&
4126 : BaseFab<T>::negate () noexcept
4127 : {
4128 : return this->negate<run_on>(this->domain, DestComp{0}, NumComps{this->nvar});
4129 : }
4130 :
4131 : template <class T>
4132 : template <RunOn run_on>
4133 : BaseFab<T>&
4134 : BaseFab<T>::negate (const Box& bx, DestComp dcomp, NumComps ncomp) noexcept
4135 : {
4136 : BL_ASSERT(dcomp.i >= 0 && dcomp.i + ncomp.n <= this->nvar);
4137 :
4138 : Array4<T> const& a = this->array();
4139 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
4140 : {
4141 : a(i,j,k,n+dcomp.i) = -a(i,j,k,n+dcomp.i);
4142 : });
4143 :
4144 : return *this;
4145 : }
4146 :
4147 : template <class T>
4148 : template <RunOn run_on>
4149 : BaseFab<T>&
4150 : BaseFab<T>::invert (T const& r) noexcept
4151 : {
4152 : return this->invert<run_on>(r, this->domain, DestComp{0}, NumComps{this->nvar});
4153 : }
4154 :
4155 : template <class T>
4156 : template <RunOn run_on>
4157 : BaseFab<T>&
4158 : BaseFab<T>::invert (T const& r, const Box& bx, DestComp dcomp, NumComps ncomp) noexcept
4159 : {
4160 : BL_ASSERT(dcomp.i >= 0 && dcomp.i + ncomp.n <= this->nvar);
4161 :
4162 : Array4<T> const& a = this->array();
4163 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(run_on, bx, ncomp.n, i, j, k, n,
4164 : {
4165 : a(i,j,k,n+dcomp.i) = r / a(i,j,k,n+dcomp.i);
4166 : });
4167 :
4168 : return *this;
4169 : }
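// Note: invert replaces each selected value with r divided by it, so an
// illustrative call invert<RunOn::Host>(Real(1.0), bx, DestComp{0},
// NumComps{1}) forms the pointwise reciprocal of component 0 on bx.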
4170 :
4171 : template <class T>
4172 : template <RunOn run_on>
4173 : T
4174 : BaseFab<T>::sum (const Box& bx, DestComp dcomp, NumComps ncomp) const noexcept
4175 : {
4176 : AMREX_ASSERT(dcomp.i >= 0 && dcomp.i+ncomp.n <= this->nvar);
4177 :
4178 : T r = 0;
4179 : Array4<T const> const& a = this->const_array();
4180 : #ifdef AMREX_USE_GPU
4181 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
4182 : ReduceOps<ReduceOpSum> reduce_op;
4183 : ReduceData<T> reduce_data(reduce_op);
4184 : using ReduceTuple = typename decltype(reduce_data)::Type;
4185 : reduce_op.eval(bx, reduce_data,
4186 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
4187 : {
4188 : T t = 0;
4189 : for (int n = 0; n < ncomp.n; ++n) {
4190 : t += a(i,j,k,n+dcomp.i);
4191 : }
4192 : return { t };
4193 : });
4194 : ReduceTuple hv = reduce_data.value(reduce_op);
4195 : r = amrex::get<0>(hv);
4196 : } else
4197 : #endif
4198 : {
4199 : amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) noexcept
4200 : {
4201 : r += a(i,j,k,n+dcomp.i);
4202 : });
4203 : }
4204 :
4205 : return r;
4206 : }
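// Usage sketch: sum() reduces over bx and over ncomp.n components starting at
// dcomp.i, through a device ReduceOpSum when run on the GPU and a plain CPU
// loop otherwise. An illustrative host call:
//
//     Real total = fab.sum<RunOn::Host>(fab.box(), DestComp{0}, NumComps{1});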
4207 :
4208 : template <class T>
4209 : template <RunOn run_on>
4210 : T
4211 : BaseFab<T>::dot (const BaseFab<T>& src, const Box& bx, SrcComp scomp, DestComp dcomp, NumComps ncomp) const noexcept
4212 : {
4213 : AMREX_ASSERT(this->domain.sameType(src.domain));
4214 : AMREX_ASSERT(scomp.i >= 0 && scomp.i+ncomp.n <= src.nvar);
4215 : AMREX_ASSERT(dcomp.i >= 0 && dcomp.i+ncomp.n <= this->nvar);
4216 :
4217 : T r = 0;
4218 : Array4<T const> const& d = this->const_array();
4219 : Array4<T const> const& s = src.const_array();
4220 : #ifdef AMREX_USE_GPU
4221 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
4222 : ReduceOps<ReduceOpSum> reduce_op;
4223 : ReduceData<T> reduce_data(reduce_op);
4224 : using ReduceTuple = typename decltype(reduce_data)::Type;
4225 : reduce_op.eval(bx, reduce_data,
4226 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
4227 : {
4228 : T t = 0;
4229 : for (int n = 0; n < ncomp.n; ++n) {
4230 : t += d(i,j,k,n+dcomp.i) * s(i,j,k,n+scomp.i);
4231 : }
4232 : return { t };
4233 : });
4234 : ReduceTuple hv = reduce_data.value(reduce_op);
4235 : r = amrex::get<0>(hv);
4236 : } else
4237 : #endif
4238 : {
4239 : amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) noexcept
4240 : {
4241 : r += d(i,j,k,n+dcomp.i) * s(i,j,k,n+scomp.i);
4242 : });
4243 : }
4244 :
4245 : return r;
4246 : }
4247 :
4248 : template <class T>
4249 : template <RunOn run_on>
4250 : T
4251 : BaseFab<T>::dot (const Box& bx, int destcomp, int numcomp) const noexcept
4252 : {
4253 : return dot<run_on>(bx, DestComp{destcomp}, NumComps{numcomp});
4254 : }
4255 :
4257 : template <class T>
4258 : template <RunOn run_on>
4259 : T
4260 : BaseFab<T>::dot (const Box& bx, DestComp dcomp, NumComps ncomp) const noexcept
4261 : {
4262 : AMREX_ASSERT(dcomp.i >= 0 && dcomp.i+ncomp.n <= this->nvar);
4263 :
4264 : T r = 0;
4265 : Array4<T const> const& a = this->const_array();
4266 : #ifdef AMREX_USE_GPU
4267 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
4268 : ReduceOps<ReduceOpSum> reduce_op;
4269 : ReduceData<T> reduce_data(reduce_op);
4270 : using ReduceTuple = typename decltype(reduce_data)::Type;
4271 : reduce_op.eval(bx, reduce_data,
4272 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
4273 : {
4274 : T t = 0;
4275 : for (int n = 0; n < ncomp.n; ++n) {
4276 : t += a(i,j,k,n+dcomp.i)*a(i,j,k,n+dcomp.i);
4277 : }
4278 : return { t };
4279 : });
4280 : ReduceTuple hv = reduce_data.value(reduce_op);
4281 : r = amrex::get<0>(hv);
4282 : } else
4283 : #endif
4284 : {
4285 : amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) noexcept
4286 : {
4287 : r += a(i,j,k,n+dcomp.i)*a(i,j,k,n+dcomp.i);
4288 : });
4289 : }
4290 :
4291 : return r;
4292 : }
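// Note: this src-less overload of dot() contracts the fab with itself, i.e.
// it returns the sum of a(i,j,k,n)^2 over bx, the squared L2 norm of the
// selected components. An illustrative host call:
//
//     Real l2sq = fab.dot<RunOn::Host>(fab.box(), DestComp{0}, NumComps{1});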
4293 :
4294 : template <class T>
4295 : template <RunOn run_on>
4296 : T
4297 : BaseFab<T>::dotmask (const BaseFab<T>& src, const Box& bx, const BaseFab<int>& mask,
4298 : SrcComp scomp, DestComp dcomp, NumComps ncomp) const noexcept
4299 : {
4300 : AMREX_ASSERT(this->domain.sameType(src.domain));
4301 : AMREX_ASSERT(this->domain.sameType(mask.domain));
4302 : AMREX_ASSERT(scomp.i >= 0 && scomp.i+ncomp.n <= src.nvar);
4303 : AMREX_ASSERT(dcomp.i >= 0 && dcomp.i+ncomp.n <= this->nvar);
4304 :
4305 : T r = 0;
4306 : Array4<T const> const& d = this->const_array();
4307 : Array4<T const> const& s = src.const_array();
4308 : Array4<int const> const& m = mask.const_array();
4309 : #ifdef AMREX_USE_GPU
4310 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
4311 : ReduceOps<ReduceOpSum> reduce_op;
4312 : ReduceData<T> reduce_data(reduce_op);
4313 : using ReduceTuple = typename decltype(reduce_data)::Type;
4314 : reduce_op.eval(bx, reduce_data,
4315 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
4316 : {
4317 : T t = 0;
4318 : T mi = static_cast<T>(static_cast<int>(static_cast<bool>(m(i,j,k))));
4319 : for (int n = 0; n < ncomp.n; ++n) {
4320 : t += d(i,j,k,n+dcomp.i)*s(i,j,k,n+scomp.i)*mi;
4321 : }
4322 : return { t };
4323 : });
4324 : ReduceTuple hv = reduce_data.value(reduce_op);
4325 : r = amrex::get<0>(hv);
4326 : } else
4327 : #endif
4328 : {
4329 : amrex::LoopOnCpu(bx, ncomp.n, [=,&r] (int i, int j, int k, int n) noexcept
4330 : {
4331 : int mi = static_cast<int>(static_cast<bool>(m(i,j,k)));
4332 : r += d(i,j,k,n+dcomp.i)*s(i,j,k,n+scomp.i)*mi;
4333 : });
4334 : }
4335 :
4336 : return r;
4337 : }
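// Usage sketch: dotmask above is the dot product of two fabs restricted to
// cells where the int mask is nonzero (each product is scaled by the mask
// value normalized to 0 or 1). Names below are illustrative assumptions.
//
//     BaseFab<Real> a(bx, 1), b(bx, 1);
//     BaseFab<int>  mask(bx, 1);
//     Real r = a.dotmask<RunOn::Host>(b, bx, mask, SrcComp{0}, DestComp{0}, NumComps{1});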
4338 :
4339 : }
4340 :
4341 : #endif /*AMREX_BASEFAB_H_*/
|