Line data Source code
1 :
2 : #ifndef BL_FABARRAY_H
3 : #define BL_FABARRAY_H
4 : #include <AMReX_Config.H>
5 :
6 : #include <AMReX_BLassert.H>
7 : #include <AMReX_Array.H>
8 : #include <AMReX_Vector.H>
9 : #include <AMReX_Box.H>
10 : #include <AMReX.H>
11 : #include <AMReX_BoxArray.H>
12 : #include <AMReX_BoxDomain.H>
13 : #include <AMReX_FabFactory.H>
14 : #include <AMReX_DistributionMapping.H>
15 : #include <AMReX_Geometry.H>
16 : #include <AMReX_ParallelDescriptor.H>
17 : #include <AMReX_Utility.H>
18 : #include <AMReX_ccse-mpi.H>
19 : #include <AMReX_BLProfiler.H>
20 : #include <AMReX_Periodicity.H>
21 : #include <AMReX_Print.H>
22 : #include <AMReX_FabArrayBase.H>
23 : #include <AMReX_MFIter.H>
24 : #include <AMReX_MakeType.H>
25 : #include <AMReX_TypeTraits.H>
26 : #include <AMReX_LayoutData.H>
27 : #include <AMReX_BaseFab.H>
28 : #include <AMReX_BaseFabUtility.H>
29 : #include <AMReX_MFParallelFor.H>
30 : #include <AMReX_TagParallelFor.H>
31 : #include <AMReX_ParReduce.H>
32 :
33 : #include <AMReX_Gpu.H>
34 :
35 : #ifdef AMREX_USE_EB
36 : #include <AMReX_EBFabFactory.H>
37 : #endif
38 :
39 : #ifdef AMREX_USE_OMP
40 : #include <omp.h>
41 : #endif
42 :
43 : #include <algorithm>
44 : #include <cstring>
45 : #include <limits>
46 : #include <map>
47 : #include <memory>
48 : #include <utility>
49 : #include <set>
50 : #include <string>
51 : #include <vector>
52 :
53 :
54 : namespace amrex {
55 :
56 : template <typename T, std::enable_if_t<!IsBaseFab<T>::value,int> = 0>
57 : Long nBytesOwned (T const&) noexcept { return 0; }
58 :
59 : template <typename T>
60 6846878 : Long nBytesOwned (BaseFab<T> const& fab) noexcept { return fab.nBytesOwned(); }
61 :
62 : /**
63 : * \brief FabArray memory allocation information
64 : */
65 : struct MFInfo {
66 : // alloc: allocate memory or not
67 : bool alloc = true;
68 : bool alloc_single_chunk = FabArrayBase::getAllocSingleChunk();
69 : Arena* arena = nullptr;
70 : Vector<std::string> tags;
71 :
72 0 : MFInfo& SetAlloc (bool a) noexcept { alloc = a; return *this; }
73 :
74 : MFInfo& SetAllocSingleChunk (bool a) noexcept { alloc_single_chunk = a; return *this; }
75 :
76 : MFInfo& SetArena (Arena* ar) noexcept { arena = ar; return *this; }
77 :
78 : MFInfo& SetTag () noexcept { return *this; }
79 :
80 : MFInfo& SetTag (const char* t) noexcept {
81 : tags.emplace_back(t);
82 : return *this;
83 : }
84 :
85 : MFInfo& SetTag (const std::string& t) noexcept {
86 : tags.emplace_back(t);
87 : return *this;
88 : }
89 :
90 : template <typename T, typename... Ts>
91 : MFInfo& SetTag (T&& t, Ts&&... ts) noexcept {
92 : tags.emplace_back(std::forward<T>(t));
93 : return SetTag(std::forward<Ts>(ts)...);
94 : }
95 : };
96 :
97 : struct TheFaArenaDeleter {
98 : using pointer = char*;
99 : void operator()(pointer p) const noexcept {
100 : The_Comms_Arena()->free(p);
101 : }
102 : };
103 : using TheFaArenaPointer = std::unique_ptr<char, TheFaArenaDeleter>;
104 :
105 : // Data used in non-blocking fill boundary.
106 : template <class FAB>
107 : struct FBData {
108 :
109 : const FabArrayBase::FB* fb = nullptr;
110 : int scomp;
111 : int ncomp;
112 :
113 : //
114 : char* the_recv_data = nullptr;
115 : char* the_send_data = nullptr;
116 : Vector<int> recv_from;
117 : Vector<char*> recv_data;
118 : Vector<std::size_t> recv_size;
119 : Vector<MPI_Request> recv_reqs;
120 : Vector<MPI_Status> recv_stat;
121 : //
122 : Vector<char*> send_data;
123 : Vector<MPI_Request> send_reqs;
124 : int tag;
125 :
126 : };
127 :
128 : // Data used in non-blocking parallel copy.
129 : template <class FAB>
130 : struct PCData {
131 :
132 : const FabArrayBase::CPC* cpc = nullptr;
133 : const FabArray<FAB>* src = nullptr;
134 : FabArrayBase::CpOp op;
135 : int tag = -1;
136 : int actual_n_rcvs = -1;
137 : int SC = -1, NC = -1, DC = -1;
138 :
139 : char* the_recv_data = nullptr;
140 : char* the_send_data = nullptr;
141 : Vector<int> recv_from;
142 : Vector<char*> recv_data;
143 : Vector<std::size_t> recv_size;
144 : Vector<MPI_Request> recv_reqs;
145 : Vector<MPI_Request> send_reqs;
146 :
147 : };
148 :
149 : template <typename T>
150 : struct MultiArray4
151 : {
152 : AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
153 : Array4<T> const& operator[] (int li) const noexcept {
154 : AMREX_IF_ON_DEVICE((return dp[li];))
155 : AMREX_IF_ON_HOST((return hp[li];))
156 : }
157 :
158 : AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
159 : explicit operator bool() const noexcept {
160 : AMREX_IF_ON_DEVICE((return dp != nullptr;))
161 : AMREX_IF_ON_HOST((return hp != nullptr;))
162 : }
163 :
164 : #ifdef AMREX_USE_GPU
165 : Array4<T> const* AMREX_RESTRICT dp = nullptr;
166 : #endif
167 : Array4<T> const* AMREX_RESTRICT hp = nullptr;
168 : };
169 :
170 : template <class FAB> class FabArray;
171 :
172 : template <class DFAB, class SFAB,
173 : std::enable_if_t<std::conjunction_v<
174 : IsBaseFab<DFAB>, IsBaseFab<SFAB>,
175 : std::is_convertible<typename SFAB::value_type,
176 : typename DFAB::value_type>>, int> BAR = 0>
177 : void
178 : Copy (FabArray<DFAB>& dst, FabArray<SFAB> const& src, int srccomp, int dstcomp, int numcomp, int nghost)
179 : {
180 : Copy(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost));
181 : }
182 :
183 : template <class DFAB, class SFAB,
184 : std::enable_if_t<std::conjunction_v<
185 : IsBaseFab<DFAB>, IsBaseFab<SFAB>,
186 : std::is_convertible<typename SFAB::value_type,
187 : typename DFAB::value_type>>, int> BAR = 0>
188 : void
189 333923 : Copy (FabArray<DFAB>& dst, FabArray<SFAB> const& src, int srccomp, int dstcomp, int numcomp, const IntVect& nghost)
190 : {
191 : BL_PROFILE("amrex::Copy()");
192 :
193 : using DT = typename DFAB::value_type;
194 :
195 333923 : if (dst.local_size() == 0) { return; }
196 :
197 : // avoid self copy
198 : if constexpr (std::is_same_v<typename SFAB::value_type, typename DFAB::value_type>) {
199 333923 : if (dst.atLocalIdx(0).dataPtr(dstcomp) == src.atLocalIdx(0).dataPtr(srccomp)) {
200 0 : return;
201 : }
202 : }
203 :
204 : #ifdef AMREX_USE_GPU
205 : if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
206 : auto const& srcarr = src.const_arrays();
207 : auto const& dstarr = dst.arrays();
208 : ParallelFor(dst, nghost, numcomp,
209 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
210 : {
211 : dstarr[box_no](i,j,k,dstcomp+n) = DT(srcarr[box_no](i,j,k,srccomp+n));
212 : });
213 : Gpu::streamSynchronize();
214 : } else
215 : #endif
216 : {
217 : #ifdef AMREX_USE_OMP
218 : #pragma omp parallel if (Gpu::notInLaunchRegion())
219 : #endif
220 691930 : for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
221 : {
222 358007 : const Box& bx = mfi.growntilebox(nghost);
223 358007 : if (bx.ok())
224 : {
225 358007 : auto const& srcFab = src.const_array(mfi);
226 358007 : auto const& dstFab = dst.array(mfi);
227 123812000 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
228 : {
229 : dstFab(i,j,k,dstcomp+n) = DT(srcFab(i,j,k,srccomp+n));
230 : });
231 : }
232 : }
233 : }
234 : }
235 :
236 : template <class FAB,
237 : class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
238 : void
239 : Add (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, int nghost)
240 : {
241 : Add(dst,src,srccomp,dstcomp,numcomp,IntVect(nghost));
242 : }
243 :
244 : template <class FAB,
245 : class bar = std::enable_if_t<IsBaseFab<FAB>::value> >
246 : void
247 7894 : Add (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int numcomp, const IntVect& nghost)
248 : {
249 : BL_PROFILE("amrex::Add()");
250 :
251 : #ifdef AMREX_USE_GPU
252 : if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
253 : auto const& dstfa = dst.arrays();
254 : auto const& srcfa = src.const_arrays();
255 : ParallelFor(dst, nghost, numcomp,
256 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
257 : {
258 : dstfa[box_no](i,j,k,n+dstcomp) += srcfa[box_no](i,j,k,n+srccomp);
259 : });
260 : if (!Gpu::inNoSyncRegion()) {
261 : Gpu::streamSynchronize();
262 : }
263 : } else
264 : #endif
265 : {
266 : #ifdef AMREX_USE_OMP
267 : #pragma omp parallel if (Gpu::notInLaunchRegion())
268 : #endif
269 20608 : for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
270 : {
271 12714 : const Box& bx = mfi.growntilebox(nghost);
272 12714 : if (bx.ok())
273 : {
274 12714 : auto const srcFab = src.array(mfi);
275 12714 : auto dstFab = dst.array(mfi);
276 18568900 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
277 : {
278 : dstFab(i,j,k,n+dstcomp) += srcFab(i,j,k,n+srccomp);
279 : });
280 : }
281 : }
282 : }
283 7894 : }
284 :
285 : /**
286 : * \brief An Array of FortranArrayBox(FAB)-like Objects
287 : *
288 : * The FabArray<FAB> class implements a collection (stored as an array) of
289 : * Fortran array box-like ( \p FAB ) objects. The parameterized type \p FAB is intended to be
290 : * any class derived from BaseFab<T>. For example, \p FAB may be a BaseFab of
291 : * integers, so we could write:
292 : *
293 : * FabArray<BaseFab<int> > int_fabs;
294 : *
295 : * Then int_fabs is a FabArray that can hold a collection of BaseFab<int>
296 : * objects.
297 : *
298 : * FabArray is not just a general container class for Fortran arrays. It is
299 : * intended to hold "grid" data for use in finite difference calculations in
300 : * which the data is defined on a union of (usually disjoint) rectangular
301 : * regions embedded in a uniform index space. This region, called the valid
302 : * region, is represented by a BoxArray. For the purposes of this discussion,
303 : * the Kth Box in the BoxArray represents the interior region of the Kth grid.
304 : *
305 : * Since the intent is to be used with finite difference calculations a
306 : * FabArray also includes the notion of a boundary region for each grid. The
307 : * boundary region is specified by the ngrow parameter which tells the FabArray
308 : * to allocate each \p FAB to be ngrow cells larger in all directions than the
309 : * underlying Box. The larger region covered by the union of all the \p FABs is
310 : * called the region of definition. The underlying notion is that the valid
311 : * region contains the grid interior data and the region of definition includes
312 : * the interior region plus the boundary areas.
313 : *
314 : * Operations are available to copy data from the valid regions into these
315 : * boundary areas where the two overlap. The number of components, that is,
316 : * the number of values that can be stored in each cell of a \p FAB, is either
317 : * given as an argument to the constructor or is inherent in the definition of
318 : * the underlying \p FAB. Each \p FAB in the FabArray will have the same number of
319 : * components.
320 : *
321 : * In summary, a FabArray is an array of \p FABs. The Kth element contains a \p FAB
322 : * that holds the data for the Kth grid, a Box that defines the valid region
323 : * of the Kth grid.
324 : *
325 : * A typical use for a FabArray would be to hold the solution vector or
326 : * right-hand-side when solving a linear system of equations on a union of
327 : * rectangular grids. The copy operations would be used to copy data from the
328 : * valid regions of neighboring grids into the boundary regions after each
329 : * relaxation step of the iterative method. If a multigrid method is used, a
330 : * FabArray could be used to hold the data at each level in the multigrid
331 : * hierarchy.
332 : *
333 : * This class is a concrete class not a polymorphic one.
334 : *
335 : * This class does NOT provide a copy constructor or assignment operator.
336 : *
337 : * \tparam FAB FortranArrayBox-like object. Typically a derived class of BaseFab. Not to be confused with FabArrayBase.
338 : */
339 : template <class FAB>
340 : class FabArray
341 : :
342 : public FabArrayBase
343 : {
344 : public:
345 :
346 : struct FABType {
347 : using value_type = FAB;
348 : };
349 :
350 : /*
351 : * if FAB is a BaseFab or its child, value_type = FAB::value_type
352 : * else value_type = FAB;
353 : */
354 : using value_type = typename std::conditional_t<IsBaseFab<FAB>::value, FAB, FABType>::value_type;
355 :
356 : using fab_type = FAB;
357 :
358 : //
359 : //! Constructs an empty FabArray<FAB>.
360 : FabArray () noexcept;
361 :
362 : /**
363 : * \brief Construct an empty FabArray<FAB> that has a default Arena.
364 : *
365 : * If `define` is called later with a nullptr as MFInfo's arena, the
366 : * default Arena `a` will be used. If the arena in MFInfo is not a
367 : * nullptr, the MFInfo's arena will be used.
368 : */
369 : explicit FabArray (Arena* a) noexcept;
370 :
371 : /**
372 : * \brief Construct a FabArray<FAB> with a valid region defined by bxs
373 : * and a region of definition defined by the grow factor ngrow
374 : * and the number of components nvar.
375 : */
376 : FabArray (const BoxArray& bxs,
377 : const DistributionMapping& dm,
378 : int nvar,
379 : int ngrow,
380 : #ifdef AMREX_STRICT_MODE
381 : const MFInfo& info,
382 : const FabFactory<FAB>& factory);
383 : #else
384 : const MFInfo& info = MFInfo(),
385 : const FabFactory<FAB>& factory = DefaultFabFactory<FAB>());
386 : #endif
387 :
388 : FabArray (const BoxArray& bxs,
389 : const DistributionMapping& dm,
390 : int nvar,
391 : const IntVect& ngrow,
392 : #ifdef AMREX_STRICT_MODE
393 : const MFInfo& info,
394 : const FabFactory<FAB>& factory);
395 : #else
396 : const MFInfo& info = MFInfo(),
397 : const FabFactory<FAB>& factory = DefaultFabFactory<FAB>());
398 : #endif
399 :
400 : FabArray (const FabArray<FAB>& rhs, MakeType maketype, int scomp, int ncomp);
401 :
402 : //! The destructor -- deletes all FABs in the array.
403 : ~FabArray ();
404 :
405 : FabArray (FabArray<FAB>&& rhs) noexcept;
406 : FabArray<FAB>& operator= (FabArray<FAB>&& rhs) noexcept;
407 :
408 : FabArray (const FabArray<FAB>& rhs) = delete;
409 : FabArray<FAB>& operator= (const FabArray<FAB>& rhs) = delete;
410 :
411 : /**
412 : * \brief Define this FabArray identically to that performed by
413 : * the constructor having an analogous function signature.
414 : * This is only valid if this FabArray was defined using
415 : * the default constructor.
416 : */
417 : void define (const BoxArray& bxs,
418 : const DistributionMapping& dm,
419 : int nvar,
420 : int ngrow,
421 : #ifdef AMREX_STRICT_MODE
422 : const MFInfo& info,
423 : const FabFactory<FAB>& factory);
424 : #else
425 : const MFInfo& info = MFInfo(),
426 : const FabFactory<FAB>& factory = DefaultFabFactory<FAB>());
427 : #endif
428 :
429 : void define (const BoxArray& bxs,
430 : const DistributionMapping& dm,
431 : int nvar,
432 : const IntVect& ngrow,
433 : #ifdef AMREX_STRICT_MODE
434 : const MFInfo& info,
435 : const FabFactory<FAB>& factory);
436 : #else
437 : const MFInfo& info = MFInfo(),
438 : const FabFactory<FAB>& factory = DefaultFabFactory<FAB>());
439 : #endif
440 :
441 0 : const FabFactory<FAB>& Factory () const noexcept { return *m_factory; }
442 :
443 : // Provides access to the Arena this FabArray was build with.
444 : Arena* arena () const noexcept { return m_dallocator.arena(); }
445 :
446 : const Vector<std::string>& tags () const noexcept { return m_tags; }
447 :
448 : bool hasEBFabFactory () const noexcept {
449 : #ifdef AMREX_USE_EB
450 : const auto *const f = dynamic_cast<EBFArrayBoxFactory const*>(m_factory.get());
451 : return (f != nullptr);
452 : #else
453 : return false;
454 : #endif
455 : }
456 :
457 : //! Return the data pointer to the single chunk memory if this object
458 : //! uses a single contiguous chunk of memory, nullptr otherwise.
459 : [[nodiscard]] value_type* singleChunkPtr () noexcept {
460 : return m_single_chunk_arena ? (value_type*)m_single_chunk_arena->data() : nullptr;
461 : }
462 :
463 : //! Return the data pointer to the single chunk memory if this object
464 : //! uses a single contiguous chunk of memory, nullptr otherwise.
465 : [[nodiscard]] value_type const* singleChunkPtr () const noexcept {
466 : return m_single_chunk_arena ? (value_type const*)m_single_chunk_arena->data() : nullptr;
467 : }
468 :
469 : //! Return the size of the single chunk memory if this object uses a
470 : //! single contiguous chunk of memory, 0 otherwise.
471 : [[nodiscard]] std::size_t singleChunkSize () const noexcept { return m_single_chunk_size; }
472 :
473 : bool isAllRegular () const noexcept {
474 : #ifdef AMREX_USE_EB
475 : const auto *const f = dynamic_cast<EBFArrayBoxFactory const*>(m_factory.get());
476 : if (f) {
477 : return f->isAllRegular();
478 : } else {
479 : return true;
480 : }
481 : #else
482 : return true;
483 : #endif
484 : }
485 :
486 : /**
487 : * \brief Return true if the FabArray is well-defined. That is,
488 : * the FabArray has a BoxArray and DistributionMapping, the
489 : * FABs are allocated for each Box in the BoxArray and the
490 : * sizes of the FABs and the number of components are consistent
491 : * with the definition of the FabArray.
492 : */
493 : bool ok () const;
494 :
495 : /** Has define() been called on this rank?
496 : *
497 : * \return true if `define` has been called on this `FabArray`. Note that all constructors except `FabArray ()`
498 : * and `FabArray(Arena*a)` call `define`, even if the `MFInfo` argument has `alloc=false`. One could
499 : * also use `FabArrayBase::empty()` to find whether `define` is called or not, although they are not exactly
500 : * the same.
501 : */
502 : bool isDefined () const;
503 :
504 : //! Return a constant reference to the FAB associated with mfi.
505 1218557 : const FAB& operator[] (const MFIter& mfi) const noexcept { return *(this->fabPtr(mfi)); }
506 :
507 : //! Return a constant reference to the FAB associated with mfi.
508 : const FAB& get (const MFIter& mfi) const noexcept { return *(this->fabPtr(mfi)); }
509 :
510 : //! Returns a reference to the FAB associated mfi.
511 5581851 : FAB& operator[] (const MFIter& mfi) noexcept { return *(this->fabPtr(mfi)); }
512 :
513 : //! Returns a reference to the FAB associated mfi.
514 345536 : FAB& get (const MFIter& mfi) noexcept { return *(this->fabPtr(mfi)); }
515 :
516 : //! Return a constant reference to the FAB associated with the Kth element.
517 4919925 : const FAB& operator[] (int K) const noexcept { return *(this->fabPtr(K)); }
518 :
519 : //! Return a constant reference to the FAB associated with the Kth element.
520 : const FAB& get (int K) const noexcept { return *(this->fabPtr(K)); }
521 :
522 : //! Return a reference to the FAB associated with the Kth element.
523 795 : FAB& operator[] (int K) noexcept { return *(this->fabPtr(K)); }
524 :
525 : //! Return a reference to the FAB associated with the Kth element.
526 14748175 : FAB& get (int K) noexcept { return *(this->fabPtr(K)); }
527 :
528 : //! Return a reference to the FAB associated with local index L
529 333923 : FAB& atLocalIdx (int L) noexcept { return *m_fabs_v[L]; }
530 333923 : const FAB& atLocalIdx (int L) const noexcept { return *m_fabs_v[L]; }
531 :
532 : //! Return pointer to FAB
533 : FAB * fabPtr (const MFIter& mfi) noexcept;
534 : FAB const* fabPtr (const MFIter& mfi) const noexcept;
535 : FAB * fabPtr (int K) noexcept; // Here K is global index
536 : FAB const* fabPtr (int K) const noexcept;
537 :
538 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
539 : void prefetchToHost (const MFIter& mfi) const noexcept;
540 :
541 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
542 : void prefetchToDevice (const MFIter& mfi) const noexcept;
543 :
544 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
545 : Array4<typename FabArray<FAB>::value_type const> array (const MFIter& mfi) const noexcept;
546 : //
547 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
548 : Array4<typename FabArray<FAB>::value_type> array (const MFIter& mfi) noexcept;
549 : //
550 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
551 : Array4<typename FabArray<FAB>::value_type const> array (int K) const noexcept;
552 : //
553 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
554 : Array4<typename FabArray<FAB>::value_type> array (int K) noexcept;
555 :
556 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
557 : Array4<typename FabArray<FAB>::value_type const> const_array (const MFIter& mfi) const noexcept;
558 : //
559 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
560 : Array4<typename FabArray<FAB>::value_type const> const_array (int K) const noexcept;
561 :
562 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
563 : Array4<typename FabArray<FAB>::value_type const> array (const MFIter& mfi, int start_comp) const noexcept;
564 : //
565 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
566 : Array4<typename FabArray<FAB>::value_type> array (const MFIter& mfi, int start_comp) noexcept;
567 : //
568 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
569 : Array4<typename FabArray<FAB>::value_type const> array (int K, int start_comp) const noexcept;
570 : //
571 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
572 : Array4<typename FabArray<FAB>::value_type> array (int K, int start_comp) noexcept;
573 :
574 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
575 : Array4<typename FabArray<FAB>::value_type const> const_array (const MFIter& mfi, int start_comp) const noexcept;
576 : //
577 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
578 : Array4<typename FabArray<FAB>::value_type const> const_array (int K, int start_comp) const noexcept;
579 :
580 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
581 : MultiArray4<typename FabArray<FAB>::value_type> arrays () noexcept;
582 :
583 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
584 : MultiArray4<typename FabArray<FAB>::value_type const> arrays () const noexcept;
585 :
586 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
587 : MultiArray4<typename FabArray<FAB>::value_type const> const_arrays () const noexcept;
588 :
589 : //! Explicitly set the Kth FAB in the FabArray to point to elem.
590 : void setFab (int boxno, std::unique_ptr<FAB> elem);
591 :
592 : //! Explicitly set the Kth FAB in the FabArray to point to elem.
593 : template <class F=FAB, std::enable_if_t<std::is_move_constructible_v<F>,int> = 0>
594 : void setFab (int boxno, FAB&& elem);
595 :
596 : //! Explicitly set the FAB associated with mfi in the FabArray to point to elem.
597 : void setFab (const MFIter&mfi, std::unique_ptr<FAB> elem);
598 :
599 : //! Explicitly set the FAB associated with mfi in the FabArray to point to elem.
600 : template <class F=FAB, std::enable_if_t<std::is_move_constructible_v<F>,int> = 0>
601 : void setFab (const MFIter&mfi, FAB&& elem);
602 :
603 : //! Release ownership of the FAB. This function is not thread safe.
604 : AMREX_NODISCARD
605 : FAB* release (int K);
606 :
607 : //! Release ownership of the FAB. This function is not thread safe.
608 : AMREX_NODISCARD
609 : FAB* release (const MFIter& mfi);
610 :
611 : //! Releases FAB memory in the FabArray.
612 : void clear ();
613 :
614 : /**
615 : * \brief Perform local copy of FabArray data.
616 : *
617 : * The two FabArrays must have the same BoxArray and
618 : * DistributionMapping, although they could have different data types.
619 : * For example, this could be used to copy from FabArray<BaseFab<float>>
620 : * to FabArray<BaseFab<double>>.
621 : *
622 : * \param src source FabArray
623 : * \param scomp starting component of source
624 : * \param dcomp starting component of this FabArray
625 : * \param ncomp number of components
626 : * \param nghost number of ghost cells
627 : */
628 : template <typename SFAB, typename DFAB = FAB,
629 : std::enable_if_t<std::conjunction_v<
630 : IsBaseFab<DFAB>, IsBaseFab<SFAB>,
631 : std::is_convertible<typename SFAB::value_type,
632 : typename DFAB::value_type>>, int> = 0>
633 : void LocalCopy (FabArray<SFAB> const& src, int scomp, int dcomp, int ncomp,
634 : IntVect const& nghost);
635 :
636 : /**
637 : * \brief Perform local addition of FabArray data.
638 : *
639 : * The two FabArrays must have the same BoxArray and
640 : * DistributionMapping.
641 : *
642 : * \param src source FabArray
643 : * \param scomp starting component of source
644 : * \param dcomp starting component of this FabArray
645 : * \param ncomp number of components
646 : * \param nghost number of ghost cells
647 : */
648 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
649 : void LocalAdd (FabArray<FAB> const& src, int scomp, int dcomp, int ncomp,
650 : IntVect const& nghost);
651 :
652 : //! Set all components in the entire region of each FAB to val.
653 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
654 : void setVal (value_type val);
655 :
656 : //! Set all components in the entire region of each FAB to val.
657 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
658 : FabArray<FAB>& operator= (value_type val);
659 :
660 : /**
661 : * \brief Set the value of num_comp components in the valid region of
662 : * each FAB in the FabArray, starting at component comp to val.
663 : * Also set the value of nghost boundary cells.
664 : */
665 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
666 : void setVal (value_type val,
667 : int comp,
668 : int ncomp,
669 : int nghost = 0);
670 :
671 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
672 : void setVal (value_type val,
673 : int comp,
674 : int ncomp,
675 : const IntVect& nghost);
676 :
677 : /**
678 : * \brief Set the value of num_comp components in the valid region of
679 : * each FAB in the FabArray, starting at component comp, as well
680 : * as nghost boundary cells, to val, provided they also intersect
681 : * with the Box region.
682 : */
683 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
684 : void setVal (value_type val,
685 : const Box& region,
686 : int comp,
687 : int ncomp,
688 : int nghost = 0);
689 :
690 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
691 : void setVal (value_type val,
692 : const Box& region,
693 : int comp,
694 : int ncomp,
695 : const IntVect& nghost);
696 : /**
697 : * \brief Set all components in the valid region of each FAB in the
698 : * FabArray to val, including nghost boundary cells.
699 : */
700 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
701 : void setVal (value_type val, int nghost);
702 :
703 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
704 : void setVal (value_type val, const IntVect& nghost);
705 :
706 : /**
707 : * \brief Set all components in the valid region of each FAB in the
708 : * FabArray to val, including nghost boundary cells, that also
709 : * intersect the Box region.
710 : */
711 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
712 : void setVal (value_type val, const Box& region, int nghost);
713 :
714 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
715 : void setVal (value_type val, const Box& region, const IntVect& nghost);
716 :
717 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
718 : void abs (int comp, int ncomp, int nghost = 0);
719 :
720 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
721 : void abs (int comp, int ncomp, const IntVect& nghost);
722 :
723 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
724 : void plus (value_type val, int comp, int num_comp, int nghost = 0);
725 :
726 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
727 : void plus (value_type val, const Box& region, int comp, int num_comp, int nghost = 0);
728 :
729 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
730 : void mult (value_type val, int comp, int num_comp, int nghost = 0);
731 :
732 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
733 : void mult (value_type val, const Box& region, int comp, int num_comp, int nghost = 0);
734 :
735 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
736 : void invert (value_type numerator, int comp, int num_comp, int nghost = 0);
737 :
738 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
739 : void invert (value_type numerator, const Box& region, int comp, int num_comp, int nghost = 0);
740 :
741 : //! Set all values in the boundary region to val.
742 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
743 : void setBndry (value_type val);
744 :
745 : //! Set ncomp values in the boundary region, starting at start_comp to val.
746 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
747 : void setBndry (value_type val, int strt_comp, int ncomp);
748 :
749 : //! Set all values outside the Geometry domain to val.
750 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
751 : void setDomainBndry (value_type val, const Geometry& geom);
752 :
753 : //! Set ncomp values outside the Geometry domain to val, starting at start_comp.
754 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
755 : void setDomainBndry (value_type val, int strt_comp, int ncomp, const Geometry& geom);
756 :
757 : /**
758 : * \brief Returns the sum of component "comp"
759 : *
760 : * \param comp component
761 : * \param nghost number of ghost cells
762 : * \param local If true, MPI communication is skipped.
763 : */
764 : template <typename F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
765 : typename F::value_type
766 : sum (int comp, IntVect const& nghost, bool local = false) const;
767 :
768 : /**
769 : * \brief This function copies data from src to this FabArray. Each FAB
770 : * in fa is intersected with all FABs in this FabArray and a copy
771 : * is performed on the region of intersection. The intersection
772 : * is restricted to the valid regions.
773 : */
774 : void ParallelAdd (const FabArray<FAB>& src,
775 : const Periodicity& period = Periodicity::NonPeriodic())
776 : { ParallelCopy(src,period,FabArray::ADD); }
777 0 : void ParallelCopy (const FabArray<FAB>& src,
778 : const Periodicity& period = Periodicity::NonPeriodic(),
779 : CpOp op = FabArrayBase::COPY)
780 0 : { ParallelCopy(src,0,0,nComp(),0,0,period,op); }
781 :
782 : [[deprecated("Use FabArray::ParallelCopy() instead.")]]
783 : void copy (const FabArray<FAB>& src,
784 : const Periodicity& period = Periodicity::NonPeriodic(),
785 : CpOp op = FabArrayBase::COPY)
786 : { ParallelCopy(src,period,op); }
787 :
788 : void ParallelAdd_nowait (const FabArray<FAB>& src,
789 : const Periodicity& period = Periodicity::NonPeriodic())
790 : { ParallelCopy_nowait(src,period,FabArray::ADD); }
791 : void ParallelCopy_nowait (const FabArray<FAB>& src,
792 : const Periodicity& period = Periodicity::NonPeriodic(),
793 : CpOp op = FabArrayBase::COPY)
794 : { ParallelCopy_nowait(src,0,0,nComp(),0,0,period,op); }
795 :
796 : /**
797 : * \brief This function copies data from src to this FabArray. Each FAB
798 : * in src is intersected with all FABs in this FabArray and a copy
799 : * is performed on the region of intersection. The intersection
800 : * is restricted to the num_comp components starting at src_comp
801 : * in the FabArray src, with the destination components in this
802 : * FabArray starting at dest_comp.
803 : */
804 : void ParallelAdd (const FabArray<FAB>& src,
805 : int src_comp,
806 : int dest_comp,
807 : int num_comp,
808 : const Periodicity& period = Periodicity::NonPeriodic())
809 : { ParallelCopy(src,src_comp,dest_comp,num_comp, period, FabArrayBase::ADD); }
810 27148 : void ParallelCopy (const FabArray<FAB>& src,
811 : int src_comp,
812 : int dest_comp,
813 : int num_comp,
814 : const Periodicity& period = Periodicity::NonPeriodic(),
815 : CpOp op = FabArrayBase::COPY)
816 27148 : { ParallelCopy(src,src_comp,dest_comp,num_comp,0,0,period,op); }
817 :
818 : [[deprecated("Use FabArray::ParallelCopy() instead.")]]
819 : void copy (const FabArray<FAB>& src,
820 : int src_comp,
821 : int dest_comp,
822 : int num_comp,
823 : const Periodicity& period = Periodicity::NonPeriodic(),
824 : CpOp op = FabArrayBase::COPY)
825 : { ParallelCopy(src,src_comp,dest_comp,num_comp, period, op); }
826 :
827 : void ParallelAdd_nowait (const FabArray<FAB>& src,
828 : int src_comp,
829 : int dest_comp,
830 : int num_comp,
831 : const Periodicity& period = Periodicity::NonPeriodic())
832 : { ParallelCopy_nowait(src,src_comp,dest_comp,num_comp, period, FabArrayBase::ADD); }
833 : void ParallelCopy_nowait (const FabArray<FAB>& src,
834 : int src_comp,
835 : int dest_comp,
836 : int num_comp,
837 : const Periodicity& period = Periodicity::NonPeriodic(),
838 : CpOp op = FabArrayBase::COPY)
839 : { ParallelCopy_nowait(src,src_comp,dest_comp,num_comp,0,0,period,op); }
840 :
841 : //! Similar to the above function, except that source and destination are grown by src_nghost and dst_nghost, respectively
842 : void ParallelAdd (const FabArray<FAB>& src,
843 : int src_comp,
844 : int dest_comp,
845 : int num_comp,
846 : int src_nghost,
847 : int dst_nghost,
848 : const Periodicity& period = Periodicity::NonPeriodic())
849 : { ParallelCopy(src,src_comp,dest_comp,num_comp,IntVect(src_nghost),IntVect(dst_nghost),period,
850 : FabArrayBase::ADD); }
851 : void ParallelAdd (const FabArray<FAB>& src,
852 : int src_comp,
853 : int dest_comp,
854 : int num_comp,
855 : const IntVect& src_nghost,
856 : const IntVect& dst_nghost,
857 : const Periodicity& period = Periodicity::NonPeriodic())
858 : { ParallelCopy(src,src_comp,dest_comp,num_comp,src_nghost,dst_nghost,period,FabArrayBase::ADD); }
859 32653 : void ParallelCopy (const FabArray<FAB>& src,
860 : int src_comp,
861 : int dest_comp,
862 : int num_comp,
863 : int src_nghost,
864 : int dst_nghost,
865 : const Periodicity& period = Periodicity::NonPeriodic(),
866 : CpOp op = FabArrayBase::COPY)
867 32653 : { ParallelCopy(src,src_comp,dest_comp,num_comp,IntVect(src_nghost),IntVect(dst_nghost),period,op); }
868 : void ParallelCopy (const FabArray<FAB>& src,
869 : int scomp,
870 : int dcomp,
871 : int ncomp,
872 : const IntVect& snghost,
873 : const IntVect& dnghost,
874 : const Periodicity& period = Periodicity::NonPeriodic(),
875 : CpOp op = FabArrayBase::COPY,
876 : const FabArrayBase::CPC* a_cpc = nullptr);
877 :
878 : void ParallelAdd_nowait (const FabArray<FAB>& src,
879 : int src_comp,
880 : int dest_comp,
881 : int num_comp,
882 : int src_nghost,
883 : int dst_nghost,
884 : const Periodicity& period = Periodicity::NonPeriodic())
885 : { ParallelCopy_nowait(src,src_comp,dest_comp,num_comp,IntVect(src_nghost),
886 : IntVect(dst_nghost),period,FabArrayBase::ADD); }
887 :
888 : void ParallelAdd_nowait (const FabArray<FAB>& src,
889 : int src_comp,
890 : int dest_comp,
891 : int num_comp,
892 : const IntVect& src_nghost,
893 : const IntVect& dst_nghost,
894 : const Periodicity& period = Periodicity::NonPeriodic())
895 : { ParallelCopy_nowait(src,src_comp,dest_comp,num_comp,src_nghost,
896 : dst_nghost,period,FabArrayBase::ADD); }
897 :
898 : void ParallelCopy_nowait (const FabArray<FAB>& src,
899 : int src_comp,
900 : int dest_comp,
901 : int num_comp,
902 : int src_nghost,
903 : int dst_nghost,
904 : const Periodicity& period = Periodicity::NonPeriodic(),
905 : CpOp op = FabArrayBase::COPY)
906 : { ParallelCopy_nowait(src,src_comp,dest_comp,num_comp,IntVect(src_nghost),
907 : IntVect(dst_nghost),period,op); }
908 :
909 : void ParallelCopy_nowait (const FabArray<FAB>& src,
910 : int scomp,
911 : int dcomp,
912 : int ncomp,
913 : const IntVect& snghost,
914 : const IntVect& dnghost,
915 : const Periodicity& period = Periodicity::NonPeriodic(),
916 : CpOp op = FabArrayBase::COPY,
917 : const FabArrayBase::CPC* a_cpc = nullptr,
918 : bool to_ghost_cells_only = false);
919 :
920 : void ParallelCopy_finish ();
921 :
922 : void ParallelCopyToGhost (const FabArray<FAB>& src,
923 : int scomp,
924 : int dcomp,
925 : int ncomp,
926 : const IntVect& snghost,
927 : const IntVect& dnghost,
928 : const Periodicity& period = Periodicity::NonPeriodic());
929 :
930 : void ParallelCopyToGhost_nowait (const FabArray<FAB>& src,
931 : int scomp,
932 : int dcomp,
933 : int ncomp,
934 : const IntVect& snghost,
935 : const IntVect& dnghost,
936 : const Periodicity& period = Periodicity::NonPeriodic());
937 :
938 : void ParallelCopyToGhost_finish();
939 :
940 : [[deprecated("Use FabArray::ParallelCopy() instead.")]]
941 : void copy (const FabArray<FAB>& src,
942 : int src_comp,
943 : int dest_comp,
944 : int num_comp,
945 : int src_nghost,
946 : int dst_nghost,
947 : const Periodicity& period = Periodicity::NonPeriodic(),
948 : CpOp op = FabArrayBase::COPY)
949 : { ParallelCopy(src,src_comp,dest_comp,num_comp,IntVect(src_nghost),IntVect(dst_nghost),period,op); }
950 :
951 : [[deprecated("Use FabArray::ParallelCopy() instead.")]]
952 : void copy (const FabArray<FAB>& src,
953 : int src_comp,
954 : int dest_comp,
955 : int num_comp,
956 : const IntVect& src_nghost,
957 : const IntVect& dst_nghost,
958 : const Periodicity& period = Periodicity::NonPeriodic(),
959 : CpOp op = FabArrayBase::COPY)
960 : { ParallelCopy(src,src_comp,dest_comp,num_comp,src_nghost,dst_nghost,period,op); }
961 :
962 : //! Copy from src to this. this and src have the same BoxArray, but different DistributionMapping
963 : void Redistribute (const FabArray<FAB>& src,
964 : int scomp,
965 : int dcomp,
966 : int ncomp,
967 : const IntVect& nghost);
968 :
969 : /**
970 : * \brief Copy the values contained in the intersection of the
971 : * valid + nghost region of this FabArray with the FAB dest into dest.
972 : * Note that FAB dest is assumed to be identical on each process.
973 : */
974 : void copyTo (FAB& dest, int nghost = 0) const;
975 :
976 : /**
977 : * \brief Copy the values contained in the intersection of the
978 : * num_comp component valid + nghost region of this FabArray, starting at
979 : * component src_comp, with the FAB dest into dest, starting at
980 : * component dest_comp in dest.
981 : * Note that FAB dest is assumed to be identical on each process.
982 : */
983 : void copyTo (FAB& dest, int scomp, int dcomp, int ncomp, int nghost = 0) const;
984 :
985 : //! Shift the boxarray by vector v
986 : void shift (const IntVect& v);
987 :
988 : bool defined (int K) const noexcept;
989 : bool defined (const MFIter& mfi) const noexcept;
990 :
991 : /**
992 : * \brief Copy on intersection within a FabArray. Data is copied from
993 : * valid regions to intersecting regions of definition. The
994 : * purpose is to fill in the boundary regions of each FAB in
995 : * the FabArray. If cross=true, corner cells are not filled.
996 : * If the length of periodic is provided, periodic boundaries are
997 : * also filled. Note that FabArray itself does not contains
998 : * any periodicity information.
999 : * FillBoundary expects that its cell-centered version of its BoxArray
1000 : * is non-overlapping.
1001 : */
1002 : template <typename BUF=value_type>
1003 : void FillBoundary (bool cross = false);
1004 :
1005 : template <typename BUF=value_type>
1006 : void FillBoundary (const Periodicity& period, bool cross = false);
1007 :
1008 : template <typename BUF=value_type>
1009 : void FillBoundary (const IntVect& nghost, const Periodicity& period, bool cross = false);
1010 :
1011 : //! Same as FillBoundary(), but only copies ncomp components starting at scomp.
1012 : template <typename BUF=value_type>
1013 : void FillBoundary (int scomp, int ncomp, bool cross = false);
1014 :
1015 : template <typename BUF=value_type>
1016 : void FillBoundary (int scomp, int ncomp, const Periodicity& period, bool cross = false);
1017 :
1018 : template <typename BUF=value_type>
1019 : void FillBoundary (int scomp, int ncomp, const IntVect& nghost, const Periodicity& period, bool cross = false);
1020 :
1021 : template <typename BUF=value_type>
1022 : void FillBoundary_nowait (bool cross = false);
1023 :
1024 : template <typename BUF=value_type>
1025 : void FillBoundary_nowait (const Periodicity& period, bool cross = false);
1026 :
1027 : template <typename BUF=value_type>
1028 : void FillBoundary_nowait (const IntVect& nghost, const Periodicity& period, bool cross = false);
1029 :
1030 : template <typename BUF=value_type>
1031 : void FillBoundary_nowait (int scomp, int ncomp, bool cross = false);
1032 :
1033 : template <typename BUF=value_type>
1034 : void FillBoundary_nowait (int scomp, int ncomp, const Periodicity& period, bool cross = false);
1035 :
1036 : template <typename BUF=value_type>
1037 : void FillBoundary_nowait (int scomp, int ncomp, const IntVect& nghost, const Periodicity& period, bool cross = false);
1038 :
1039 : template <typename BUF=value_type,
1040 : class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
1041 : void FillBoundary_finish ();
1042 :
1043 : void FillBoundary_test ();
1044 :
1045 :
1046 : /**
1047 : * \brief Fill ghost cells and synchronize nodal data. Ghost regions are
1048 : * filled with data from the intersecting valid regions. The
1049 : * synchronization will override valid regions by the intersecting valid
1050 : * regions with a higher precedence. The smaller the global box index
1051 : * is, the higher precedence the box has. With periodic boundaries, for
1052 : * cells in the same box, those near the lower corner have higher
1053 : * precedence than those near the upper corner.
1054 : *
1055 : * \param period periodic length if it's non-zero
1056 : */
1057 : void FillBoundaryAndSync (const Periodicity& period = Periodicity::NonPeriodic());
1058 : /**
1059 : * \brief Fill ghost cells and synchronize nodal data. Ghost regions are
1060 : * filled with data from the intersecting valid regions. The
1061 : * synchronization will override valid regions by the intersecting valid
1062 : * regions with a higher precedence. The smaller the global box index
1063 : * is, the higher precedence the box has. With periodic boundaries, for
1064 : * cells in the same box, those near the lower corner have higher
1065 : * precedence than those near the upper corner.
1066 : *
1067 : * \param scomp starting component
1068 : * \param ncomp number of components
1069 : * \param nghost number of ghost cells to fill
1070 : * \param period periodic length if it's non-zero
1071 : */
1072 : void FillBoundaryAndSync (int scomp, int ncomp, const IntVect& nghost,
1073 : const Periodicity& period);
1074 : void FillBoundaryAndSync_nowait (const Periodicity& period = Periodicity::NonPeriodic());
1075 : void FillBoundaryAndSync_nowait (int scomp, int ncomp, const IntVect& nghost,
1076 : const Periodicity& period);
1077 : void FillBoundaryAndSync_finish ();
1078 :
1079 : /**
1080 : * \brief Synchronize nodal data. The synchronization will override
1081 : * valid regions by the intersecting valid regions with a higher
1082 : * precedence. The smaller the global box index is, the higher
1083 : * precedence the box has. With periodic boundaries, for cells in the
1084 : * same box, those near the lower corner have higher precedence than
1085 : * those near the upper corner.
1086 : *
1087 : * \param period periodic length if it's non-zero
1088 : */
1089 : void OverrideSync (const Periodicity& period = Periodicity::NonPeriodic());
1090 : /**
1091 : * \brief Synchronize nodal data. The synchronization will override
1092 : * valid regions by the intersecting valid regions with a higher
1093 : * precedence. The smaller the global box index is, the higher
1094 : * precedence the box has. With periodic boundaries, for cells in the
1095 : * same box, those near the lower corner have higher precedence than
1096 : * those near the upper corner.
1097 : *
1098 : * \param scomp starting component
1099 : * \param ncomp number of components
1100 : * \param period periodic length if it's non-zero
1101 : */
1102 : void OverrideSync (int scomp, int ncomp, const Periodicity& period);
1103 : void OverrideSync_nowait (const Periodicity& period = Periodicity::NonPeriodic());
1104 : void OverrideSync_nowait (int scomp, int ncomp, const Periodicity& period);
1105 : void OverrideSync_finish ();
1106 :
1107 : /**
1108 : * \brief Sum values in overlapped cells. The destination is limited to valid cells.
1109 : */
1110 : void SumBoundary (const Periodicity& period = Periodicity::NonPeriodic());
1111 : void SumBoundary (int scomp, int ncomp, const Periodicity& period = Periodicity::NonPeriodic());
1112 : void SumBoundary_nowait (const Periodicity& period = Periodicity::NonPeriodic());
1113 : void SumBoundary_nowait (int scomp, int ncomp, const Periodicity& period = Periodicity::NonPeriodic());
1114 :
1115 : /**
1116 : * \brief Sum values in overlapped cells. The destination is limited to valid + ngrow cells.
1117 : */
1118 : void SumBoundary (int scomp, int ncomp, IntVect const& nghost,
1119 : const Periodicity& period = Periodicity::NonPeriodic());
1120 : void SumBoundary_nowait (int scomp, int ncomp, IntVect const& nghost,
1121 : const Periodicity& period = Periodicity::NonPeriodic());
1122 :
1123 : /**
1124 : * \brief Sum values in overlapped cells.
1125 : * For computing the overlap, the dst is grown by dst_ngrow, while the src uses src_ngrow.
1126 : */
1127 : void SumBoundary (int scomp, int ncomp, IntVect const& src_nghost, IntVect const& dst_nghost,
1128 : const Periodicity& period = Periodicity::NonPeriodic());
1129 : void SumBoundary_nowait (int scomp, int ncomp, IntVect const& src_nghost, IntVect const& dst_nghost,
1130 : const Periodicity& period = Periodicity::NonPeriodic());
1131 : void SumBoundary_finish ();
1132 :
1133 : /** \brief Fill ghost cells with values from their corresponding cells across periodic
1134 : * boundaries, regardless of whether the corresponding cells are valid. This differs from
1135 : * FillBoundary, which only fills from valid cells, and does not fill from ghost cells.
1136 : * The BoxArray is allowed to be overlapping.
1137 : */
1138 : void EnforcePeriodicity (const Periodicity& period);
1139 : void EnforcePeriodicity (int scomp, int ncomp, const Periodicity& period);
1140 : void EnforcePeriodicity (int scomp, int ncomp, const IntVect& nghost,
1141 : const Periodicity& period);
1142 :
1143 : // covered : ghost cells covered by valid cells of this FabArray
1144 : // (including periodically shifted valid cells)
1145 : // notcovered: ghost cells not covered by valid cells
1146 : // (including ghost cells outside periodic boundaries)
1147 : // physbnd : boundary cells outside the domain (excluding periodic boundaries)
1148 : // interior : interior cells (i.e., valid cells)
1149 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
1150 : void BuildMask (const Box& phys_domain, const Periodicity& period,
1151 : value_type covered, value_type notcovered,
1152 : value_type physbnd, value_type interior);
1153 :
1154 : // The following are private functions. But we have to make them public for cuda.
1155 :
1156 : template <typename BUF=value_type,
1157 : class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
1158 : void FBEP_nowait (int scomp, int ncomp, const IntVect& nghost,
1159 : const Periodicity& period, bool cross,
1160 : bool enforce_periodicity_only = false,
1161 : bool override_sync = false);
1162 :
1163 : void FB_local_copy_cpu (const FB& TheFB, int scomp, int ncomp);
1164 : void PC_local_cpu (const CPC& thecpc, FabArray<FAB> const& src,
1165 : int scomp, int dcomp, int ncomp, CpOp op);
1166 :
1167 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
1168 : void setVal (value_type val, const CommMetaData& thecmd, int scomp, int ncomp);
1169 :
1170 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
1171 : LayoutData<int> RecvLayoutMask (const CommMetaData& thecmd);
1172 :
1173 : #ifdef AMREX_USE_GPU
1174 :
1175 : void FB_local_copy_gpu (const FB& TheFB, int scomp, int ncomp);
1176 : void PC_local_gpu (const CPC& thecpc, FabArray<FAB> const& src,
1177 : int scomp, int dcomp, int ncomp, CpOp op);
1178 :
1179 : void CMD_local_setVal_gpu (value_type x, const CommMetaData& thecmd, int scomp, int ncomp);
1180 : void CMD_remote_setVal_gpu (value_type x, const CommMetaData& thecmd, int scomp, int ncomp);
1181 :
1182 : #if defined(__CUDACC__)
1183 :
1184 : void FB_local_copy_cuda_graph_1 (const FB& TheFB, int scomp, int ncomp);
1185 : void FB_local_copy_cuda_graph_n (const FB& TheFB, int scomp, int ncomp);
1186 :
1187 : #endif
1188 : #endif
1189 :
1190 : #ifdef AMREX_USE_MPI
1191 :
1192 : #ifdef AMREX_USE_GPU
1193 : #if defined(__CUDACC__)
1194 :
1195 : void FB_pack_send_buffer_cuda_graph (const FB& TheFB, int scomp, int ncomp,
1196 : Vector<char*>& send_data,
1197 : Vector<std::size_t> const& send_size,
1198 : Vector<const CopyComTagsContainer*> const& send_cctc);
1199 :
1200 : void FB_unpack_recv_buffer_cuda_graph (const FB& TheFB, int dcomp, int ncomp,
1201 : Vector<char*> const& recv_data,
1202 : Vector<std::size_t> const& recv_size,
1203 : Vector<const CopyComTagsContainer*> const& recv_cctc,
1204 : bool is_thread_safe);
1205 :
1206 : #endif
1207 :
1208 : template <typename BUF = value_type>
1209 : static void pack_send_buffer_gpu (FabArray<FAB> const& src, int scomp, int ncomp,
1210 : Vector<char*> const& send_data,
1211 : Vector<std::size_t> const& send_size,
1212 : Vector<const CopyComTagsContainer*> const& send_cctc);
1213 :
1214 : template <typename BUF = value_type>
1215 : static void unpack_recv_buffer_gpu (FabArray<FAB>& dst, int dcomp, int ncomp,
1216 : Vector<char*> const& recv_data,
1217 : Vector<std::size_t> const& recv_size,
1218 : Vector<const CopyComTagsContainer*> const& recv_cctc,
1219 : CpOp op, bool is_thread_safe);
1220 :
1221 : #endif
1222 :
1223 : template <typename BUF = value_type>
1224 : static void pack_send_buffer_cpu (FabArray<FAB> const& src, int scomp, int ncomp,
1225 : Vector<char*> const& send_data,
1226 : Vector<std::size_t> const& send_size,
1227 : Vector<const CopyComTagsContainer*> const& send_cctc);
1228 :
1229 : template <typename BUF = value_type>
1230 : static void unpack_recv_buffer_cpu (FabArray<FAB>& dst, int dcomp, int ncomp,
1231 : Vector<char*> const& recv_data,
1232 : Vector<std::size_t> const& recv_size,
1233 : Vector<const CopyComTagsContainer*> const& recv_cctc,
1234 : CpOp op, bool is_thread_safe);
1235 :
1236 : #endif
1237 :
1238 : /**
1239 : * \brief Return infinity norm
1240 : *
1241 : * \param comp starting component
1242 : * \param ncomp number of components
1243 : * \param nghost number of ghost cells
1244 : * \param local If true, MPI communication is skipped.
1245 : * \param ignore_covered ignore covered cells. Only relevant for cell-centered EB data.
1246 : */
1247 : template <typename F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
1248 : typename F::value_type
1249 : norminf (int comp, int ncomp, IntVect const& nghost, bool local = false,
1250 : [[maybe_unused]] bool ignore_covered = false) const;
1251 :
1252 : /**
1253 : * \brief Return infinity norm in masked region
1254 : *
1255 : * \param mask only mask=true region is included
1256 : * \param comp starting component
1257 : * \param ncomp number of components
1258 : * \param nghost number of ghost cells
1259 : * \param local If true, MPI communication is skipped.
1260 : */
1261 : template <typename IFAB, typename F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
1262 : typename F::value_type
1263 : norminf (FabArray<IFAB> const& mask, int comp, int ncomp, IntVect const& nghost,
1264 : bool local = false) const;
1265 :
1266 : protected:
1267 :
1268 : std::unique_ptr<FabFactory<FAB> > m_factory;
1269 : DataAllocator m_dallocator;
1270 : std::unique_ptr<detail::SingleChunkArena> m_single_chunk_arena;
1271 : Long m_single_chunk_size = 0;
1272 :
1273 : //! has define() been called?
1274 : bool define_function_called = false;
1275 :
1276 : //
1277 : //! The data.
1278 : std::vector<FAB*> m_fabs_v;
1279 :
1280 : #ifdef AMREX_USE_GPU
1281 : mutable void* m_dp_arrays = nullptr;
1282 : #endif
1283 : mutable void* m_hp_arrays = nullptr;
1284 : mutable MultiArray4<value_type> m_arrays;
1285 : mutable MultiArray4<value_type const> m_const_arrays;
1286 :
1287 : Vector<std::string> m_tags;
1288 :
1289 : //! for shared memory
1290 : struct ShMem {
1291 :
1292 886192 : ShMem () noexcept = default;
1293 :
1294 1025381 : ~ShMem () { // NOLINT
1295 : #if defined(BL_USE_MPI3)
1296 : if (win != MPI_WIN_NULL) { MPI_Win_free(&win); }
1297 : #endif
1298 : #ifdef BL_USE_TEAM
1299 : if (alloc) {
1300 : amrex::update_fab_stats(-n_points, -n_values, sizeof(value_type));
1301 : }
1302 : #endif
1303 1025381 : }
1304 5 : ShMem (ShMem&& rhs) noexcept
1305 5 : : alloc(rhs.alloc), n_values(rhs.n_values), n_points(rhs.n_points)
1306 : #if defined(BL_USE_MPI3)
1307 : , win(rhs.win)
1308 : #endif
1309 : {
1310 5 : rhs.alloc = false;
1311 : #if defined(BL_USE_MPI3)
1312 : rhs.win = MPI_WIN_NULL;
1313 : #endif
1314 5 : }
1315 41641 : ShMem& operator= (ShMem&& rhs) noexcept {
1316 41641 : if (&rhs != this) {
1317 41641 : alloc = rhs.alloc;
1318 41641 : n_values = rhs.n_values;
1319 41641 : n_points = rhs.n_points;
1320 41641 : rhs.alloc = false;
1321 : #if defined(BL_USE_MPI3)
1322 : win = rhs.win;
1323 : rhs.win = MPI_WIN_NULL;
1324 : #endif
1325 : }
1326 41641 : return *this;
1327 : }
1328 : ShMem (const ShMem&) = delete;
1329 : ShMem& operator= (const ShMem&) = delete;
1330 : bool alloc{false};
1331 : Long n_values{0};
1332 : Long n_points{0};
1333 : #if defined(BL_USE_MPI3)
1334 : MPI_Win win = MPI_WIN_NULL;
1335 : #endif
1336 : };
1337 : ShMem shmem;
1338 :
1339 : bool SharedMemory () const noexcept { return shmem.alloc; }
1340 :
1341 : private:
1342 : using Iterator = typename std::vector<FAB*>::iterator;
1343 :
1344 : void AllocFabs (const FabFactory<FAB>& factory, Arena* ar,
1345 : const Vector<std::string>& tags,
1346 : bool alloc_single_chunk);
1347 :
1348 : void setFab_assert (int K, FAB const& fab) const;
1349 :
1350 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
1351 : void build_arrays () const;
1352 :
1353 : void clear_arrays ();
1354 :
1355 : public:
1356 :
1357 : #ifdef BL_USE_MPI
1358 :
1359 : //! Prepost nonblocking receives
1360 : template <typename BUF=value_type>
1361 : static void PostRcvs (const MapOfCopyComTagContainers& RcvTags,
1362 : char*& the_recv_data,
1363 : Vector<char*>& recv_data,
1364 : Vector<std::size_t>& recv_size,
1365 : Vector<int>& recv_from,
1366 : Vector<MPI_Request>& recv_reqs,
1367 : int ncomp,
1368 : int SeqNum);
1369 :
1370 : template <typename BUF=value_type>
1371 : AMREX_NODISCARD
1372 : static TheFaArenaPointer PostRcvs (const MapOfCopyComTagContainers& RcvTags,
1373 : Vector<char*>& recv_data,
1374 : Vector<std::size_t>& recv_size,
1375 : Vector<int>& recv_from,
1376 : Vector<MPI_Request>& recv_reqs,
1377 : int ncomp,
1378 : int SeqNum);
1379 :
1380 : template <typename BUF=value_type>
1381 : static void PrepareSendBuffers (const MapOfCopyComTagContainers& SndTags,
1382 : char*& the_send_data,
1383 : Vector<char*>& send_data,
1384 : Vector<std::size_t>& send_size,
1385 : Vector<int>& send_rank,
1386 : Vector<MPI_Request>& send_reqs,
1387 : Vector<const CopyComTagsContainer*>& send_cctc,
1388 : int ncomp);
1389 :
1390 : template <typename BUF=value_type>
1391 : AMREX_NODISCARD
1392 : static TheFaArenaPointer PrepareSendBuffers (const MapOfCopyComTagContainers& SndTags,
1393 : Vector<char*>& send_data,
1394 : Vector<std::size_t>& send_size,
1395 : Vector<int>& send_rank,
1396 : Vector<MPI_Request>& send_reqs,
1397 : Vector<const CopyComTagsContainer*>& send_cctc,
1398 : int ncomp);
1399 :
1400 : static void PostSnds (Vector<char*> const& send_data,
1401 : Vector<std::size_t> const& send_size,
1402 : Vector<int> const& send_rank,
1403 : Vector<MPI_Request>& send_reqs,
1404 : int SeqNum);
1405 : #endif
1406 :
1407 : std::unique_ptr<FBData<FAB>> fbd;
1408 : std::unique_ptr<PCData<FAB>> pcd;
1409 :
1410 : // Pointer to temporary fab used in non-blocking amrex::OverrideSync
1411 : std::unique_ptr< FabArray<FAB> > os_temp;
1412 :
1413 :
1414 :
1415 : /**
1416 : * \brief y += a*x
1417 : *
1418 : * \param y FabArray y
1419 : * \param a scalar a
1420 : * \param x FabArray x
1421 : * \param xcomp starting component of x
1422 : * \param ycomp starting component of y
1423 : * \param ncomp number of components
1424 : * \param nghost number of ghost cells
1425 : */
1426 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
1427 : static void Saxpy (FabArray<FAB>& y, value_type a, FabArray<FAB> const& x,
1428 : int xcomp, int ycomp, int ncomp, IntVect const& nghost);
1429 :
1430 : /**
1431 : * \brief y = x + a*y
1432 : *
1433 : * \param y FabArray y
1434 : * \param a scalar a
1435 : * \param x FabArray x
1436 : * \param xcomp starting component of x
1437 : * \param ycomp starting component of y
1438 : * \param ncomp number of components
1439 : * \param nghost number of ghost cells
1440 : */
1441 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
1442 : static void Xpay (FabArray<FAB>& y, value_type a, FabArray<FAB> const& x,
1443 : int xcomp, int ycomp, int ncomp, IntVect const& nghost);
1444 :
1445 : /**
1446 : * \brief dst = a*x + b*y
1447 : *
1448 : * \param dst destination FabArray
1449 : * \param a scalar a
1450 : * \param x FabArray x
1451 : * \param xcomp starting component of x
1452 : * \param b scalar b
1453 : * \param y FabArray y
1454 : * \param ycomp starting component of y
1455 : * \param dstcomp starting component of destination
1456 : * \param numcomp number of components
1457 : * \param nghost number of ghost cells
1458 : */
1459 : template <class F=FAB, std::enable_if_t<IsBaseFab<F>::value,int> = 0>
1460 : static void LinComb (FabArray<FAB>& dst,
1461 : value_type a, const FabArray<FAB>& x, int xcomp,
1462 : value_type b, const FabArray<FAB>& y, int ycomp,
1463 : int dstcomp, int numcomp, const IntVect& nghost);
1464 : };
1465 :
1466 :
1467 : #include <AMReX_FabArrayCommI.H>
1468 :
1469 : template <class FAB>
1470 : bool
1471 : FabArray<FAB>::defined (int K) const noexcept
1472 : {
1473 : int li = localindex(K);
1474 : if (li >= 0 && li < static_cast<int>(m_fabs_v.size()) && m_fabs_v[li] != 0) {
1475 : return true;
1476 : }
1477 : else {
1478 : return false;
1479 : }
1480 : }
1481 :
1482 : template <class FAB>
1483 : bool
1484 : FabArray<FAB>::defined (const MFIter& mfi) const noexcept
1485 : {
1486 : int li = mfi.LocalIndex();
1487 : if (li < static_cast<int>(m_fabs_v.size()) && m_fabs_v[li] != nullptr) {
1488 : return true;
1489 : }
1490 : else {
1491 : return false;
1492 : }
1493 : }
1494 :
1495 : template <class FAB>
1496 : FAB*
1497 9060639 : FabArray<FAB>::fabPtr (const MFIter& mfi) noexcept
1498 : {
1499 : AMREX_ASSERT(mfi.LocalIndex() < indexArray.size());
1500 : AMREX_ASSERT(DistributionMap() == mfi.DistributionMap());
1501 9060639 : int li = mfi.LocalIndex();
1502 9060639 : return m_fabs_v[li];
1503 : }
1504 :
1505 : template <class FAB>
1506 : FAB const*
1507 2795576 : FabArray<FAB>::fabPtr (const MFIter& mfi) const noexcept
1508 : {
1509 : AMREX_ASSERT(mfi.LocalIndex() < indexArray.size());
1510 : AMREX_ASSERT(DistributionMap() == mfi.DistributionMap());
1511 2795576 : int li = mfi.LocalIndex();
1512 2795576 : return m_fabs_v[li];
1513 : }
1514 :
1515 : template <class FAB>
1516 : FAB*
1517 14748970 : FabArray<FAB>::fabPtr (int K) noexcept
1518 : {
1519 14748970 : int li = localindex(K);
1520 : AMREX_ASSERT(li >=0 && li < indexArray.size());
1521 14748970 : return m_fabs_v[li];
1522 : }
1523 :
1524 : template <class FAB>
1525 : FAB const*
1526 4919925 : FabArray<FAB>::fabPtr (int K) const noexcept
1527 : {
1528 4919925 : int li = localindex(K);
1529 : AMREX_ASSERT(li >=0 && li < indexArray.size());
1530 4919925 : return m_fabs_v[li];
1531 : }
1532 :
1533 : template <class FAB>
1534 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1535 : void
1536 : FabArray<FAB>::prefetchToHost (const MFIter& mfi) const noexcept
1537 : {
1538 : #ifdef AMREX_USE_CUDA
1539 : this->fabPtr(mfi)->prefetchToHost();
1540 : #else
1541 : amrex::ignore_unused(mfi);
1542 : #endif
1543 : }
1544 :
1545 : template <class FAB>
1546 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1547 : void
1548 : FabArray<FAB>::prefetchToDevice (const MFIter& mfi) const noexcept
1549 : {
1550 : #ifdef AMREX_USE_CUDA
1551 : this->fabPtr(mfi)->prefetchToDevice();
1552 : #else
1553 : amrex::ignore_unused(mfi);
1554 : #endif
1555 : }
1556 :
1557 : template <class FAB>
1558 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1559 : Array4<typename FabArray<FAB>::value_type const>
1560 815604 : FabArray<FAB>::array (const MFIter& mfi) const noexcept
1561 : {
1562 815604 : return fabPtr(mfi)->const_array();
1563 : }
1564 :
1565 : template <class FAB>
1566 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1567 : Array4<typename FabArray<FAB>::value_type>
1568 3133248 : FabArray<FAB>::array (const MFIter& mfi) noexcept
1569 : {
1570 3133248 : return fabPtr(mfi)->array();
1571 : }
1572 :
1573 : template <class FAB>
1574 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1575 : Array4<typename FabArray<FAB>::value_type const>
1576 0 : FabArray<FAB>::array (int K) const noexcept
1577 : {
1578 0 : return fabPtr(K)->const_array();
1579 : }
1580 :
1581 : template <class FAB>
1582 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1583 : Array4<typename FabArray<FAB>::value_type>
1584 : FabArray<FAB>::array (int K) noexcept
1585 : {
1586 : return fabPtr(K)->array();
1587 : }
1588 :
1589 : template <class FAB>
1590 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1591 : Array4<typename FabArray<FAB>::value_type const>
1592 761416 : FabArray<FAB>::const_array (const MFIter& mfi) const noexcept
1593 : {
1594 761416 : return fabPtr(mfi)->const_array();
1595 : }
1596 :
1597 : template <class FAB>
1598 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1599 : Array4<typename FabArray<FAB>::value_type const>
1600 : FabArray<FAB>::const_array (int K) const noexcept
1601 : {
1602 : return fabPtr(K)->const_array();
1603 : }
1604 :
1605 : template <class FAB>
1606 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1607 : Array4<typename FabArray<FAB>::value_type const>
1608 : FabArray<FAB>::array (const MFIter& mfi, int start_comp) const noexcept
1609 : {
1610 : return fabPtr(mfi)->const_array(start_comp);
1611 : }
1612 :
1613 : template <class FAB>
1614 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1615 : Array4<typename FabArray<FAB>::value_type>
1616 : FabArray<FAB>::array (const MFIter& mfi, int start_comp) noexcept
1617 : {
1618 : return fabPtr(mfi)->array(start_comp);
1619 : }
1620 :
1621 : template <class FAB>
1622 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1623 : Array4<typename FabArray<FAB>::value_type const>
1624 : FabArray<FAB>::array (int K, int start_comp) const noexcept
1625 : {
1626 : return fabPtr(K)->const_array(start_comp);
1627 : }
1628 :
1629 : template <class FAB>
1630 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1631 : Array4<typename FabArray<FAB>::value_type>
1632 : FabArray<FAB>::array (int K, int start_comp) noexcept
1633 : {
1634 : return fabPtr(K)->array(start_comp);
1635 : }
1636 :
1637 : template <class FAB>
1638 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1639 : Array4<typename FabArray<FAB>::value_type const>
1640 : FabArray<FAB>::const_array (const MFIter& mfi, int start_comp) const noexcept
1641 : {
1642 : return fabPtr(mfi)->const_array(start_comp);
1643 : }
1644 :
1645 : template <class FAB>
1646 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1647 : Array4<typename FabArray<FAB>::value_type const>
1648 : FabArray<FAB>::const_array (int K, int start_comp) const noexcept
1649 : {
1650 : return fabPtr(K)->const_array(start_comp);
1651 : }
1652 :
1653 : template <class FAB>
1654 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1655 : MultiArray4<typename FabArray<FAB>::value_type>
1656 : FabArray<FAB>::arrays () noexcept
1657 : {
1658 : build_arrays();
1659 : return m_arrays;
1660 : }
1661 :
1662 : template <class FAB>
1663 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1664 : MultiArray4<typename FabArray<FAB>::value_type const>
1665 : FabArray<FAB>::arrays () const noexcept
1666 : {
1667 : build_arrays();
1668 : return m_const_arrays;
1669 : }
1670 :
1671 : template <class FAB>
1672 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1673 : MultiArray4<typename FabArray<FAB>::value_type const>
1674 : FabArray<FAB>::const_arrays () const noexcept
1675 : {
1676 : build_arrays();
1677 : return m_const_arrays;
1678 : }
1679 :
1680 : template <class FAB>
1681 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1682 : void
1683 : FabArray<FAB>::build_arrays () const
1684 : {
1685 : using A = Array4<value_type>;
1686 : using AC = Array4<value_type const>;
1687 : static_assert(sizeof(A) == sizeof(AC), "sizeof(Array4<T>) != sizeof(Array4<T const>)");
1688 : if (!m_hp_arrays && local_size() > 0) {
1689 : const int n = local_size();
1690 : #ifdef AMREX_USE_GPU
1691 : m_hp_arrays = (void*)The_Pinned_Arena()->alloc(n*2*sizeof(A));
1692 : m_dp_arrays = (void*)The_Arena()->alloc(n*2*sizeof(A));
1693 : #else
1694 : m_hp_arrays = (void*)std::malloc(n*2*sizeof(A));
1695 : #endif
1696 : for (int li = 0; li < n; ++li) {
1697 : if (m_fabs_v[li]) {
1698 : new ((A*)m_hp_arrays+li) A(m_fabs_v[li]->array());
1699 : new ((AC*)m_hp_arrays+li+n) AC(m_fabs_v[li]->const_array());
1700 : } else {
1701 : new ((A*)m_hp_arrays+li) A{};
1702 : new ((AC*)m_hp_arrays+li+n) AC{};
1703 : }
1704 : }
1705 : m_arrays.hp = (A*)m_hp_arrays;
1706 : m_const_arrays.hp = (AC*)m_hp_arrays + n;
1707 : #ifdef AMREX_USE_GPU
1708 : m_arrays.dp = (A*)m_dp_arrays;
1709 : m_const_arrays.dp = (AC*)m_dp_arrays + n;
1710 : Gpu::htod_memcpy(m_dp_arrays, m_hp_arrays, n*2*sizeof(A));
1711 : #endif
1712 : }
1713 : }
1714 :
1715 : template <class FAB>
1716 : void
1717 2096199 : FabArray<FAB>::clear_arrays ()
1718 : {
1719 : #ifdef AMREX_USE_GPU
1720 : The_Pinned_Arena()->free(m_hp_arrays);
1721 : The_Arena()->free(m_dp_arrays);
1722 : m_dp_arrays = nullptr;
1723 : #else
1724 2096199 : std::free(m_hp_arrays);
1725 : #endif
1726 2096199 : m_hp_arrays = nullptr;
1727 2096199 : m_arrays.hp = nullptr;
1728 2096199 : m_const_arrays.hp = nullptr;
1729 2096199 : }
1730 :
1731 : template <class FAB>
1732 : AMREX_NODISCARD
1733 : FAB*
1734 : FabArray<FAB>::release (int K)
1735 : {
1736 : const int li = localindex(K);
1737 : if (li >= 0 && li < static_cast<int>(m_fabs_v.size()) && m_fabs_v[li] != nullptr) {
1738 : AMREX_ASSERT(m_single_chunk_arena == nullptr);
1739 : Long nbytes = amrex::nBytesOwned(*m_fabs_v[li]);
1740 : if (nbytes > 0) {
1741 : for (auto const& t : m_tags) {
1742 : updateMemUsage(t, -nbytes, nullptr);
1743 : }
1744 : }
1745 : return std::exchange(m_fabs_v[li], nullptr);
1746 : } else {
1747 : return nullptr;
1748 : }
1749 : }
1750 :
1751 : template <class FAB>
1752 : AMREX_NODISCARD
1753 : FAB*
1754 : FabArray<FAB>::release (const MFIter& mfi)
1755 : {
1756 : const int li = mfi.LocalIndex();
1757 : if (li >= 0 && li < static_cast<int>(m_fabs_v.size()) && m_fabs_v[li] != nullptr) {
1758 : AMREX_ASSERT(m_single_chunk_arena == nullptr);
1759 : Long nbytes = amrex::nBytesOwned(*m_fabs_v[li]);
1760 : if (nbytes > 0) {
1761 : for (auto const& t : m_tags) {
1762 : updateMemUsage(t, -nbytes, nullptr);
1763 : }
1764 : }
1765 : return std::exchange(m_fabs_v[li], nullptr);
1766 : } else {
1767 : return nullptr;
1768 : }
1769 : }
1770 :
1771 : template <class FAB>
1772 : void
1773 2096199 : FabArray<FAB>::clear ()
1774 : {
1775 2096199 : if (define_function_called)
1776 : {
1777 972846 : define_function_called = false;
1778 972846 : clearThisBD(); //!< addThisBD is called in define
1779 : }
1780 :
1781 2096199 : Long nbytes = 0L;
1782 5528875 : for (auto *x : m_fabs_v) {
1783 3432686 : if (x) {
1784 3432686 : nbytes += amrex::nBytesOwned(*x);
1785 3432686 : m_factory->destroy(x);
1786 : }
1787 : }
1788 2096199 : m_fabs_v.clear();
1789 2096199 : clear_arrays();
1790 2096199 : m_factory.reset();
1791 2096199 : m_dallocator.m_arena = nullptr;
1792 : // no need to clear the non-blocking fillboundary stuff
1793 :
1794 2096199 : if (nbytes > 0) {
1795 1942676 : for (auto const& t : m_tags) {
1796 971336 : updateMemUsage(t, -nbytes, nullptr);
1797 : }
1798 : }
1799 :
1800 2096199 : if (m_single_chunk_arena) {
1801 0 : m_single_chunk_arena.reset();
1802 : }
1803 2096199 : m_single_chunk_size = 0;
1804 :
1805 2096199 : m_tags.clear();
1806 :
1807 2096199 : FabArrayBase::clear();
1808 2096199 : }
1809 :
1810 : template <class FAB>
1811 : template <typename SFAB, typename DFAB,
1812 : std::enable_if_t<std::conjunction_v<
1813 : IsBaseFab<DFAB>, IsBaseFab<SFAB>,
1814 : std::is_convertible<typename SFAB::value_type,
1815 : typename DFAB::value_type>>, int>>
1816 : void
1817 : FabArray<FAB>::LocalCopy (FabArray<SFAB> const& src, int scomp, int dcomp, int ncomp,
1818 : IntVect const& nghost)
1819 : {
1820 : amrex::Copy(*this, src, scomp, dcomp, ncomp, nghost);
1821 : }
1822 :
1823 : template <class FAB>
1824 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1825 : void
1826 : FabArray<FAB>::LocalAdd (FabArray<FAB> const& src, int scomp, int dcomp, int ncomp,
1827 : IntVect const& nghost)
1828 : {
1829 : amrex::Add(*this, src, scomp, dcomp, ncomp, nghost);
1830 : }
1831 :
1832 : template <class FAB>
1833 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1834 : void
1835 : FabArray<FAB>::setVal (value_type val, int nghost)
1836 : {
1837 : setVal(val,0,n_comp,IntVect(nghost));
1838 : }
1839 :
1840 : template <class FAB>
1841 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1842 : void
1843 : FabArray<FAB>::setVal (value_type val, const IntVect& nghost)
1844 : {
1845 : setVal(val,0,n_comp,nghost);
1846 : }
1847 :
1848 : template <class FAB>
1849 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1850 : void
1851 : FabArray<FAB>::setVal (value_type val, const Box& region, int nghost)
1852 : {
1853 : setVal(val,region,0,n_comp,IntVect(nghost));
1854 : }
1855 :
1856 : template <class FAB>
1857 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
1858 : void
1859 : FabArray<FAB>::setVal (value_type val, const Box& region, const IntVect& nghost)
1860 : {
1861 : setVal(val,region,0,n_comp,nghost);
1862 : }
1863 :
1864 : template <class FAB>
1865 218 : FabArray<FAB>::FabArray () noexcept
1866 218 : : shmem()
1867 : {
1868 218 : m_FA_stats.recordBuild();
1869 218 : }
1870 :
1871 : template <class FAB>
1872 : FabArray<FAB>::FabArray (Arena* a) noexcept
1873 : : m_dallocator(a),
1874 : shmem()
1875 : {
1876 : m_FA_stats.recordBuild();
1877 : }
1878 :
1879 : template <class FAB>
1880 29351 : FabArray<FAB>::FabArray (const BoxArray& bxs,
1881 : const DistributionMapping& dm,
1882 : int nvar,
1883 : int ngrow,
1884 : const MFInfo& info,
1885 : const FabFactory<FAB>& factory)
1886 29351 : : FabArray<FAB>(bxs,dm,nvar,IntVect(ngrow),info,factory)
1887 29351 : {}
1888 :
1889 : template <class FAB>
1890 885974 : FabArray<FAB>::FabArray (const BoxArray& bxs,
1891 : const DistributionMapping& dm,
1892 : int nvar,
1893 : const IntVect& ngrow,
1894 : const MFInfo& info,
1895 : const FabFactory<FAB>& factory)
1896 : : m_factory(factory.clone()),
1897 885974 : shmem()
1898 : {
1899 885974 : m_FA_stats.recordBuild();
1900 885974 : define(bxs,dm,nvar,ngrow,info,*m_factory);
1901 885974 : }
1902 :
1903 : template <class FAB>
1904 : FabArray<FAB>::FabArray (const FabArray<FAB>& rhs, MakeType maketype, int scomp, int ncomp)
1905 : : m_factory(rhs.Factory().clone()),
1906 : shmem()
1907 : {
1908 : m_FA_stats.recordBuild();
1909 : define(rhs.boxArray(), rhs.DistributionMap(), ncomp, rhs.nGrowVect(),
1910 : MFInfo().SetAlloc(false), *m_factory);
1911 :
1912 : if (maketype == amrex::make_alias)
1913 : {
1914 : for (int i = 0, n = indexArray.size(); i < n; ++i) {
1915 : auto const& rhsfab = *(rhs.m_fabs_v[i]);
1916 : m_fabs_v.push_back(m_factory->create_alias(rhsfab, scomp, ncomp));
1917 : }
1918 : }
1919 : else
1920 : {
1921 : amrex::Abort("FabArray: unknown MakeType");
1922 : }
1923 : }
1924 :
1925 : template <class FAB>
1926 5 : FabArray<FAB>::FabArray (FabArray<FAB>&& rhs) noexcept
1927 : : FabArrayBase (static_cast<FabArrayBase&&>(rhs))
1928 5 : , m_factory (std::move(rhs.m_factory))
1929 5 : , m_dallocator (std::move(rhs.m_dallocator))
1930 5 : , m_single_chunk_arena(std::move(rhs.m_single_chunk_arena))
1931 5 : , m_single_chunk_size(std::exchange(rhs.m_single_chunk_size,0))
1932 5 : , define_function_called(rhs.define_function_called)
1933 5 : , m_fabs_v (std::move(rhs.m_fabs_v))
1934 : #ifdef AMREX_USE_GPU
1935 : , m_dp_arrays (std::exchange(rhs.m_dp_arrays, nullptr))
1936 : #endif
1937 5 : , m_hp_arrays (std::exchange(rhs.m_hp_arrays, nullptr))
1938 : , m_arrays (rhs.m_arrays)
1939 : , m_const_arrays(rhs.m_const_arrays)
1940 5 : , m_tags (std::move(rhs.m_tags))
1941 20 : , shmem (std::move(rhs.shmem))
1942 : // no need to worry about the data used in non-blocking FillBoundary.
1943 : {
1944 5 : m_FA_stats.recordBuild();
1945 5 : rhs.define_function_called = false; // the responsibility of clear BD has been transferred.
1946 5 : rhs.m_fabs_v.clear(); // clear the data pointers so that rhs.clear does delete them.
1947 5 : rhs.clear();
1948 5 : }
1949 :
1950 : template <class FAB>
1951 : FabArray<FAB>&
1952 41641 : FabArray<FAB>::operator= (FabArray<FAB>&& rhs) noexcept
1953 : {
1954 41641 : if (&rhs != this)
1955 : {
1956 41641 : clear();
1957 :
1958 41641 : FabArrayBase::operator=(static_cast<FabArrayBase&&>(rhs));
1959 41641 : m_factory = std::move(rhs.m_factory);
1960 41641 : m_dallocator = std::move(rhs.m_dallocator);
1961 41641 : m_single_chunk_arena = std::move(rhs.m_single_chunk_arena);
1962 41641 : std::swap(m_single_chunk_size, rhs.m_single_chunk_size);
1963 41641 : define_function_called = rhs.define_function_called;
1964 41641 : std::swap(m_fabs_v, rhs.m_fabs_v);
1965 : #ifdef AMREX_USE_GPU
1966 : std::swap(m_dp_arrays, rhs.m_dp_arrays);
1967 : #endif
1968 41641 : std::swap(m_hp_arrays, rhs.m_hp_arrays);
1969 41641 : m_arrays = rhs.m_arrays;
1970 41641 : m_const_arrays = rhs.m_const_arrays;
1971 41641 : std::swap(m_tags, rhs.m_tags);
1972 41641 : shmem = std::move(rhs.shmem);
1973 :
1974 41641 : rhs.define_function_called = false;
1975 41641 : rhs.m_fabs_v.clear();
1976 41641 : rhs.m_tags.clear();
1977 41641 : rhs.clear();
1978 : }
1979 41641 : return *this;
1980 : }
1981 :
1982 : template <class FAB>
1983 1025381 : FabArray<FAB>::~FabArray ()
1984 : {
1985 1025381 : m_FA_stats.recordDelete();
1986 1025381 : clear();
1987 1025381 : }
1988 :
1989 : template <class FAB>
1990 : bool
1991 : FabArray<FAB>::ok () const
1992 : {
1993 : if (!define_function_called) { return false; }
1994 :
1995 : int isok = 1;
1996 :
1997 : for (MFIter fai(*this); fai.isValid() && isok; ++fai)
1998 : {
1999 : if (defined(fai))
2000 : {
2001 : if (get(fai).box() != fabbox(fai.index()))
2002 : {
2003 : isok = 0;
2004 : }
2005 : }
2006 : else
2007 : {
2008 : isok = 0;
2009 : }
2010 : }
2011 :
2012 : ParallelAllReduce::Min(isok, ParallelContext::CommunicatorSub());
2013 :
2014 : return isok == 1;
2015 : }
2016 :
2017 : template <class FAB>
2018 : bool
2019 : FabArray<FAB>::isDefined () const
2020 : {
2021 : return define_function_called;
2022 : }
2023 :
2024 : template <class FAB>
2025 : void
2026 218 : FabArray<FAB>::define (const BoxArray& bxs,
2027 : const DistributionMapping& dm,
2028 : int nvar,
2029 : int ngrow,
2030 : const MFInfo& info,
2031 : const FabFactory<FAB>& a_factory)
2032 : {
2033 218 : define(bxs,dm,nvar,IntVect(ngrow),info,a_factory);
2034 218 : }
2035 :
2036 : template <class FAB>
2037 : void
2038 886192 : FabArray<FAB>::define (const BoxArray& bxs,
2039 : const DistributionMapping& dm,
2040 : int nvar,
2041 : const IntVect& ngrow,
2042 : const MFInfo& info,
2043 : const FabFactory<FAB>& a_factory)
2044 : {
2045 1772380 : std::unique_ptr<FabFactory<FAB> > factory(a_factory.clone());
2046 :
2047 886192 : auto *default_arena = m_dallocator.m_arena;
2048 886192 : clear();
2049 :
2050 886192 : m_factory = std::move(factory);
2051 886192 : m_dallocator.m_arena = info.arena ? info.arena : default_arena;
2052 :
2053 886192 : define_function_called = true;
2054 :
2055 : AMREX_ASSERT(ngrow.allGE(0));
2056 : AMREX_ASSERT(boxarray.empty());
2057 886192 : FabArrayBase::define(bxs, dm, nvar, ngrow);
2058 :
2059 886192 : addThisBD();
2060 :
2061 886192 : if(info.alloc) {
2062 885940 : AllocFabs(*m_factory, m_dallocator.m_arena, info.tags, info.alloc_single_chunk);
2063 : #ifdef BL_USE_TEAM
2064 : ParallelDescriptor::MyTeam().MemoryBarrier();
2065 : #endif
2066 : }
2067 886192 : }
2068 :
2069 : template <class FAB>
2070 : void
2071 966153 : FabArray<FAB>::AllocFabs (const FabFactory<FAB>& factory, Arena* ar,
2072 : const Vector<std::string>& tags, bool alloc_single_chunk)
2073 : {
2074 966153 : if (shmem.alloc) { alloc_single_chunk = false; }
2075 : if constexpr (!IsBaseFab_v<FAB>) { alloc_single_chunk = false; }
2076 :
2077 966153 : const int n = indexArray.size();
2078 966153 : const int nworkers = ParallelDescriptor::TeamSize();
2079 966153 : shmem.alloc = (nworkers > 1);
2080 :
2081 966153 : bool alloc = !shmem.alloc;
2082 :
2083 966153 : FabInfo fab_info;
2084 966153 : fab_info.SetAlloc(alloc).SetShared(shmem.alloc).SetArena(ar);
2085 :
2086 966153 : if (alloc_single_chunk) {
2087 0 : m_single_chunk_size = 0L;
2088 0 : for (int i = 0; i < n; ++i) {
2089 0 : int K = indexArray[i];
2090 0 : const Box& tmpbox = fabbox(K);
2091 0 : m_single_chunk_size += factory.nBytes(tmpbox, n_comp, K);
2092 : }
2093 : AMREX_ASSERT(m_single_chunk_size >= 0); // 0 is okay.
2094 0 : m_single_chunk_arena = std::make_unique<detail::SingleChunkArena>(ar, m_single_chunk_size);
2095 0 : fab_info.SetArena(m_single_chunk_arena.get());
2096 : }
2097 :
2098 966153 : m_fabs_v.reserve(n);
2099 :
2100 966153 : Long nbytes = 0L;
2101 4380357 : for (int i = 0; i < n; ++i)
2102 : {
2103 3414202 : int K = indexArray[i];
2104 3414202 : const Box& tmpbox = fabbox(K);
2105 3414202 : m_fabs_v.push_back(factory.create(tmpbox, n_comp, fab_info, K));
2106 3414202 : nbytes += amrex::nBytesOwned(*m_fabs_v.back());
2107 : }
2108 :
2109 966153 : m_tags.clear();
2110 966153 : m_tags.emplace_back("All");
2111 966153 : for (auto const& t : m_region_tag) {
2112 0 : m_tags.push_back(t);
2113 : }
2114 966153 : for (auto const& t : tags) {
2115 0 : m_tags.push_back(t);
2116 : }
2117 1932310 : for (auto const& t: m_tags) {
2118 966153 : updateMemUsage(t, nbytes, ar);
2119 : }
2120 :
2121 : #ifdef BL_USE_TEAM
2122 : if (shmem.alloc)
2123 : {
2124 : const int teamlead = ParallelDescriptor::MyTeamLead();
2125 :
2126 : shmem.n_values = 0;
2127 : shmem.n_points = 0;
2128 : Vector<Long> offset(n,0);
2129 : Vector<Long> nextoffset(nworkers,-1);
2130 : for (int i = 0; i < n; ++i) {
2131 : int K = indexArray[i];
2132 : int owner = distributionMap[K] - teamlead;
2133 : Long s = m_fabs_v[i]->size();
2134 : if (ownership[i]) {
2135 : shmem.n_values += s;
2136 : shmem.n_points += m_fabs_v[i]->numPts();
2137 : }
2138 : if (nextoffset[owner] < 0) {
2139 : offset[i] = 0;
2140 : nextoffset[owner] = s;
2141 : } else {
2142 : offset[i] = nextoffset[owner];
2143 : nextoffset[owner] += s;
2144 : }
2145 : }
2146 :
2147 : size_t bytes = shmem.n_values*sizeof(value_type);
2148 :
2149 : value_type *mfp;
2150 : Vector<value_type*> dps;
2151 :
2152 : #if defined (BL_USE_MPI3)
2153 :
2154 : static MPI_Info info = MPI_INFO_NULL;
2155 : if (info == MPI_INFO_NULL) {
2156 : MPI_Info_create(&info);
2157 : MPI_Info_set(info, "alloc_shared_noncontig", "true");
2158 : }
2159 :
2160 : const MPI_Comm& team_comm = ParallelDescriptor::MyTeam().get();
2161 :
2162 : BL_MPI_REQUIRE( MPI_Win_allocate_shared(bytes, sizeof(value_type),
2163 : info, team_comm, &mfp, &shmem.win) );
2164 :
2165 : for (int w = 0; w < nworkers; ++w) {
2166 : MPI_Aint sz;
2167 : int disp;
2168 : value_type *dptr = 0;
2169 : BL_MPI_REQUIRE( MPI_Win_shared_query(shmem.win, w, &sz, &disp, &dptr) );
2170 : // AMREX_ASSERT(disp == sizeof(value_type));
2171 : dps.push_back(dptr);
2172 : }
2173 :
2174 : #else
2175 :
2176 : amrex::Abort("BaseFab::define: to allocate shared memory, USE_MPI3 must be true");
2177 :
2178 : #endif
2179 :
2180 : for (int i = 0; i < n; ++i) {
2181 : int K = indexArray[i];
2182 : int owner = distributionMap[K] - teamlead;
2183 : value_type *p = dps[owner] + offset[i];
2184 : m_fabs_v[i]->setPtr(p, m_fabs_v[i]->size());
2185 : }
2186 :
2187 : for (Long i = 0; i < shmem.n_values; i++, mfp++) {
2188 : new (mfp) value_type;
2189 : }
2190 :
2191 : amrex::update_fab_stats(shmem.n_points, shmem.n_values, sizeof(value_type));
2192 : }
2193 : #endif
2194 966153 : }
2195 :
2196 : template <class FAB>
2197 : void
2198 : FabArray<FAB>::setFab_assert (int K, FAB const& fab) const
2199 : {
2200 : amrex::ignore_unused(K,fab);
2201 : AMREX_ASSERT(n_comp == fab.nComp());
2202 : AMREX_ASSERT(!boxarray.empty());
2203 : AMREX_ASSERT(fab.box() == fabbox(K));
2204 : AMREX_ASSERT(distributionMap[K] == ParallelDescriptor::MyProc());
2205 : AMREX_ASSERT(m_single_chunk_arena == nullptr);
2206 : }
2207 :
2208 : template <class FAB>
2209 : void
2210 : FabArray<FAB>::setFab (int boxno, std::unique_ptr<FAB> elem)
2211 : {
2212 : if (n_comp == 0) {
2213 : n_comp = elem->nComp();
2214 : }
2215 :
2216 : setFab_assert(boxno, *elem);
2217 :
2218 : if (m_fabs_v.empty()) {
2219 : m_fabs_v.resize(indexArray.size(),nullptr);
2220 : }
2221 :
2222 : const int li = localindex(boxno);
2223 : if (m_fabs_v[li]) {
2224 : m_factory->destroy(m_fabs_v[li]);
2225 : }
2226 : m_fabs_v[li] = elem.release();
2227 : }
2228 :
2229 : template <class FAB>
2230 : template <class F, std::enable_if_t<std::is_move_constructible_v<F>,int> >
2231 : void
2232 : FabArray<FAB>::setFab (int boxno, FAB&& elem)
2233 : {
2234 : if (n_comp == 0) {
2235 : n_comp = elem.nComp();
2236 : }
2237 :
2238 : setFab_assert(boxno, elem);
2239 :
2240 : if (m_fabs_v.empty()) {
2241 : m_fabs_v.resize(indexArray.size(),nullptr);
2242 : }
2243 :
2244 : const int li = localindex(boxno);
2245 : if (m_fabs_v[li]) {
2246 : m_factory->destroy(m_fabs_v[li]);
2247 : }
2248 : m_fabs_v[li] = new FAB(std::move(elem));
2249 : }
2250 :
2251 : template <class FAB>
2252 : void
2253 : FabArray<FAB>::setFab (const MFIter& mfi, std::unique_ptr<FAB> elem)
2254 : {
2255 : if (n_comp == 0) {
2256 : n_comp = elem->nComp();
2257 : }
2258 :
2259 : setFab_assert(mfi.index(), *elem);
2260 :
2261 : if (m_fabs_v.empty()) {
2262 : m_fabs_v.resize(indexArray.size(),nullptr);
2263 : }
2264 :
2265 : const int li = mfi.LocalIndex();
2266 : if (m_fabs_v[li]) {
2267 : m_factory->destroy(m_fabs_v[li]);
2268 : }
2269 : m_fabs_v[li] = elem.release();
2270 : }
2271 :
2272 : template <class FAB>
2273 : template <class F, std::enable_if_t<std::is_move_constructible_v<F>,int> >
2274 : void
2275 : FabArray<FAB>::setFab (const MFIter& mfi, FAB&& elem)
2276 : {
2277 : if (n_comp == 0) {
2278 : n_comp = elem.nComp();
2279 : }
2280 :
2281 : setFab_assert(mfi.index(), elem);
2282 :
2283 : if (m_fabs_v.empty()) {
2284 : m_fabs_v.resize(indexArray.size(),nullptr);
2285 : }
2286 :
2287 : const int li = mfi.LocalIndex();
2288 : if (m_fabs_v[li]) {
2289 : m_factory->destroy(m_fabs_v[li]);
2290 : }
2291 : m_fabs_v[li] = new FAB(std::move(elem));
2292 : }
2293 :
2294 : template <class FAB>
2295 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
2296 : void
2297 : FabArray<FAB>::setBndry (value_type val)
2298 : {
2299 : setBndry(val, 0, n_comp);
2300 : }
2301 :
2302 : template <class FAB>
2303 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
2304 : void
2305 0 : FabArray<FAB>::setBndry (value_type val,
2306 : int strt_comp,
2307 : int ncomp)
2308 : {
2309 0 : if (n_grow.max() > 0)
2310 : {
2311 : #ifdef AMREX_USE_GPU
2312 : if (Gpu::inLaunchRegion()) {
2313 : bool use_mfparfor = true;
2314 : const int nboxes = local_size();
2315 : if (nboxes == 1) {
2316 : if (boxarray[indexArray[0]].numPts() > Long(65*65*65)) {
2317 : use_mfparfor = false;
2318 : }
2319 : } else {
2320 : for (int i = 0; i < nboxes; ++i) {
2321 : const Long npts = boxarray[indexArray[i]].numPts();
2322 : if (npts >= Long(64*64*64)) {
2323 : use_mfparfor = false;
2324 : break;
2325 : } else if (npts <= Long(17*17*17)) {
2326 : break;
2327 : }
2328 : }
2329 : }
2330 : const IntVect nghost = n_grow;
2331 : if (use_mfparfor) {
2332 : auto const& ma = this->arrays();
2333 : ParallelFor(*this, nghost,
2334 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
2335 : {
2336 : auto const& a = ma[box_no];
2337 : Box vbx(a);
2338 : vbx.grow(-nghost);
2339 : if (!vbx.contains(i,j,k)) {
2340 : for (int n = 0; n < ncomp; ++n) {
2341 : a(i,j,k,strt_comp+n) = val;
2342 : }
2343 : }
2344 : });
2345 : Gpu::streamSynchronize();
2346 : } else {
2347 : using Tag = Array4BoxTag<value_type>;
2348 : Vector<Tag> tags;
2349 : for (MFIter mfi(*this); mfi.isValid(); ++mfi) {
2350 : Box const& vbx = mfi.validbox();
2351 : auto const& a = this->array(mfi);
2352 :
2353 : Box b;
2354 : #if (AMREX_SPACEDIM == 3)
2355 : if (nghost[2] > 0) {
2356 : b = vbx;
2357 : b.setRange(2, vbx.smallEnd(2)-nghost[2], nghost[2]);
2358 : b.grow(IntVect(nghost[0],nghost[1],0));
2359 : tags.emplace_back(Tag{a, b});
2360 : b.shift(2, vbx.length(2)+nghost[2]);
2361 : tags.emplace_back(Tag{a, b});
2362 : }
2363 : #endif
2364 : #if (AMREX_SPACEDIM >= 2)
2365 : if (nghost[1] > 0) {
2366 : b = vbx;
2367 : b.setRange(1, vbx.smallEnd(1)-nghost[1], nghost[1]);
2368 : b.grow(0, nghost[0]);
2369 : tags.emplace_back(Tag{a, b});
2370 : b.shift(1, vbx.length(1)+nghost[1]);
2371 : tags.emplace_back(Tag{a, b});
2372 : }
2373 : #endif
2374 : if (nghost[0] > 0) {
2375 : b = vbx;
2376 : b.setRange(0, vbx.smallEnd(0)-nghost[0], nghost[0]);
2377 : tags.emplace_back(Tag{a, b});
2378 : b.shift(0, vbx.length(0)+nghost[0]);
2379 : tags.emplace_back(Tag{a, b});
2380 : }
2381 : }
2382 :
2383 : ParallelFor(tags, ncomp,
2384 : [=] AMREX_GPU_DEVICE (int i, int j, int k, int n, Tag const& tag) noexcept
2385 : {
2386 : tag.dfab(i,j,k,strt_comp+n) = val;
2387 : });
2388 : }
2389 : } else
2390 : #endif
2391 : {
2392 : #ifdef AMREX_USE_OMP
2393 : #pragma omp parallel
2394 : #endif
2395 0 : for (MFIter fai(*this); fai.isValid(); ++fai)
2396 : {
2397 0 : get(fai).template setComplement<RunOn::Host>(val, fai.validbox(), strt_comp, ncomp);
2398 : }
2399 : }
2400 : }
2401 0 : }
2402 :
2403 : template <class FAB>
2404 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
2405 : void
2406 213210 : FabArray<FAB>::setDomainBndry (value_type val, const Geometry& geom)
2407 : {
2408 213210 : setDomainBndry(val, 0, n_comp, geom);
2409 213210 : }
2410 :
2411 : template <class FAB>
2412 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
2413 : void
2414 213210 : FabArray<FAB>::setDomainBndry (value_type val,
2415 : int strt_comp,
2416 : int ncomp,
2417 : const Geometry& geom)
2418 : {
2419 : BL_PROFILE("FabArray::setDomainBndry()");
2420 :
2421 213210 : Box domain_box = amrex::convert(geom.Domain(), boxArray().ixType());
2422 639630 : for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
2423 426420 : if (geom.isPeriodic(idim)) {
2424 184585 : int n = domain_box.length(idim);
2425 184585 : domain_box.grow(idim, n);
2426 : }
2427 : }
2428 :
2429 : #ifdef AMREX_USE_OMP
2430 : #pragma omp parallel if (Gpu::notInLaunchRegion())
2431 : #endif
2432 1376070 : for (MFIter fai(*this); fai.isValid(); ++fai)
2433 : {
2434 1162860 : const Box& gbx = fai.fabbox();
2435 1162860 : if (! domain_box.contains(gbx))
2436 : {
2437 345536 : get(fai).template setComplement<RunOn::Device>(val, domain_box, strt_comp, ncomp);
2438 : }
2439 : }
2440 213210 : }
2441 :
2442 : template <class FAB>
2443 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int> FOO>
2444 : typename F::value_type
2445 : FabArray<FAB>::sum (int comp, IntVect const& nghost, bool local) const
2446 : {
2447 : BL_PROFILE("FabArray::sum()");
2448 :
2449 : using T = typename FAB::value_type;
2450 : auto sm = T(0.0);
2451 : #ifdef AMREX_USE_GPU
2452 : if (Gpu::inLaunchRegion()) {
2453 : auto const& ma = this->const_arrays();
2454 : sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<T>{}, *this, nghost,
2455 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
2456 : -> GpuTuple<T>
2457 : {
2458 : return ma[box_no](i,j,k,comp);
2459 : });
2460 : } else
2461 : #endif
2462 : {
2463 : #ifdef AMREX_USE_OMP
2464 : #pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
2465 : #endif
2466 : for (MFIter mfi(*this,true); mfi.isValid(); ++mfi)
2467 : {
2468 : Box const& bx = mfi.growntilebox(nghost);
2469 : auto const& a = this->const_array(mfi);
2470 : auto tmp = T(0.0);
2471 : AMREX_LOOP_3D(bx, i, j, k,
2472 : {
2473 : tmp += a(i,j,k,comp);
2474 : });
2475 : sm += tmp; // Do it this way so that it does not break regression tests.
2476 : }
2477 : }
2478 :
2479 : if (!local) {
2480 : ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
2481 : }
2482 :
2483 : return sm;
2484 : }
2485 :
2486 : template <class FAB>
2487 : void
2488 : FabArray<FAB>::copyTo (FAB& dest, int nghost) const
2489 : {
2490 : copyTo(dest, 0, 0, dest.nComp(), nghost);
2491 : }
2492 :
2493 : template <class FAB>
2494 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
2495 : void
2496 23978 : FabArray<FAB>::setVal (value_type val)
2497 : {
2498 23978 : setVal(val,0,n_comp,n_grow);
2499 23978 : }
2500 :
2501 : template <class FAB>
2502 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
2503 : FabArray<FAB>&
2504 : FabArray<FAB>::operator= (value_type val)
2505 : {
2506 : setVal(val);
2507 : return *this;
2508 : }
2509 :
2510 : template <class FAB>
2511 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
2512 : void
2513 0 : FabArray<FAB>::setVal (value_type val,
2514 : int comp,
2515 : int ncomp,
2516 : int nghost)
2517 : {
2518 0 : setVal(val,comp,ncomp,IntVect(nghost));
2519 0 : }
2520 :
2521 : template <class FAB>
2522 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
2523 : void
2524 23978 : FabArray<FAB>::setVal (value_type val,
2525 : int comp,
2526 : int ncomp,
2527 : const IntVect& nghost)
2528 : {
2529 : AMREX_ASSERT(nghost.allGE(0) && nghost.allLE(n_grow));
2530 23978 : AMREX_ALWAYS_ASSERT(comp+ncomp <= n_comp);
2531 :
2532 : BL_PROFILE("FabArray::setVal()");
2533 :
2534 : #ifdef AMREX_USE_GPU
2535 : if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
2536 : auto const& fa = this->arrays();
2537 : ParallelFor(*this, nghost, ncomp,
2538 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
2539 : {
2540 : fa[box_no](i,j,k,n+comp) = val;
2541 : });
2542 : if (!Gpu::inNoSyncRegion()) {
2543 : Gpu::streamSynchronize();
2544 : }
2545 : } else
2546 : #endif
2547 : {
2548 : #ifdef AMREX_USE_OMP
2549 : #pragma omp parallel if (Gpu::notInLaunchRegion())
2550 : #endif
2551 81038 : for (MFIter fai(*this,TilingIfNotGPU()); fai.isValid(); ++fai)
2552 : {
2553 57060 : const Box& bx = fai.growntilebox(nghost);
2554 57060 : auto fab = this->array(fai);
2555 27831137 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n,
2556 : {
2557 : fab(i,j,k,n+comp) = val;
2558 : });
2559 : }
2560 : }
2561 23978 : }
2562 :
2563 : template <class FAB>
2564 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
2565 : void
2566 : FabArray<FAB>::setVal (value_type val,
2567 : const Box& region,
2568 : int comp,
2569 : int ncomp,
2570 : int nghost)
2571 : {
2572 : setVal(val,region,comp,ncomp,IntVect(nghost));
2573 : }
2574 :
2575 : template <class FAB>
2576 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
2577 : void
2578 : FabArray<FAB>::setVal (value_type val,
2579 : const Box& region,
2580 : int comp,
2581 : int ncomp,
2582 : const IntVect& nghost)
2583 : {
2584 : AMREX_ASSERT(nghost.allGE(0) && nghost.allLE(n_grow));
2585 : AMREX_ALWAYS_ASSERT(comp+ncomp <= n_comp);
2586 :
2587 : BL_PROFILE("FabArray::setVal(val,region,comp,ncomp,nghost)");
2588 :
2589 : #ifdef AMREX_USE_GPU
2590 : if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
2591 : auto const& fa = this->arrays();
2592 : ParallelFor(*this, nghost, ncomp,
2593 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
2594 : {
2595 : if (region.contains(i,j,k)) {
2596 : fa[box_no](i,j,k,n+comp) = val;
2597 : }
2598 : });
2599 : if (!Gpu::inNoSyncRegion()) {
2600 : Gpu::streamSynchronize();
2601 : }
2602 : } else
2603 : #endif
2604 : {
2605 : #ifdef AMREX_USE_OMP
2606 : AMREX_ALWAYS_ASSERT(!omp_in_parallel());
2607 : #pragma omp parallel if (Gpu::notInLaunchRegion())
2608 : #endif
2609 : for (MFIter fai(*this,TilingIfNotGPU()); fai.isValid(); ++fai)
2610 : {
2611 : Box b = fai.growntilebox(nghost) & region;
2612 :
2613 : if (b.ok()) {
2614 : auto fab = this->array(fai);
2615 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D( b, ncomp, i, j, k, n,
2616 : {
2617 : fab(i,j,k,n+comp) = val;
2618 : });
2619 : }
2620 : }
2621 : }
2622 : }
2623 :
2624 : template <class FAB>
2625 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
2626 : void
2627 : FabArray<FAB>::abs (int comp, int ncomp, int nghost)
2628 : {
2629 : abs(comp, ncomp, IntVect(nghost));
2630 : }
2631 :
2632 : template <class FAB>
2633 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
2634 : void
2635 : FabArray<FAB>::abs (int comp, int ncomp, const IntVect& nghost)
2636 : {
2637 : AMREX_ASSERT(nghost.allGE(0) && nghost.allLE(n_grow));
2638 : AMREX_ALWAYS_ASSERT(comp+ncomp <= n_comp);
2639 : BL_PROFILE("FabArray::abs()");
2640 :
2641 : #ifdef AMREX_USE_GPU
2642 : if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
2643 : auto const& fa = this->arrays();
2644 : ParallelFor(*this, nghost, ncomp,
2645 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
2646 : {
2647 : fa[box_no](i,j,k,n+comp) = std::abs(fa[box_no](i,j,k,n+comp));
2648 : });
2649 : if (!Gpu::inNoSyncRegion()) {
2650 : Gpu::streamSynchronize();
2651 : }
2652 : } else
2653 : #endif
2654 : {
2655 : #ifdef AMREX_USE_OMP
2656 : #pragma omp parallel if (Gpu::notInLaunchRegion())
2657 : #endif
2658 : for (MFIter mfi(*this,TilingIfNotGPU()); mfi.isValid(); ++mfi)
2659 : {
2660 : const Box& bx = mfi.growntilebox(nghost);
2661 : auto fab = this->array(mfi);
2662 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n,
2663 : {
2664 : fab(i,j,k,n+comp) = std::abs(fab(i,j,k,n+comp));
2665 : });
2666 : }
2667 : }
2668 : }
2669 :
2670 : template <class FAB>
2671 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
2672 : void
2673 : FabArray<FAB>::plus (value_type val, int comp, int num_comp, int nghost)
2674 : {
2675 : BL_PROFILE("FabArray::plus()");
2676 :
2677 : #ifdef AMREX_USE_GPU
2678 : if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
2679 : auto const& fa = this->arrays();
2680 : ParallelFor(*this, IntVect(nghost), num_comp,
2681 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
2682 : {
2683 : fa[box_no](i,j,k,n+comp) += val;
2684 : });
2685 : if (!Gpu::inNoSyncRegion()) {
2686 : Gpu::streamSynchronize();
2687 : }
2688 : } else
2689 : #endif
2690 : {
2691 : #ifdef AMREX_USE_OMP
2692 : #pragma omp parallel if (Gpu::notInLaunchRegion())
2693 : #endif
2694 : for (MFIter mfi(*this,TilingIfNotGPU()); mfi.isValid(); ++mfi)
2695 : {
2696 : const Box& bx = mfi.growntilebox(nghost);
2697 : auto fab = this->array(mfi);
2698 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, num_comp, i, j, k, n,
2699 : {
2700 : fab(i,j,k,n+comp) += val;
2701 : });
2702 : }
2703 : }
2704 : }
2705 :
2706 : template <class FAB>
2707 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
2708 : void
2709 : FabArray<FAB>::plus (value_type val, const Box& region, int comp, int num_comp, int nghost)
2710 : {
2711 : BL_PROFILE("FabArray::plus(val, region, comp, num_comp, nghost)");
2712 :
2713 : #ifdef AMREX_USE_GPU
2714 : if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
2715 : auto const& fa = this->arrays();
2716 : ParallelFor(*this, IntVect(nghost), num_comp,
2717 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
2718 : {
2719 : if (region.contains(i,j,k)) {
2720 : fa[box_no](i,j,k,n+comp) += val;
2721 : }
2722 : });
2723 : if (!Gpu::inNoSyncRegion()) {
2724 : Gpu::streamSynchronize();
2725 : }
2726 : } else
2727 : #endif
2728 : {
2729 : #ifdef AMREX_USE_OMP
2730 : #pragma omp parallel if (Gpu::notInLaunchRegion())
2731 : #endif
2732 : for (MFIter mfi(*this,TilingIfNotGPU()); mfi.isValid(); ++mfi)
2733 : {
2734 : const Box& bx = mfi.growntilebox(nghost) & region;
2735 : if (bx.ok()) {
2736 : auto fab = this->array(mfi);
2737 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, num_comp, i, j, k, n,
2738 : {
2739 : fab(i,j,k,n+comp) += val;
2740 : });
2741 : }
2742 : }
2743 : }
2744 : }
2745 :
2746 : template <class FAB>
2747 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
2748 : void
2749 : FabArray<FAB>::mult (value_type val, int comp, int num_comp, int nghost)
2750 : {
2751 : BL_PROFILE("FabArray::mult()");
2752 :
2753 : #ifdef AMREX_USE_GPU
2754 : if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
2755 : auto const& fa = this->arrays();
2756 : ParallelFor(*this, IntVect(nghost), num_comp,
2757 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
2758 : {
2759 : fa[box_no](i,j,k,n+comp) *= val;
2760 : });
2761 : if (!Gpu::inNoSyncRegion()) {
2762 : Gpu::streamSynchronize();
2763 : }
2764 : } else
2765 : #endif
2766 : {
2767 : #ifdef AMREX_USE_OMP
2768 : #pragma omp parallel if (Gpu::notInLaunchRegion())
2769 : #endif
2770 : for (MFIter mfi(*this,TilingIfNotGPU()); mfi.isValid(); ++mfi)
2771 : {
2772 : const Box& bx = mfi.growntilebox(nghost);
2773 : auto fab = this->array(mfi);
2774 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, num_comp, i, j, k, n,
2775 : {
2776 : fab(i,j,k,n+comp) *= val;
2777 : });
2778 : }
2779 : }
2780 : }
2781 :
2782 : template <class FAB>
2783 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
2784 : void
2785 : FabArray<FAB>::mult (value_type val, const Box& region, int comp, int num_comp, int nghost)
2786 : {
2787 : BL_PROFILE("FabArray::mult(val, region, comp, num_comp, nghost)");
2788 :
2789 : #ifdef AMREX_USE_GPU
2790 : if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
2791 : auto const& fa = this->arrays();
2792 : ParallelFor(*this, IntVect(nghost), num_comp,
2793 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
2794 : {
2795 : if (region.contains(i,j,k)) {
2796 : fa[box_no](i,j,k,n+comp) *= val;
2797 : }
2798 : });
2799 : if (!Gpu::inNoSyncRegion()) {
2800 : Gpu::streamSynchronize();
2801 : }
2802 : } else
2803 : #endif
2804 : {
2805 : #ifdef AMREX_USE_OMP
2806 : #pragma omp parallel if (Gpu::notInLaunchRegion())
2807 : #endif
2808 : for (MFIter mfi(*this,TilingIfNotGPU()); mfi.isValid(); ++mfi)
2809 : {
2810 : const Box& bx = mfi.growntilebox(nghost) & region;
2811 : if (bx.ok()) {
2812 : auto fab = this->array(mfi);
2813 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, num_comp, i, j, k, n,
2814 : {
2815 : fab(i,j,k,n+comp) *= val;
2816 : });
2817 : }
2818 : }
2819 : }
2820 : }
2821 :
2822 : template <class FAB>
2823 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
2824 : void
2825 : FabArray<FAB>::invert (value_type numerator, int comp, int num_comp, int nghost)
2826 : {
2827 : BL_PROFILE("FabArray::invert()");
2828 :
2829 : #ifdef AMREX_USE_GPU
2830 : if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
2831 : auto const& fa = this->arrays();
2832 : ParallelFor(*this, IntVect(nghost), num_comp,
2833 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
2834 : {
2835 : fa[box_no](i,j,k,n+comp) = numerator / fa[box_no](i,j,k,n+comp);
2836 : });
2837 : if (!Gpu::inNoSyncRegion()) {
2838 : Gpu::streamSynchronize();
2839 : }
2840 : } else
2841 : #endif
2842 : {
2843 : #ifdef AMREX_USE_OMP
2844 : #pragma omp parallel if (Gpu::notInLaunchRegion())
2845 : #endif
2846 : for (MFIter mfi(*this,TilingIfNotGPU()); mfi.isValid(); ++mfi)
2847 : {
2848 : const Box& bx = mfi.growntilebox(nghost);
2849 : auto fab = this->array(mfi);
2850 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, num_comp, i, j, k, n,
2851 : {
2852 : fab(i,j,k,n+comp) = numerator / fab(i,j,k,n+comp);
2853 : });
2854 : }
2855 : }
2856 : }
2857 :
2858 : template <class FAB>
2859 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
2860 : void
2861 : FabArray<FAB>::invert (value_type numerator, const Box& region, int comp, int num_comp, int nghost)
2862 : {
2863 : BL_PROFILE("FabArray::invert(numerator, region, comp, num_comp, nghost)");
2864 :
2865 : #ifdef AMREX_USE_GPU
2866 : if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
2867 : auto const& fa = this->arrays();
2868 : ParallelFor(*this, IntVect(nghost), num_comp,
2869 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
2870 : {
2871 : if (region.contains(i,j,k)) {
2872 : fa[box_no](i,j,k,n+comp) = numerator / fa[box_no](i,j,k,n+comp);
2873 : }
2874 : });
2875 : if (!Gpu::inNoSyncRegion()) {
2876 : Gpu::streamSynchronize();
2877 : }
2878 : } else
2879 : #endif
2880 : {
2881 : #ifdef AMREX_USE_OMP
2882 : #pragma omp parallel if (Gpu::notInLaunchRegion())
2883 : #endif
2884 : for (MFIter mfi(*this,TilingIfNotGPU()); mfi.isValid(); ++mfi)
2885 : {
2886 : const Box& bx = mfi.growntilebox(nghost) & region;
2887 : if (bx.ok()) {
2888 : auto fab = this->array(mfi);
2889 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, num_comp, i, j, k, n,
2890 : {
2891 : fab(i,j,k,n+comp) = numerator / fab(i,j,k,n+comp);
2892 : });
2893 : }
2894 : }
2895 : }
2896 : }
2897 :
2898 : template <class FAB>
2899 : void
2900 : FabArray<FAB>::shift (const IntVect& v)
2901 : {
2902 : clearThisBD(); // The new boxarray will have a different ID.
2903 : boxarray.shift(v);
2904 : addThisBD();
2905 : #ifdef AMREX_USE_OMP
2906 : #pragma omp parallel
2907 : #endif
2908 : for (MFIter fai(*this); fai.isValid(); ++fai)
2909 : {
2910 : get(fai).shift(v);
2911 : }
2912 : clear_arrays();
2913 : }
2914 :
2915 : template <class FAB>
2916 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int> FOO>
2917 : void FabArray<FAB>::Saxpy (FabArray<FAB>& y, value_type a, FabArray<FAB> const& x,
2918 : int xcomp, int ycomp, int ncomp, IntVect const& nghost)
2919 : {
2920 : AMREX_ASSERT(y.boxArray() == x.boxArray());
2921 : AMREX_ASSERT(y.distributionMap == x.distributionMap);
2922 : AMREX_ASSERT(y.nGrowVect().allGE(nghost) && x.nGrowVect().allGE(nghost));
2923 :
2924 : BL_PROFILE("FabArray::Saxpy()");
2925 :
2926 : #ifdef AMREX_USE_GPU
2927 : if (Gpu::inLaunchRegion() && y.isFusingCandidate()) {
2928 : auto const& yma = y.arrays();
2929 : auto const& xma = x.const_arrays();
2930 : ParallelFor(y, nghost, ncomp,
2931 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
2932 : {
2933 : yma[box_no](i,j,k,ycomp+n) += a * xma[box_no](i,j,k,xcomp+n);
2934 : });
2935 : if (!Gpu::inNoSyncRegion()) {
2936 : Gpu::streamSynchronize();
2937 : }
2938 : } else
2939 : #endif
2940 : {
2941 : #ifdef AMREX_USE_OMP
2942 : #pragma omp parallel if (Gpu::notInLaunchRegion())
2943 : #endif
2944 : for (MFIter mfi(y,TilingIfNotGPU()); mfi.isValid(); ++mfi)
2945 : {
2946 : const Box& bx = mfi.growntilebox(nghost);
2947 :
2948 : if (bx.ok()) {
2949 : auto const& xfab = x.const_array(mfi);
2950 : auto const& yfab = y.array(mfi);
2951 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n,
2952 : {
2953 : yfab(i,j,k,ycomp+n) += a * xfab(i,j,k,xcomp+n);
2954 : });
2955 : }
2956 : }
2957 : }
2958 : }
2959 :
2960 : template <class FAB>
2961 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int> FOO>
2962 : void
2963 : FabArray<FAB>::Xpay (FabArray<FAB>& y, value_type a, FabArray<FAB> const& x,
2964 : int xcomp, int ycomp, int ncomp, IntVect const& nghost)
2965 : {
2966 : AMREX_ASSERT(y.boxArray() == x.boxArray());
2967 : AMREX_ASSERT(y.distributionMap == x.distributionMap);
2968 : AMREX_ASSERT(y.nGrowVect().allGE(nghost) && x.nGrowVect().allGE(nghost));
2969 :
2970 : BL_PROFILE("FabArray::Xpay()");
2971 :
2972 : #ifdef AMREX_USE_GPU
2973 : if (Gpu::inLaunchRegion() && y.isFusingCandidate()) {
2974 : auto const& yfa = y.arrays();
2975 : auto const& xfa = x.const_arrays();
2976 : ParallelFor(y, nghost, ncomp,
2977 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
2978 : {
2979 : yfa[box_no](i,j,k,n+ycomp) = xfa[box_no](i,j,k,n+xcomp)
2980 : + a * yfa[box_no](i,j,k,n+ycomp);
2981 : });
2982 : if (!Gpu::inNoSyncRegion()) {
2983 : Gpu::streamSynchronize();
2984 : }
2985 : } else
2986 : #endif
2987 : {
2988 : #ifdef AMREX_USE_OMP
2989 : #pragma omp parallel if (Gpu::notInLaunchRegion())
2990 : #endif
2991 : for (MFIter mfi(y,TilingIfNotGPU()); mfi.isValid(); ++mfi)
2992 : {
2993 : const Box& bx = mfi.growntilebox(nghost);
2994 : auto const& xFab = x.const_array(mfi);
2995 : auto const& yFab = y.array(mfi);
2996 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n,
2997 : {
2998 : yFab(i,j,k,n+ycomp) = xFab(i,j,k,n+xcomp)
2999 : + a * yFab(i,j,k,n+ycomp);
3000 : });
3001 : }
3002 : }
3003 : }
3004 :
3005 : template <class FAB>
3006 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int> FOO>
3007 : void
3008 : FabArray<FAB>::LinComb (FabArray<FAB>& dst,
3009 : value_type a, const FabArray<FAB>& x, int xcomp,
3010 : value_type b, const FabArray<FAB>& y, int ycomp,
3011 : int dstcomp, int numcomp, const IntVect& nghost)
3012 : {
3013 : AMREX_ASSERT(dst.boxArray() == x.boxArray());
3014 : AMREX_ASSERT(dst.distributionMap == x.distributionMap);
3015 : AMREX_ASSERT(dst.boxArray() == y.boxArray());
3016 : AMREX_ASSERT(dst.distributionMap == y.distributionMap);
3017 : AMREX_ASSERT(dst.nGrowVect().allGE(nghost) && x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost));
3018 :
3019 : BL_PROFILE("FabArray::LinComb()");
3020 :
3021 : #ifdef AMREX_USE_GPU
3022 : if (Gpu::inLaunchRegion() && dst.isFusingCandidate()) {
3023 : auto const& dstma = dst.arrays();
3024 : auto const& xma = x.const_arrays();
3025 : auto const& yma = y.const_arrays();
3026 : ParallelFor(dst, nghost, numcomp,
3027 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
3028 : {
3029 : dstma[box_no](i,j,k,dstcomp+n) = a*xma[box_no](i,j,k,xcomp+n)
3030 : + b*yma[box_no](i,j,k,ycomp+n);
3031 : });
3032 : if (!Gpu::inNoSyncRegion()) {
3033 : Gpu::streamSynchronize();
3034 : }
3035 : } else
3036 : #endif
3037 : {
3038 : #ifdef AMREX_USE_OMP
3039 : #pragma omp parallel if (Gpu::notInLaunchRegion())
3040 : #endif
3041 : for (MFIter mfi(dst,TilingIfNotGPU()); mfi.isValid(); ++mfi)
3042 : {
3043 : const Box& bx = mfi.growntilebox(nghost);
3044 : auto const& xfab = x.const_array(mfi);
3045 : auto const& yfab = y.const_array(mfi);
3046 : auto const& dfab = dst.array(mfi);
3047 : AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
3048 : {
3049 : dfab(i,j,k,dstcomp+n) = a*xfab(i,j,k,xcomp+n) + b*yfab(i,j,k,ycomp+n);
3050 : });
3051 : }
3052 : }
3053 : }
3054 :
3055 : template <class FAB>
3056 : template <typename BUF>
3057 : void
3058 216055 : FabArray<FAB>::FillBoundary (bool cross)
3059 : {
3060 : BL_PROFILE("FabArray::FillBoundary()");
3061 432110 : if ( n_grow.max() > 0 ) {
3062 216055 : FillBoundary_nowait<BUF>(0, nComp(), n_grow, Periodicity::NonPeriodic(), cross);
3063 216055 : FillBoundary_finish<BUF>();
3064 : }
3065 216055 : }
3066 :
3067 : template <class FAB>
3068 : template <typename BUF>
3069 : void
3070 433151 : FabArray<FAB>::FillBoundary (const Periodicity& period, bool cross)
3071 : {
3072 : BL_PROFILE("FabArray::FillBoundary()");
3073 866302 : if ( n_grow.max() > 0 ) {
3074 219941 : FillBoundary_nowait<BUF>(0, nComp(), n_grow, period, cross);
3075 219941 : FillBoundary_finish<BUF>();
3076 : }
3077 433151 : }
3078 :
3079 : template <class FAB>
3080 : template <typename BUF>
3081 : void
3082 : FabArray<FAB>::FillBoundary (const IntVect& nghost, const Periodicity& period, bool cross)
3083 : {
3084 : BL_PROFILE("FabArray::FillBoundary()");
3085 : AMREX_ALWAYS_ASSERT_WITH_MESSAGE(nghost.allLE(nGrowVect()),
3086 : "FillBoundary: asked to fill more ghost cells than we have");
3087 : if ( nghost.max() > 0 ) {
3088 : FillBoundary_nowait<BUF>(0, nComp(), nghost, period, cross);
3089 : FillBoundary_finish<BUF>();
3090 : }
3091 : }
3092 :
3093 : template <class FAB>
3094 : template <typename BUF>
3095 : void
3096 : FabArray<FAB>::FillBoundary (int scomp, int ncomp, bool cross)
3097 : {
3098 : BL_PROFILE("FabArray::FillBoundary()");
3099 : if ( n_grow.max() > 0 ) {
3100 : FillBoundary_nowait<BUF>(scomp, ncomp, n_grow, Periodicity::NonPeriodic(), cross);
3101 : FillBoundary_finish<BUF>();
3102 : }
3103 : }
3104 :
3105 : template <class FAB>
3106 : template <typename BUF>
3107 : void
3108 : FabArray<FAB>::FillBoundary (int scomp, int ncomp, const Periodicity& period, bool cross)
3109 : {
3110 : BL_PROFILE("FabArray::FillBoundary()");
3111 : if ( n_grow.max() > 0 ) {
3112 : FillBoundary_nowait<BUF>(scomp, ncomp, n_grow, period, cross);
3113 : FillBoundary_finish<BUF>();
3114 : }
3115 : }
3116 :
3117 : template <class FAB>
3118 : template <typename BUF>
3119 : void
3120 216914 : FabArray<FAB>::FillBoundary (int scomp, int ncomp, const IntVect& nghost,
3121 : const Periodicity& period, bool cross)
3122 : {
3123 : BL_PROFILE("FabArray::FillBoundary()");
3124 433828 : AMREX_ALWAYS_ASSERT_WITH_MESSAGE(nghost.allLE(nGrowVect()),
3125 : "FillBoundary: asked to fill more ghost cells than we have");
3126 216914 : if ( nghost.max() > 0 ) {
3127 216914 : FillBoundary_nowait<BUF>(scomp, ncomp, nghost, period, cross);
3128 216914 : FillBoundary_finish<BUF>();
3129 : }
3130 216914 : }
3131 :
3132 : template <class FAB>
3133 : template <typename BUF>
3134 : void
3135 : FabArray<FAB>::FillBoundary_nowait (bool cross)
3136 : {
3137 : FillBoundary_nowait<BUF>(0, nComp(), nGrowVect(), Periodicity::NonPeriodic(), cross);
3138 : }
3139 :
3140 : template <class FAB>
3141 : template <typename BUF>
3142 : void
3143 : FabArray<FAB>::FillBoundary_nowait (const Periodicity& period, bool cross)
3144 : {
3145 : FillBoundary_nowait<BUF>(0, nComp(), nGrowVect(), period, cross);
3146 : }
3147 :
3148 : template <class FAB>
3149 : template <typename BUF>
3150 : void
3151 : FabArray<FAB>::FillBoundary_nowait (const IntVect& nghost, const Periodicity& period, bool cross)
3152 : {
3153 : FillBoundary_nowait<BUF>(0, nComp(), nghost, period, cross);
3154 : }
3155 :
3156 : template <class FAB>
3157 : template <typename BUF>
3158 : void
3159 : FabArray<FAB>::FillBoundary_nowait (int scomp, int ncomp, bool cross)
3160 : {
3161 : FillBoundary_nowait<BUF>(scomp, ncomp, nGrowVect(), Periodicity::NonPeriodic(), cross);
3162 : }
3163 :
3164 : template <class FAB>
3165 : void
3166 : FabArray<FAB>::FillBoundaryAndSync (const Periodicity& period)
3167 : {
3168 : BL_PROFILE("FabArray::FillBoundaryAndSync()");
3169 : if (n_grow.max() > 0 || !is_cell_centered()) {
3170 : FillBoundaryAndSync_nowait(0, nComp(), n_grow, period);
3171 : FillBoundaryAndSync_finish();
3172 : }
3173 : }
3174 :
3175 : template <class FAB>
3176 : void
3177 : FabArray<FAB>::FillBoundaryAndSync (int scomp, int ncomp, const IntVect& nghost,
3178 : const Periodicity& period)
3179 : {
3180 : BL_PROFILE("FabArray::FillBoundaryAndSync()");
3181 : if (nghost.max() > 0 || !is_cell_centered()) {
3182 : FillBoundaryAndSync_nowait(scomp, ncomp, nghost, period);
3183 : FillBoundaryAndSync_finish();
3184 : }
3185 : }
3186 :
3187 : template <class FAB>
3188 : void
3189 : FabArray<FAB>::FillBoundaryAndSync_nowait (const Periodicity& period)
3190 : {
3191 : FillBoundaryAndSync_nowait(0, nComp(), nGrowVect(), period);
3192 : }
3193 :
3194 : template <class FAB>
3195 : void
3196 : FabArray<FAB>::FillBoundaryAndSync_nowait (int scomp, int ncomp, const IntVect& nghost,
3197 : const Periodicity& period)
3198 : {
3199 : BL_PROFILE("FillBoundaryAndSync_nowait()");
3200 : FBEP_nowait(scomp, ncomp, nghost, period, false, false, true);
3201 : }
3202 :
3203 : template <class FAB>
3204 : void
3205 : FabArray<FAB>::FillBoundaryAndSync_finish ()
3206 : {
3207 : BL_PROFILE("FillBoundaryAndSync_finish()");
3208 : FillBoundary_finish();
3209 : }
3210 :
3211 : template <class FAB>
3212 : void
3213 : FabArray<FAB>::OverrideSync (const Periodicity& period)
3214 : {
3215 : BL_PROFILE("FAbArray::OverrideSync()");
3216 : if (!is_cell_centered()) {
3217 : OverrideSync_nowait(0, nComp(), period);
3218 : OverrideSync_finish();
3219 : }
3220 : }
3221 :
3222 : template <class FAB>
3223 : void
3224 : FabArray<FAB>::OverrideSync (int scomp, int ncomp, const Periodicity& period)
3225 : {
3226 : BL_PROFILE("FAbArray::OverrideSync()");
3227 : if (!is_cell_centered()) {
3228 : OverrideSync_nowait(scomp, ncomp, period);
3229 : OverrideSync_finish();
3230 : }
3231 : }
3232 :
3233 : template <class FAB>
3234 : void
3235 : FabArray<FAB>::OverrideSync_nowait (const Periodicity& period)
3236 : {
3237 : OverrideSync_nowait(0, nComp(), period);
3238 : }
3239 :
3240 : template <class FAB>
3241 : void
3242 : FabArray<FAB>::OverrideSync_nowait (int scomp, int ncomp, const Periodicity& period)
3243 : {
3244 : BL_PROFILE("OverrideSync_nowait()");
3245 : FBEP_nowait(scomp, ncomp, IntVect(0), period, false, false, true);
3246 : }
3247 :
3248 : template <class FAB>
3249 : void
3250 : FabArray<FAB>::OverrideSync_finish ()
3251 : {
3252 : BL_PROFILE("OverrideSync_finish()");
3253 : FillBoundary_finish();
3254 : }
3255 :
3256 : template <class FAB>
3257 : void
3258 : FabArray<FAB>::SumBoundary (const Periodicity& period)
3259 : {
3260 : SumBoundary(0, n_comp, IntVect(0), period);
3261 : }
3262 :
3263 : template <class FAB>
3264 : void
3265 : FabArray<FAB>::SumBoundary (int scomp, int ncomp, const Periodicity& period)
3266 : {
3267 : SumBoundary(scomp, ncomp, IntVect(0), period);
3268 : }
3269 :
3270 : template <class FAB>
3271 : void
3272 : FabArray<FAB>::SumBoundary (int scomp, int ncomp, IntVect const& nghost, const Periodicity& period)
3273 : {
3274 : SumBoundary(scomp, ncomp, this->nGrowVect(), nghost, period);
3275 : }
3276 :
3277 : template <class FAB>
3278 : void
3279 : FabArray<FAB>::SumBoundary (int scomp, int ncomp, IntVect const& src_nghost, IntVect const& dst_nghost, const Periodicity& period)
3280 : {
3281 : BL_PROFILE("FabArray<FAB>::SumBoundary()");
3282 :
3283 : SumBoundary_nowait(scomp, ncomp, src_nghost, dst_nghost, period);
3284 : SumBoundary_finish();
3285 : }
3286 :
3287 : template <class FAB>
3288 : void
3289 : FabArray<FAB>::SumBoundary_nowait (const Periodicity& period)
3290 : {
3291 : SumBoundary_nowait(0, n_comp, IntVect(0), period);
3292 : }
3293 :
3294 : template <class FAB>
3295 : void
3296 : FabArray<FAB>::SumBoundary_nowait (int scomp, int ncomp, const Periodicity& period)
3297 : {
3298 : SumBoundary_nowait(scomp, ncomp, IntVect(0), period);
3299 : }
3300 :
3301 : template <class FAB>
3302 : void
3303 : FabArray<FAB>::SumBoundary_nowait (int scomp, int ncomp, IntVect const& nghost, const Periodicity& period)
3304 : {
3305 : SumBoundary_nowait(scomp, ncomp, this->nGrowVect(), nghost, period);
3306 : }
3307 :
3308 : template <class FAB>
3309 : void
3310 : FabArray<FAB>::SumBoundary_nowait (int scomp, int ncomp, IntVect const& src_nghost, IntVect const& dst_nghost, const Periodicity& period)
3311 : {
3312 : BL_PROFILE("FabArray<FAB>::SumBoundary_nowait()");
3313 :
3314 : if ( n_grow == IntVect::TheZeroVector() && boxArray().ixType().cellCentered()) { return; }
3315 :
3316 : AMREX_ALWAYS_ASSERT(src_nghost.allLE(n_grow));
3317 :
3318 : auto* tmp = new FabArray<FAB>( boxArray(), DistributionMap(), ncomp, src_nghost, MFInfo(), Factory() );
3319 : amrex::Copy(*tmp, *this, scomp, 0, ncomp, src_nghost);
3320 : this->setVal(typename FAB::value_type(0), scomp, ncomp, dst_nghost);
3321 : this->ParallelCopy_nowait(*tmp,0,scomp,ncomp,src_nghost,dst_nghost,period,FabArrayBase::ADD);
3322 :
3323 : // All local. Operation complete.
3324 : if (!this->pcd) { delete tmp; }
3325 : }
3326 :
3327 : template <class FAB>
3328 : void
3329 : FabArray<FAB>::SumBoundary_finish ()
3330 : {
3331 : BL_PROFILE("FabArray<FAB>::SumBoundary_finish()");
3332 :
3333 : // If pcd doesn't exist, ParallelCopy was all local and operation was fully completed in "SumBoundary_nowait".
3334 : if ( (n_grow == IntVect::TheZeroVector() && boxArray().ixType().cellCentered()) || !(this->pcd) ) { return; }
3335 :
3336 : auto* tmp = const_cast<FabArray<FAB>*> (this->pcd->src);
3337 : this->ParallelCopy_finish();
3338 : delete tmp;
3339 : }
3340 :
3341 : template <class FAB>
3342 : void
3343 : FabArray<FAB>::EnforcePeriodicity (const Periodicity& period)
3344 : {
3345 : BL_PROFILE("FabArray::EnforcePeriodicity");
3346 : if (period.isAnyPeriodic()) {
3347 : FBEP_nowait(0, nComp(), nGrowVect(), period, false, true);
3348 : FillBoundary_finish(); // unsafe unless isAnyPeriodic()
3349 : }
3350 : }
3351 :
3352 : template <class FAB>
3353 : void
3354 : FabArray<FAB>::EnforcePeriodicity (int scomp, int ncomp, const Periodicity& period)
3355 : {
3356 : BL_PROFILE("FabArray::EnforcePeriodicity");
3357 : if (period.isAnyPeriodic()) {
3358 : FBEP_nowait(scomp, ncomp, nGrowVect(), period, false, true);
3359 : FillBoundary_finish(); // unsafe unless isAnyPeriodic()
3360 : }
3361 : }
3362 :
3363 : template <class FAB>
3364 : void
3365 : FabArray<FAB>::EnforcePeriodicity (int scomp, int ncomp, const IntVect& nghost,
3366 : const Periodicity& period)
3367 : {
3368 : BL_PROFILE("FabArray::EnforcePeriodicity");
3369 : if (period.isAnyPeriodic()) {
3370 : FBEP_nowait(scomp, ncomp, nghost, period, false, true);
3371 : FillBoundary_finish(); // unsafe unless isAnyPeriodic()
3372 : }
3373 : }
3374 :
3375 : template <class FAB>
3376 : template <typename BUF>
3377 : void
3378 : FabArray<FAB>::FillBoundary_nowait (int scomp, int ncomp, const Periodicity& period, bool cross)
3379 : {
3380 : FBEP_nowait<BUF>(scomp, ncomp, nGrowVect(), period, cross);
3381 : }
3382 :
3383 : template <class FAB>
3384 : template <typename BUF>
3385 : void
3386 652910 : FabArray<FAB>::FillBoundary_nowait (int scomp, int ncomp, const IntVect& nghost,
3387 : const Periodicity& period, bool cross)
3388 : {
3389 652910 : FBEP_nowait<BUF>(scomp, ncomp, nghost, period, cross);
3390 652910 : }
3391 :
3392 : template <class FAB>
3393 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>Z>
3394 : void
3395 : FabArray<FAB>::BuildMask (const Box& phys_domain, const Periodicity& period,
3396 : value_type covered, value_type notcovered,
3397 : value_type physbnd, value_type interior)
3398 : {
3399 : BL_PROFILE("FabArray::BuildMask()");
3400 :
3401 : int ncomp = this->nComp();
3402 : const IntVect& ngrow = this->nGrowVect();
3403 :
3404 : Box domain = amrex::convert(phys_domain, boxArray().ixType());
3405 : for (int i = 0; i < AMREX_SPACEDIM; ++i) {
3406 : if (period.isPeriodic(i)) {
3407 : domain.grow(i, ngrow[i]);
3408 : }
3409 : }
3410 :
3411 : #ifdef AMREX_USE_GPU
3412 : if (Gpu::inLaunchRegion() && this->isFusingCandidate()) {
3413 : auto const& fa = this->arrays();
3414 : ParallelFor(*this, ngrow, ncomp,
3415 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
3416 : {
3417 : auto const& fab = fa[box_no];
3418 : Box vbx(fab);
3419 : vbx.grow(-ngrow);
3420 : if (vbx.contains(i,j,k)) {
3421 : fab(i,j,k,n) = interior;
3422 : } else if (domain.contains(i,j,k)) {
3423 : fab(i,j,k,n) = notcovered;
3424 : } else {
3425 : fab(i,j,k,n) = physbnd;
3426 : }
3427 : });
3428 : if (!Gpu::inNoSyncRegion()) {
3429 : Gpu::streamSynchronize();
3430 : }
3431 : } else
3432 : #endif
3433 : {
3434 : #ifdef AMREX_USE_OMP
3435 : #pragma omp parallel if (Gpu::notInLaunchRegion())
3436 : #endif
3437 : for (MFIter mfi(*this,TilingIfNotGPU()); mfi.isValid(); ++mfi)
3438 : {
3439 : auto const& fab = this->array(mfi);
3440 : Box const& fbx = mfi.growntilebox();
3441 : Box const& gbx = fbx & domain;
3442 : Box const& vbx = mfi.validbox();
3443 : AMREX_HOST_DEVICE_FOR_4D(fbx, ncomp, i, j, k, n,
3444 : {
3445 : if (vbx.contains(i,j,k)) {
3446 : fab(i,j,k,n) = interior;
3447 : } else if (gbx.contains(i,j,k)) {
3448 : fab(i,j,k,n) = notcovered;
3449 : } else {
3450 : fab(i,j,k,n) = physbnd;
3451 : }
3452 : });
3453 : }
3454 : }
3455 :
3456 : const FabArrayBase::FB& TheFB = this->getFB(ngrow,period);
3457 : setVal(covered, TheFB, 0, ncomp);
3458 : }
3459 :
3460 : template <class FAB>
3461 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
3462 : void
3463 409 : FabArray<FAB>::setVal (value_type val, const CommMetaData& thecmd, int scomp, int ncomp)
3464 : {
3465 : BL_PROFILE("FabArray::setVal(val, thecmd, scomp, ncomp)");
3466 :
3467 : #ifdef AMREX_USE_GPU
3468 : if (Gpu::inLaunchRegion())
3469 : {
3470 : CMD_local_setVal_gpu(val, thecmd, scomp, ncomp);
3471 : CMD_remote_setVal_gpu(val, thecmd, scomp, ncomp);
3472 : }
3473 : else
3474 : #endif
3475 : {
3476 : AMREX_ASSERT(thecmd.m_LocTags && thecmd.m_RcvTags);
3477 409 : const CopyComTagsContainer& LocTags = *(thecmd.m_LocTags);
3478 409 : const MapOfCopyComTagContainers& RcvTags = *(thecmd.m_RcvTags);
3479 409 : auto N_locs = static_cast<int>(LocTags.size());
3480 : #ifdef AMREX_USE_OMP
3481 : #pragma omp parallel for if (thecmd.m_threadsafe_loc)
3482 : #endif
3483 1204 : for (int i = 0; i < N_locs; ++i) {
3484 795 : const CopyComTag& tag = LocTags[i];
3485 795 : (*this)[tag.dstIndex].template setVal<RunOn::Host>(val, tag.dbox, scomp, ncomp);
3486 : }
3487 :
3488 409 : for (const auto & RcvTag : RcvTags) {
3489 0 : auto N = static_cast<int>(RcvTag.second.size());
3490 : #ifdef AMREX_USE_OMP
3491 : #pragma omp parallel for if (thecmd.m_threadsafe_rcv)
3492 : #endif
3493 0 : for (int i = 0; i < N; ++i) {
3494 0 : const CopyComTag& tag = RcvTag.second[i];
3495 0 : (*this)[tag.dstIndex].template setVal<RunOn::Host>(val, tag.dbox, scomp, ncomp);
3496 : }
3497 : }
3498 : }
3499 409 : }
3500 :
3501 : template <class FAB>
3502 : template <class F, std::enable_if_t<IsBaseFab<F>::value,int>>
3503 : LayoutData<int>
3504 : FabArray<FAB>::RecvLayoutMask (const CommMetaData& thecmd)
3505 : {
3506 : BL_PROFILE("FabArray::RecvLayoutMask()");
3507 :
3508 : LayoutData<int> r(this->boxArray(), this->DistributionMap());
3509 : #ifdef AMREX_USE_OMP
3510 : #pragma omp parallel if (thecmd.m_threadsafe_rcv)
3511 : #endif
3512 : for (MFIter mfi(r); mfi.isValid(); ++mfi) {
3513 : r[mfi] = 0;
3514 : }
3515 :
3516 : const CopyComTagsContainer& LocTags = *(thecmd.m_LocTags);
3517 : const MapOfCopyComTagContainers& RcvTags = *(thecmd.m_RcvTags);
3518 :
3519 : auto N_locs = static_cast<int>(LocTags.size());
3520 : for (int i = 0; i < N_locs; ++i) {
3521 : const CopyComTag& tag = LocTags[i];
3522 : r[tag.dstIndex] = 1;
3523 : }
3524 :
3525 : for (const auto & RcvTag : RcvTags) {
3526 : auto N = static_cast<int>(RcvTag.second.size());
3527 : for (int i = 0; i < N; ++i) {
3528 : const CopyComTag& tag = RcvTag.second[i];
3529 : r[tag.dstIndex] = 1;
3530 : }
3531 : }
3532 : return r;
3533 : }
3534 :
3535 : template <class FAB>
3536 : template <typename F, std::enable_if_t<IsBaseFab<F>::value,int> FOO>
3537 : typename F::value_type
3538 : FabArray<FAB>::norminf (int comp, int ncomp, IntVect const& nghost, bool local,
3539 : [[maybe_unused]] bool ignore_covered) const
3540 : {
3541 : BL_PROFILE("FabArray::norminf()");
3542 :
3543 : using RT = typename F::value_type;
3544 :
3545 : auto nm0 = RT(0.0);
3546 :
3547 : #ifdef AMREX_USE_EB
3548 : if ( this->is_cell_centered() && this->hasEBFabFactory() && ignore_covered )
3549 : {
3550 : const auto& ebfactory = dynamic_cast<EBFArrayBoxFactory const&>(this->Factory());
3551 : auto const& flags = ebfactory.getMultiEBCellFlagFab();
3552 : #ifdef AMREX_USE_GPU
3553 : if (Gpu::inLaunchRegion()) {
3554 : auto const& flagsma = flags.const_arrays();
3555 : auto const& ma = this->const_arrays();
3556 : nm0 = ParReduce(TypeList<ReduceOpMax>{}, TypeList<RT>{}, *this, nghost,
3557 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<RT>
3558 : {
3559 : if (flagsma[box_no](i,j,k).isCovered()) {
3560 : return RT(0.0);
3561 : } else {
3562 : auto tmp = RT(0.0);
3563 : auto const& a = ma[box_no];
3564 : for (int n = 0; n < ncomp; ++n) {
3565 : tmp = amrex::max(tmp, std::abs(a(i,j,k,comp+n)));
3566 : }
3567 : return tmp;
3568 : }
3569 : });
3570 : } else
3571 : #endif
3572 : {
3573 : #ifdef AMREX_USE_OMP
3574 : #pragma omp parallel reduction(max:nm0)
3575 : #endif
3576 : for (MFIter mfi(*this,true); mfi.isValid(); ++mfi) {
3577 : Box const& bx = mfi.growntilebox(nghost);
3578 : if (flags[mfi].getType(bx) != FabType::covered) {
3579 : auto const& flag = flags.const_array(mfi);
3580 : auto const& a = this->const_array(mfi);
3581 : AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
3582 : {
3583 : if (!flag(i,j,k).isCovered()) {
3584 : nm0 = std::max(nm0, std::abs(a(i,j,k,comp+n)));
3585 : }
3586 : });
3587 : }
3588 : }
3589 : }
3590 : }
3591 : else
3592 : #endif
3593 : {
3594 : #ifdef AMREX_USE_GPU
3595 : if (Gpu::inLaunchRegion()) {
3596 : auto const& ma = this->const_arrays();
3597 : nm0 = ParReduce(TypeList<ReduceOpMax>{}, TypeList<RT>{}, *this, nghost, ncomp,
3598 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept -> GpuTuple<RT>
3599 : {
3600 : return std::abs(ma[box_no](i,j,k,comp+n));
3601 : });
3602 : } else
3603 : #endif
3604 : {
3605 : #ifdef AMREX_USE_OMP
3606 : #pragma omp parallel reduction(max:nm0)
3607 : #endif
3608 : for (MFIter mfi(*this,true); mfi.isValid(); ++mfi) {
3609 : Box const& bx = mfi.growntilebox(nghost);
3610 : auto const& a = this->const_array(mfi);
3611 : AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
3612 : {
3613 : nm0 = std::max(nm0, std::abs(a(i,j,k,comp+n)));
3614 : });
3615 : }
3616 : }
3617 : }
3618 :
3619 : if (!local) {
3620 : ParallelAllReduce::Max(nm0, ParallelContext::CommunicatorSub());
3621 : }
3622 :
3623 : return nm0;
3624 : }
3625 :
3626 : template <class FAB>
3627 : template <typename IFAB, typename F, std::enable_if_t<IsBaseFab<F>::value,int> FOO>
3628 : typename F::value_type
3629 : FabArray<FAB>::norminf (FabArray<IFAB> const& mask, int comp, int ncomp,
3630 : IntVect const& nghost, bool local) const
3631 : {
3632 : BL_PROFILE("FabArray::norminf(mask)");
3633 :
3634 : using RT = typename F::value_type;
3635 :
3636 : auto nm0 = RT(0.0);
3637 :
3638 : #ifdef AMREX_USE_GPU
3639 : if (Gpu::inLaunchRegion()) {
3640 : auto const& ma = this->const_arrays();
3641 : auto const& maskma = mask.const_arrays();
3642 : nm0 = ParReduce(TypeList<ReduceOpMax>{}, TypeList<RT>{}, *this, IntVect(nghost),
3643 : [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<RT>
3644 : {
3645 : if (maskma[box_no](i,j,k)) {
3646 : auto tmp = RT(0.0);
3647 : auto const& a = ma[box_no];
3648 : for (int n = 0; n < ncomp; ++n) {
3649 : tmp = amrex::max(tmp, std::abs(a(i,j,k,comp+n)));
3650 : }
3651 : return tmp;
3652 : } else {
3653 : return RT(0.0);
3654 : }
3655 : });
3656 : } else
3657 : #endif
3658 : {
3659 : #ifdef AMREX_USE_OMP
3660 : #pragma omp parallel reduction(max:nm0)
3661 : #endif
3662 : for (MFIter mfi(*this,true); mfi.isValid(); ++mfi) {
3663 : Box const& bx = mfi.growntilebox(nghost);
3664 : auto const& a = this->const_array(mfi);
3665 : auto const& mskfab = mask.const_array(mfi);
3666 : AMREX_LOOP_4D(bx, ncomp, i, j, k, n,
3667 : {
3668 : if (mskfab(i,j,k)) {
3669 : nm0 = std::max(nm0, std::abs(a(i,j,k,comp+n)));
3670 : }
3671 : });
3672 : }
3673 : }
3674 :
3675 : if (!local) {
3676 : ParallelAllReduce::Max(nm0, ParallelContext::CommunicatorSub());
3677 : }
3678 :
3679 : return nm0;
3680 : }
3681 :
3682 : }
3683 :
3684 : #endif /*BL_FABARRAY_H*/
|