#ifndef AMREX_MultiFabUtil_H_
#define AMREX_MultiFabUtil_H_
#include <AMReX_Config.H>

#include <AMReX_MultiFab.H>
#include <AMReX_iMultiFab.H>
#include <AMReX_LayoutData.H>
#include <AMReX_MFIter.H>
#include <AMReX_Array.H>
#include <AMReX_Vector.H>
#include <AMReX_MultiFabUtil_C.H>

#include <AMReX_MultiFabUtilI.H>

namespace amrex
{
    //! Average nodal-based MultiFab onto cell-centered MultiFab.
    void average_node_to_cellcenter (MultiFab& cc, int dcomp,
                                     const MultiFab& nd, int scomp,
                                     int ncomp, int ngrow = 0);

    /**
     * \brief Average edge-based MultiFab onto cell-centered MultiFab.
     *
     * This fills in \p ngrow ghost cells in the cell-centered MultiFab. Both the cell-centered
     * and the edge-centered MultiFabs need to have \p ngrow ghost values.
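     *
     * A minimal usage sketch (a hedged example; the edge MultiFabs ex, ey, ez
     * and the component choices are illustrative assumptions):
     \verbatim
         Vector<const MultiFab*> edges{AMREX_D_DECL(&ex, &ey, &ez)};
         average_edge_to_cellcenter(cc, 0, edges, 0); // no ghost cells filled
     \endverbatim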
     */
    void average_edge_to_cellcenter (MultiFab& cc, int dcomp,
                                     const Vector<const MultiFab*>& edge,
                                     int ngrow = 0);
    //! Average face-based MultiFab onto cell-centered MultiFab.
    void average_face_to_cellcenter (MultiFab& cc, int dcomp,
                                     const Vector<const MultiFab*>& fc,
                                     int ngrow = 0);
    //! Average face-based FabArray onto cell-centered FabArray.
    template <typename CMF, typename FMF,
              std::enable_if_t<IsFabArray_v<CMF> && IsFabArray_v<FMF>, int> = 0>
    void average_face_to_cellcenter (CMF& cc, int dcomp,
                                     const Array<const FMF*,AMREX_SPACEDIM>& fc,
                                     int ngrow = 0);
    //! Average face-based MultiFab onto cell-centered MultiFab with geometric weighting.
    void average_face_to_cellcenter (MultiFab& cc,
                                     const Vector<const MultiFab*>& fc,
                                     const Geometry& geom);
    //! Average face-based MultiFab onto cell-centered MultiFab with geometric weighting.
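    //! example (a hedged sketch; umac is assumed to be an
    //! Array<MultiFab,AMREX_SPACEDIM> of face-centered data):
    //!   average_face_to_cellcenter(cc, GetArrOfConstPtrs(umac), geom);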
    void average_face_to_cellcenter (MultiFab& cc,
                                     const Array<const MultiFab*,AMREX_SPACEDIM>& fc,
                                     const Geometry& geom);
    //! Average cell-centered MultiFab onto face-based MultiFab with geometric weighting.
    void average_cellcenter_to_face (const Vector<MultiFab*>& fc,
                                     const MultiFab& cc,
                                     const Geometry& geom,
                                     int ncomp = 1,
                                     bool use_harmonic_averaging = false);
    //! Average cell-centered MultiFab onto face-based MultiFab with geometric weighting.
    void average_cellcenter_to_face (const Array<MultiFab*,AMREX_SPACEDIM>& fc,
                                     const MultiFab& cc,
                                     const Geometry& geom,
                                     int ncomp = 1,
                                     bool use_harmonic_averaging = false);

    //! Average fine face-based FabArray onto crse face-based FabArray.
    template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> = 0>
    void average_down_faces (const Vector<const MF*>& fine,
                             const Vector<MF*>& crse,
                             const IntVect& ratio,
                             int ngcrse = 0);
    //! Average fine face-based FabArray onto crse face-based FabArray.
    template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> = 0>
    void average_down_faces (const Vector<const MF*>& fine,
                             const Vector<MF*>& crse,
                             int ratio,
                             int ngcrse = 0);
    //! Average fine face-based FabArray onto crse face-based FabArray.
    template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> = 0>
    void average_down_faces (const Array<const MF*,AMREX_SPACEDIM>& fine,
                             const Array<MF*,AMREX_SPACEDIM>& crse,
                             const IntVect& ratio,
                             int ngcrse = 0);
    //! Average fine face-based FabArray onto crse face-based FabArray.
    template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> = 0>
    void average_down_faces (const Array<const MF*,AMREX_SPACEDIM>& fine,
                             const Array<MF*,AMREX_SPACEDIM>& crse,
                             int ratio,
                             int ngcrse = 0);
    /**
     * \brief This version does average down for one face direction.
     *
     * It uses the IndexType of the MultiFabs to determine the direction.
     * It is expected that one direction is nodal and the rest are cell-centered.
     */
    template <typename FAB>
    void average_down_faces (const FabArray<FAB>& fine, FabArray<FAB>& crse,
                             const IntVect& ratio, int ngcrse=0);

    //! This version takes periodicity into account.
    template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> = 0>
    void average_down_faces (const Array<const MF*,AMREX_SPACEDIM>& fine,
                             const Array<MF*,AMREX_SPACEDIM>& crse,
                             const IntVect& ratio, const Geometry& crse_geom);
    //! This version takes periodicity into account.
    template <typename FAB>
    void average_down_faces (const FabArray<FAB>& fine, FabArray<FAB>& crse,
                             const IntVect& ratio, const Geometry& crse_geom);

    //! Average fine edge-based MultiFab onto crse edge-based MultiFab.
    void average_down_edges (const Vector<const MultiFab*>& fine,
                             const Vector<MultiFab*>& crse,
                             const IntVect& ratio,
                             int ngcrse = 0);
    void average_down_edges (const Array<const MultiFab*,AMREX_SPACEDIM>& fine,
                             const Array<MultiFab*,AMREX_SPACEDIM>& crse,
                             const IntVect& ratio,
                             int ngcrse = 0);
    //! This version does average down for one direction.
    //! It uses the IndexType of the MultiFabs to determine the direction.
    //! It is expected that one direction is cell-centered and the rest are nodal.
    void average_down_edges (const MultiFab& fine, MultiFab& crse,
                             const IntVect& ratio, int ngcrse=0);

    //! Average fine node-based MultiFab onto crse node-based MultiFab.
    template <typename FAB>
    void average_down_nodal (const FabArray<FAB>& S_fine,
                             FabArray<FAB>& S_crse,
                             const IntVect& ratio,
                             int ngcrse = 0,
                             bool mfiter_is_definitely_safe=false);

    /**
     * \brief Volume weighted average of fine MultiFab onto coarse MultiFab.
     *
     * Both MultiFabs are assumed to be cell-centered. This routine DOES NOT assume that
     * the crse BoxArray is a coarsened version of the fine BoxArray.
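     *
     * A minimal usage sketch (the component range and the refinement ratio of 2
     * are illustrative assumptions):
     \verbatim
         average_down(S_fine, S_crse, fgeom, cgeom, 0, S_fine.nComp(), IntVect(2));
     \endverbatim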
     */
    void average_down (const MultiFab& S_fine, MultiFab& S_crse,
                       const Geometry& fgeom, const Geometry& cgeom,
                       int scomp, int ncomp, const IntVect& ratio);
    void average_down (const MultiFab& S_fine, MultiFab& S_crse,
                       const Geometry& fgeom, const Geometry& cgeom,
                       int scomp, int ncomp, int rr);

    //! Average MultiFab onto crse MultiFab without volume weighting. This
    //! routine DOES NOT assume that the crse BoxArray is a coarsened version of
    //! the fine BoxArray. Works for both cell-centered and nodal MultiFabs.
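    //! example (a hedged sketch; the refinement ratio of 2 is an illustrative
    //! assumption):
    //!   average_down(S_fine, S_crse, 0, S_fine.nComp(), 2);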
    template<typename FAB>
    void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
                       int scomp, int ncomp, const IntVect& ratio);
    template<typename FAB>
    void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
                       int scomp, int ncomp, int rr);

    //! Add a coarsened version of the data contained in the S_fine MultiFab to
    //! S_crse, including ghost cells.
    void sum_fine_to_coarse (const MultiFab& S_Fine, MultiFab& S_crse,
                             int scomp, int ncomp,
                             const IntVect& ratio,
                             const Geometry& cgeom, const Geometry& fgeom);

    //! Output state data for a single zone
    void print_state (const MultiFab& mf, const IntVect& cell, int n=-1,
                      const IntVect& ng = IntVect::TheZeroVector());

    //! Write each fab individually
    void writeFabs (const MultiFab& mf, const std::string& name);
    void writeFabs (const MultiFab& mf, int comp, int ncomp, const std::string& name);

    //! Extract a slice from the given cell-centered MultiFab at coordinate
    //! "coord" along direction "dir".
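    //! example (a hedged sketch; slicing at the z mid-plane of the domain):
    //!   auto slice = get_slice_data(2, 0.5*(geom.ProbLo(2)+geom.ProbHi(2)),
    //!                               cc, geom, 0, cc.nComp());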
    std::unique_ptr<MultiFab> get_slice_data(int dir, Real coord,
                                             const MultiFab& cc,
                                             const Geometry& geom, int start_comp, int ncomp,
                                             bool interpolate=false);

    /**
     * \brief Get data in a cell of MultiFab/FabArray
     *
     * This returns a Vector containing the data in a given cell, if it's
     * available on a process. The returned Vector is empty if a process
     * does not have the given cell.
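     *
     * A minimal usage sketch (the cell index is an illustrative assumption):
     \verbatim
         auto v = get_cell_data(mf, IntVect(AMREX_D_DECL(8,8,8)));
         if (!v.empty()) {
             // this process owns the cell; v[n] holds component n
         }
     \endverbatim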
     */
    template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> FOO = 0>
    Vector<typename MF::value_type> get_cell_data (MF const& mf, IntVect const& cell);

    /**
     * \brief Get data in a line of MultiFab/FabArray
     *
     * This returns a MultiFab/FabArray containing the data in a line
     * specified by a direction and a cell.
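     *
     * A minimal usage sketch (the direction and cell are illustrative
     * assumptions):
     \verbatim
         // data along the x-direction line through the given cell
         auto line = get_line_data(mf, 0, IntVect(AMREX_D_DECL(0,8,8)));
     \endverbatim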
     */
    template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> FOO = 0>
    MF get_line_data (MF const& mf, int dir, IntVect const& cell);

    //! Return an iMultiFab that has the same BoxArray and DistributionMapping
    //! as the coarse MultiFab cmf. Cells covered by the coarsened fine grids
    //! are set to fine_value, whereas other cells are set to crse_value.
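    //! example (a hedged sketch; fba and the refinement ratio of 2 are
    //! illustrative assumptions):
    //!   iMultiFab mask = makeFineMask(cmf, fba, IntVect(2)); // 0 outside, 1 under fba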
    template <typename FAB>
    iMultiFab makeFineMask (const FabArray<FAB>& cmf, const BoxArray& fba, const IntVect& ratio,
                            int crse_value = 0, int fine_value = 1);
    iMultiFab makeFineMask (const BoxArray& cba, const DistributionMapping& cdm,
                            const BoxArray& fba, const IntVect& ratio,
                            int crse_value = 0, int fine_value = 1);
    template <typename FAB>
    iMultiFab makeFineMask (const FabArray<FAB>& cmf, const BoxArray& fba, const IntVect& ratio,
                            Periodicity const& period, int crse_value, int fine_value);
    iMultiFab makeFineMask (const BoxArray& cba, const DistributionMapping& cdm,
                            const IntVect& cnghost, const BoxArray& fba, const IntVect& ratio,
                            Periodicity const& period, int crse_value, int fine_value);
    template <typename FAB>
    iMultiFab makeFineMask (const FabArray<FAB>& cmf, const FabArray<FAB>& fmf,
                            const IntVect& cnghost, const IntVect& ratio,
                            Periodicity const& period, int crse_value, int fine_value);
    template <typename FAB>
    iMultiFab makeFineMask (const FabArray<FAB>& cmf, const FabArray<FAB>& fmf,
                            const IntVect& cnghost, const IntVect& ratio,
                            Periodicity const& period, int crse_value, int fine_value,
                            LayoutData<int>& has_cf);

    MultiFab makeFineMask (const BoxArray& cba, const DistributionMapping& cdm,
                           const BoxArray& fba, const IntVect& ratio,
                           Real crse_value, Real fine_value);

    //! Computes divergence of face-data stored in the umac MultiFab.
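    //! example (a hedged sketch; umac is assumed to be an
    //! Array<MultiFab,AMREX_SPACEDIM> of face-centered velocities):
    //!   computeDivergence(divu, GetArrOfConstPtrs(umac), geom);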
    void computeDivergence (MultiFab& divu, const Array<MultiFab const*,AMREX_SPACEDIM>& umac,
                            const Geometry& geom);

    //! Computes gradient of face-data stored in the umac MultiFab.
    void computeGradient (MultiFab& grad, const Array<MultiFab const*,AMREX_SPACEDIM>& umac,
                          const Geometry& geom);

    //! Convert iMultiFab to MultiFab
    MultiFab ToMultiFab (const iMultiFab& imf);
    //! Convert iMultiFab to FabArray<BaseFab<Long>>
    FabArray<BaseFab<Long> > ToLongMultiFab (const iMultiFab& imf);

    //! Periodic shift MultiFab
    MultiFab periodicShift (MultiFab const& mf, IntVect const& offset,
                            Periodicity const& period);

    //! example: auto mf = amrex::cast<MultiFab>(imf);
    template <typename T, typename U>
    T cast (U const& mf_in)
    {
        T mf_out(mf_in.boxArray(), mf_in.DistributionMap(), mf_in.nComp(), mf_in.nGrowVect());

#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
        for (MFIter mfi(mf_in); mfi.isValid(); ++mfi)
        {
            const Long n = mfi.fabbox().numPts() * mf_in.nComp();
            auto * pdst = mf_out[mfi].dataPtr();
            auto const* psrc = mf_in [mfi].dataPtr();
            AMREX_HOST_DEVICE_PARALLEL_FOR_1D ( n, i,
            {
                pdst[i] = static_cast<typename T::value_type>(psrc[i]); // NOLINT(bugprone-signed-char-misuse)
            });
        }
        return mf_out;
    }

    /**
     * \brief Reduce FabArray/MultiFab data to a plane.
     *
     * This function takes a FabArray/MultiFab and reduces its data to a
     * plane. The return data are stored in a BaseFab with only one cell in
     * the normal direction of the plane. The index range of the BaseFab in
     * the other directions is the same as the provided domain Box. If data
     * do not exist along a certain line, the value is set to the minimum,
     * maximum and zero, for reduce max, min and sum, respectively. The
     * reduction is local and the user may need to do MPI communication
     * afterwards if needed.
     *
     * In the example code below, the sum along each line at (i,j) in the
     * z-direction is computed and stored at (i,j,0) of the returned
     * BaseFab.
     \verbatim
         int dir = 2; // z-direction
         auto const& domain_box = geom.Domain();
         auto const& ma = mf.const_arrays();
         auto rr = ReduceToPlane<ReduceOpSum,Real>(dir, domain_box, mf,
             [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) -> Real
             {
                 return ma[box_no](i,j,k); // data at (i,j,k) of Box box_no
             });
     \endverbatim
     *
     * Below is another example. This finds the maximum value in the
     * x-direction and stores the maximum value and the i-index. An MPI
     * reduce is then called to further reduce the data to the root process
     * 0.
     \verbatim
         int dir = 0; // x-direction
         auto const& domain_box = geom.Domain().surroundingNodes(); // nodal data
         auto const& ma = mf.const_arrays();
         auto rr = ReduceToPlane<ReduceOpMax,KeyValuePair<Real,int>>
             (dir, domain_box, mf,
              [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k)
                  -> KeyValuePair<Real,int>
              {
                  return {ma[box_no](i,j,k), i};
              });
         ParallelReduce::Max(rr.dataPtr(), rr.size(), root,
                             ParallelDescriptor::Communicator());
         // Process root now has the final results.
     \endverbatim
     *
     * \tparam Op reduce operator (e.g., ReduceOpSum, ReduceOpMin and ReduceOpMax)
     * \tparam T data type of reduction result
     * \tparam FAB FabArray/MultiFab type
     * \tparam F callable type like a lambda function
     *
     * \param direction normal direction of the plane (e.g., 0, 1 and 2)
     * \param domain domain Box
     * \param mf a FabArray/MultiFab object specifying the iteration space
     * \param f a callable object returning T. It takes four ints,
     *          where the first int is the local box index and the others
     *          are spatial indices for x, y, and z-directions.
     *
     * \return reduction result (BaseFab<T>)
     */
    template <typename Op, typename T, typename FAB, typename F,
              std::enable_if_t<IsBaseFab<FAB>::value
#ifndef AMREX_USE_CUDA
                               && IsCallableR<T,F,int,int,int,int>::value
#endif
                               , int> FOO = 0>
    BaseFab<T>
    ReduceToPlane (int direction, Box const& domain, FabArray<FAB> const& mf, F const& f);

    /**
     * \brief Sum MultiFab data to line
     *
     * Return a HostVector that contains the sum of the given MultiFab data in the plane
     * with the given normal direction. The size of the vector is
     * domain.length(direction) x ncomp. The vector is actually a 2D array, where the
     * element for component icomp at spatial index k is at [icomp*domain.length(direction)+k].
     *
     * \param mf MultiFab data for summing
     * \param icomp starting component
     * \param ncomp number of components
     * \param domain the domain
     * \param direction the direction of the line
     * \param local If false, reduce across MPI processes.
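     *
     * A minimal usage sketch (summing component 0 along the z-direction; the
     * direction choice is an illustrative assumption):
     \verbatim
         auto s = sumToLine(mf, 0, 1, geom.Domain(), 2);
         // s[k] now holds the sum over the plane at spatial index k
     \endverbatim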
     */
    Gpu::HostVector<Real> sumToLine (MultiFab const& mf, int icomp, int ncomp,
                                     Box const& domain, int direction, bool local = false);

    /** \brief Volume weighted sum for a vector of MultiFabs
     *
     * Return a volume weighted sum of MultiFabs of AMR data. The sum is
     * performed on a single component of the data. If the MultiFabs are
     * built with EB Factories, the cut cell volume fraction will be
     * included in the weight.
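     *
     * A minimal usage sketch (two AMR levels and a refinement ratio of 2 are
     * illustrative assumptions):
     \verbatim
         Real s = volumeWeightedSum({&mf0, &mf1}, 0, {geom0, geom1}, {IntVect(2)});
     \endverbatim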
     */
    Real volumeWeightedSum (Vector<MultiFab const*> const& mf, int icomp,
                            Vector<Geometry> const& geom,
                            Vector<IntVect> const& ratio,
                            bool local = false);

    /**
     * \brief Fourth-order interpolation from fine to coarse level.
     *
     * This is for high-order "average-down" of finite-difference data. If
     * ghost cell data are used, it's the caller's responsibility to fill
     * the ghost cells before calling this function.
     *
     * \param cmf coarse data
     * \param scomp starting component
     * \param ncomp number of components
     * \param fmf fine data
     * \param ratio refinement ratio.
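     *
     * A minimal usage sketch (the refinement ratio of 2 is an illustrative
     * assumption):
     \verbatim
         FourthOrderInterpFromFineToCoarse(cmf, 0, cmf.nComp(), fmf, IntVect(2));
     \endverbatim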
     */
    void FourthOrderInterpFromFineToCoarse (MultiFab& cmf, int scomp, int ncomp,
                                            MultiFab const& fmf,
                                            IntVect const& ratio);

    /**
     * \brief Fill MultiFab with random numbers from uniform distribution
     *
     * The uniform distribution range is [0.0, 1.0) for CPU and SYCL, and
     * (0,1] for CUDA and HIP. All cells including ghost cells are filled.
     *
     * \param mf MultiFab
     * \param scomp starting component
     * \param ncomp number of components
     */
    void FillRandom (MultiFab& mf, int scomp, int ncomp);

    /**
     * \brief Fill MultiFab with random numbers from normal distribution
     *
     * All cells including ghost cells are filled.
     *
     * \param mf MultiFab
     * \param scomp starting component
     * \param ncomp number of components
     * \param mean mean of normal distribution
     * \param stddev standard deviation of normal distribution
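     *
     * A minimal usage sketch (zero mean and unit standard deviation are
     * illustrative choices):
     \verbatim
         FillRandomNormal(mf, 0, mf.nComp(), Real(0.0), Real(1.0));
     \endverbatim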
     */
    void FillRandomNormal (MultiFab& mf, int scomp, int ncomp, Real mean, Real stddev);

    /**
     * \brief Convexify AMR data
     *
     * This function "convexifies" the AMR data by removing cells that are
     * covered by fine levels from coarse level MultiFabs. This could be
     * useful for visualization. The returned MultiFabs on coarse levels
     * have different BoxArrays from the original BoxArrays. For the finest
     * level, the data is simply copied to the returned object. The returned
     * MultiFabs have no ghost cells. For nodal data, the nodes on the
     * coarse/fine interface exist on both levels.
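     *
     * A minimal usage sketch (two levels and a refinement ratio of 2 are
     * illustrative assumptions):
     \verbatim
         auto plot_mfs = convexify({&mf0, &mf1}, {IntVect(2)});
     \endverbatim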
     */
    [[nodiscard]] Vector<MultiFab> convexify (Vector<MultiFab const*> const& mf,
                                              Vector<IntVect> const& refinement_ratio);
}

namespace amrex {

template <typename FAB>
iMultiFab
makeFineMask (const FabArray<FAB>& cmf, const BoxArray& fba, const IntVect& ratio,
              int crse_value, int fine_value)
{
    return makeFineMask(cmf.boxArray(), cmf.DistributionMap(), cmf.nGrowVect(),
                        fba, ratio, Periodicity::NonPeriodic(), crse_value, fine_value);
}

template <typename FAB>
iMultiFab
makeFineMask (const FabArray<FAB>& cmf, const BoxArray& fba, const IntVect& ratio,
              Periodicity const& period, int crse_value, int fine_value)
{
    return makeFineMask(cmf.boxArray(), cmf.DistributionMap(), cmf.nGrowVect(),
                        fba, ratio, period, crse_value, fine_value);
}

template <typename FAB>
iMultiFab
makeFineMask (const FabArray<FAB>& cmf, const FabArray<FAB>& fmf,
              const IntVect& cnghost, const IntVect& ratio,
              Periodicity const& period, int crse_value, int fine_value)
{
    iMultiFab mask(cmf.boxArray(), cmf.DistributionMap(), 1, cnghost);
    mask.setVal(crse_value);

    iMultiFab foo(amrex::coarsen(fmf.boxArray(),ratio), fmf.DistributionMap(),
                  1, 0, MFInfo().SetAlloc(false));
    const FabArrayBase::CPC& cpc = mask.getCPC(cnghost,foo,IntVect::TheZeroVector(),period);
    mask.setVal(fine_value, cpc, 0, 1);

    return mask;
}

template <typename FAB>
iMultiFab
makeFineMask (const FabArray<FAB>& cmf, const FabArray<FAB>& fmf,
              const IntVect& cnghost, const IntVect& ratio,
              Periodicity const& period, int crse_value, int fine_value,
              LayoutData<int>& has_cf)
{
    iMultiFab mask(cmf.boxArray(), cmf.DistributionMap(), 1, cnghost);
    mask.setVal(crse_value);

    iMultiFab foo(amrex::coarsen(fmf.boxArray(),ratio), fmf.DistributionMap(),
                  1, 0, MFInfo().SetAlloc(false));
    const FabArrayBase::CPC& cpc = mask.getCPC(cnghost,foo,IntVect::TheZeroVector(),period);
    mask.setVal(fine_value, cpc, 0, 1);

    has_cf = mask.RecvLayoutMask(cpc);

    return mask;
}

//! Average fine node-based MultiFab onto crse node-based MultiFab.
//! This routine assumes that the crse BoxArray is a coarsened version of the fine BoxArray.
template <typename FAB>
void average_down_nodal (const FabArray<FAB>& fine, FabArray<FAB>& crse,
                         const IntVect& ratio, int ngcrse, bool mfiter_is_definitely_safe)
{
    AMREX_ASSERT(fine.is_nodal());
    AMREX_ASSERT(crse.is_nodal());
    AMREX_ASSERT(crse.nComp() == fine.nComp());

    int ncomp = crse.nComp();
    using value_type = typename FAB::value_type;

    if (mfiter_is_definitely_safe || isMFIterSafe(fine, crse))
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
        for (MFIter mfi(crse,TilingIfNotGPU()); mfi.isValid(); ++mfi)
        {
            const Box& bx = mfi.growntilebox(ngcrse);
            Array4<value_type> const& crsearr = crse.array(mfi);
            Array4<value_type const> const& finearr = fine.const_array(mfi);

            AMREX_LAUNCH_HOST_DEVICE_LAMBDA ( bx, tbx,
            {
                amrex_avgdown_nodes(tbx,crsearr,finearr,0,0,ncomp,ratio);
            });
        }
    }
    else
    {
        FabArray<FAB> ctmp(amrex::coarsen(fine.boxArray(),ratio), fine.DistributionMap(),
                           ncomp, ngcrse);
        average_down_nodal(fine, ctmp, ratio, ngcrse);
        crse.ParallelCopy(ctmp,0,0,ncomp,ngcrse,ngcrse);
    }
}

// *************************************************************************************************************

// Average fine cell-based MultiFab onto crse cell-centered MultiFab.
// We do NOT assume that the coarse layout is a coarsened version of the fine layout.
// This version does NOT use volume-weighting.
template<typename FAB>
void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse, int scomp, int ncomp, int rr)
{
    average_down(S_fine,S_crse,scomp,ncomp,rr*IntVect::TheUnitVector());
}

template<typename FAB>
void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
                   int scomp, int ncomp, const IntVect& ratio)
{
    BL_PROFILE("amrex::average_down");
    AMREX_ASSERT(S_crse.nComp() == S_fine.nComp());
    AMREX_ASSERT((S_crse.is_cell_centered() && S_fine.is_cell_centered()) ||
                 (S_crse.is_nodal() && S_fine.is_nodal()));

    using value_type = typename FAB::value_type;

    bool is_cell_centered = S_crse.is_cell_centered();

    //
    // Coarsen() the fine stuff on processors owning the fine data.
    //
    BoxArray crse_S_fine_BA = S_fine.boxArray(); crse_S_fine_BA.coarsen(ratio);

    if (crse_S_fine_BA == S_crse.boxArray() && S_fine.DistributionMap() == S_crse.DistributionMap())
    {
#ifdef AMREX_USE_GPU
        if (Gpu::inLaunchRegion() && S_crse.isFusingCandidate()) {
            auto const& crsema = S_crse.arrays();
            auto const& finema = S_fine.const_arrays();
            if (is_cell_centered) {
                ParallelFor(S_crse, IntVect(0), ncomp,
                [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
                {
                    amrex_avgdown(i,j,k,n,crsema[box_no],finema[box_no],scomp,scomp,ratio);
                });
            } else {
                ParallelFor(S_crse, IntVect(0), ncomp,
                [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
                {
                    amrex_avgdown_nodes(i,j,k,n,crsema[box_no],finema[box_no],scomp,scomp,ratio);
                });
            }
            if (!Gpu::inNoSyncRegion()) {
                Gpu::streamSynchronize();
            }
        } else
#endif
        {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
            for (MFIter mfi(S_crse,TilingIfNotGPU()); mfi.isValid(); ++mfi)
            {
                // NOTE: The tilebox is defined at the coarse level.
                const Box& bx = mfi.tilebox();
                Array4<value_type> const& crsearr = S_crse.array(mfi);
                Array4<value_type const> const& finearr = S_fine.const_array(mfi);

                if (is_cell_centered) {
                    AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
                    {
                        amrex_avgdown(i,j,k,n,crsearr,finearr,scomp,scomp,ratio);
                    });
                } else {
                    AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
                    {
                        amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,scomp,scomp,ratio);
                    });
                }
            }
        }
    }
    else
    {
        FabArray<FAB> crse_S_fine(crse_S_fine_BA, S_fine.DistributionMap(), ncomp, 0, MFInfo(), DefaultFabFactory<FAB>());

#ifdef AMREX_USE_GPU
        if (Gpu::inLaunchRegion() && crse_S_fine.isFusingCandidate()) {
            auto const& crsema = crse_S_fine.arrays();
            auto const& finema = S_fine.const_arrays();
            if (is_cell_centered) {
                ParallelFor(crse_S_fine, IntVect(0), ncomp,
                [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
                {
                    amrex_avgdown(i,j,k,n,crsema[box_no],finema[box_no],0,scomp,ratio);
                });
            } else {
                ParallelFor(crse_S_fine, IntVect(0), ncomp,
                [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
                {
                    amrex_avgdown_nodes(i,j,k,n,crsema[box_no],finema[box_no],0,scomp,ratio);
                });
            }
            if (!Gpu::inNoSyncRegion()) {
                Gpu::streamSynchronize();
            }
        } else
#endif
        {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
            for (MFIter mfi(crse_S_fine,TilingIfNotGPU()); mfi.isValid(); ++mfi)
            {
                // NOTE: The tilebox is defined at the coarse level.
                const Box& bx = mfi.tilebox();
                Array4<value_type> const& crsearr = crse_S_fine.array(mfi);
                Array4<value_type const> const& finearr = S_fine.const_array(mfi);

                // NOTE: We copy from component scomp of the fine fab into component 0 of the crse fab
                //       because the crse fab is a temporary which was made starting at comp 0, it is
                //       not part of the actual crse multifab which came in.

                if (is_cell_centered) {
                    AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
                    {
                        amrex_avgdown(i,j,k,n,crsearr,finearr,0,scomp,ratio);
                    });
                } else {
                    AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
                    {
                        amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,0,scomp,ratio);
                    });
                }
            }
        }

        S_crse.ParallelCopy(crse_S_fine,0,scomp,ncomp);
    }
}


/**
 * \brief Returns part of a norm based on two MultiFabs.
 *
 * The MultiFabs MUST have the same underlying BoxArray.
 * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n))
 * inside the summation.
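 *
 * A minimal usage sketch computing a local dot product of x and y (the
 * host-device lambda is an illustrative assumption):
 \verbatim
     Real dot = NormHelper(x, 0, y, 0,
                           [=] AMREX_GPU_HOST_DEVICE (Real a, Real b) { return a*b; },
                           1, IntVect(0), true); // local sum; reduce manually if needed
 \endverbatim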
 */
template <typename F>
Real
NormHelper (const MultiFab& x, int xcomp,
            const MultiFab& y, int ycomp,
            F const& f,
            int numcomp, IntVect nghost, bool local)
{
    BL_ASSERT(x.boxArray() == y.boxArray());
    BL_ASSERT(x.DistributionMap() == y.DistributionMap());
    BL_ASSERT(x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost));

    Real sm = Real(0.0);
#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion()) {
        auto const& xma = x.const_arrays();
        auto const& yma = y.const_arrays();
        sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<Real>{}, x, nghost,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<Real>
        {
            Real t = Real(0.0);
            auto const& xfab = xma[box_no];
            auto const& yfab = yma[box_no];
            for (int n = 0; n < numcomp; ++n) {
                t += f(xfab(i,j,k,xcomp+n) , yfab(i,j,k,ycomp+n));
            }
            return t;
        });
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
#endif
        for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
        {
            Box const& bx = mfi.growntilebox(nghost);
            Array4<Real const> const& xfab = x.const_array(mfi);
            Array4<Real const> const& yfab = y.const_array(mfi);
            AMREX_LOOP_4D(bx, numcomp, i, j, k, n,
            {
                sm += f(xfab(i,j,k,xcomp+n) , yfab(i,j,k,ycomp+n));
            });
        }
    }

    if (!local) {
        ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
    }

    return sm;
}

/**
 * \brief Returns part of a norm based on three MultiFabs.
 *
 * The MultiFabs MUST have the same underlying BoxArray.
 * The predicate pf is used to test the mask.
 * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n))
 * inside the summation, subject to a valid mask entry pf(mask(i,j,k)).
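 *
 * A minimal usage sketch summing squared differences where an iMultiFab mask
 * is nonzero (the lambdas are illustrative assumptions):
 \verbatim
     Real sm = NormHelper(mask, x, 0, y, 0,
                          [=] AMREX_GPU_HOST_DEVICE (int m) { return m != 0; },
                          [=] AMREX_GPU_HOST_DEVICE (Real a, Real b) { return (a-b)*(a-b); },
                          1, IntVect(0), false); // reduces across MPI processes
 \endverbatim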
 */
template <typename MMF, typename Pred, typename F>
Real
NormHelper (const MMF& mask,
            const MultiFab& x, int xcomp,
            const MultiFab& y, int ycomp,
            Pred const& pf,
            F const& f,
            int numcomp, IntVect nghost, bool local)
{
    BL_ASSERT(x.boxArray() == y.boxArray());
    BL_ASSERT(x.boxArray() == mask.boxArray());
    BL_ASSERT(x.DistributionMap() == y.DistributionMap());
    BL_ASSERT(x.DistributionMap() == mask.DistributionMap());
    BL_ASSERT(x.nGrowVect().allGE(nghost) && y.nGrowVect().allGE(nghost));
    BL_ASSERT(mask.nGrowVect().allGE(nghost));

    Real sm = Real(0.0);
#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion()) {
        auto const& xma = x.const_arrays();
        auto const& yma = y.const_arrays();
        auto const& mma = mask.const_arrays();
        sm = ParReduce(TypeList<ReduceOpSum>{}, TypeList<Real>{}, x, nghost,
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple<Real>
        {
            Real t = Real(0.0);
            if (pf(mma[box_no](i,j,k))) {
                auto const& xfab = xma[box_no];
                auto const& yfab = yma[box_no];
                for (int n = 0; n < numcomp; ++n) {
                    t += f(xfab(i,j,k,xcomp+n) , yfab(i,j,k,ycomp+n));
                }
            }
            return t;
        });
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm)
#endif
        for (MFIter mfi(x,true); mfi.isValid(); ++mfi)
        {
            Box const& bx = mfi.growntilebox(nghost);
            Array4<Real const> const& xfab = x.const_array(mfi);
            Array4<Real const> const& yfab = y.const_array(mfi);
            auto const& mfab = mask.const_array(mfi);
            AMREX_LOOP_4D(bx, numcomp, i, j, k, n,
            {
                if (pf(mfab(i,j,k))) {
                    sm += f(xfab(i,j,k,xcomp+n) , yfab(i,j,k,ycomp+n));
                }
            });
        }
    }

    if (!local) {
        ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub());
    }

    return sm;
}

template <typename CMF, typename FMF,
          std::enable_if_t<IsFabArray_v<CMF> && IsFabArray_v<FMF>, int> FOO>
void average_face_to_cellcenter (CMF& cc, int dcomp,
                                 const Array<const FMF*,AMREX_SPACEDIM>& fc,
                                 int ngrow)
{
    AMREX_ASSERT(cc.nComp() >= dcomp + AMREX_SPACEDIM);
    AMREX_ASSERT(fc[0]->nComp() == 1);

#ifdef AMREX_USE_GPU
    if (Gpu::inLaunchRegion() && cc.isFusingCandidate()) {
        auto const& ccma = cc.arrays();
        AMREX_D_TERM(auto const& fxma = fc[0]->const_arrays();,
                     auto const& fyma = fc[1]->const_arrays();,
                     auto const& fzma = fc[2]->const_arrays(););
        ParallelFor(cc, IntVect(ngrow),
        [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept
        {
#if (AMREX_SPACEDIM == 1)
            GeometryData gd{};
            gd.coord = 0;
#endif
            amrex_avg_fc_to_cc(i,j,k, ccma[box_no], AMREX_D_DECL(fxma[box_no],
                                                                 fyma[box_no],
                                                                 fzma[box_no]),
                               dcomp
#if (AMREX_SPACEDIM == 1)
                               , gd
#endif
                               );
        });
        if (!Gpu::inNoSyncRegion()) {
            Gpu::streamSynchronize();
        }
    } else
#endif
    {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
        for (MFIter mfi(cc,TilingIfNotGPU()); mfi.isValid(); ++mfi)
        {
            const Box bx = mfi.growntilebox(ngrow);
            auto const& ccarr = cc.array(mfi);
            AMREX_D_TERM(auto const& fxarr = fc[0]->const_array(mfi);,
                         auto const& fyarr = fc[1]->const_array(mfi);,
                         auto const& fzarr = fc[2]->const_array(mfi););

#if (AMREX_SPACEDIM == 1)
            AMREX_HOST_DEVICE_PARALLEL_FOR_3D( bx, i, j, k,
            {
                GeometryData gd;
                gd.coord = 0;
                amrex_avg_fc_to_cc(i,j,k, ccarr, fxarr, dcomp, gd);
            });
#else
            AMREX_HOST_DEVICE_PARALLEL_FOR_3D( bx, i, j, k,
            {
                amrex_avg_fc_to_cc(i,j,k, ccarr, AMREX_D_DECL(fxarr,fyarr,fzarr), dcomp);
            });
#endif
        }
    }
}

template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int>>
void average_down_faces (const Vector<const MF*>& fine,
                         const Vector<MF*>& crse,
                         const IntVect& ratio, int ngcrse)
{
    AMREX_ASSERT(fine.size() == AMREX_SPACEDIM && crse.size() == AMREX_SPACEDIM);
    average_down_faces(Array<const MF*,AMREX_SPACEDIM>
                       {{AMREX_D_DECL(fine[0],fine[1],fine[2])}},
                       Array<MF*,AMREX_SPACEDIM>
                       {{AMREX_D_DECL(crse[0],crse[1],crse[2])}},
                       ratio, ngcrse);
}

template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int>>
void average_down_faces (const Vector<const MF*>& fine,
                         const Vector<MF*>& crse, int ratio, int ngcrse)
{
    average_down_faces(fine,crse,IntVect{ratio},ngcrse);
}

template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int>>
void average_down_faces (const Array<const MF*,AMREX_SPACEDIM>& fine,
                         const Array<MF*,AMREX_SPACEDIM>& crse,
                         int ratio, int ngcrse)
{
    average_down_faces(fine,crse,IntVect{ratio},ngcrse);
}

template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int>>
void average_down_faces (const Array<const MF*,AMREX_SPACEDIM>& fine,
                         const Array<MF*,AMREX_SPACEDIM>& crse,
                         const IntVect& ratio, int ngcrse)
{
    for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
        average_down_faces(*fine[idim], *crse[idim], ratio, ngcrse);
    }
}

template <typename FAB>
void average_down_faces (const FabArray<FAB>& fine, FabArray<FAB>& crse,
                         const IntVect& ratio, int ngcrse)
{
    BL_PROFILE("average_down_faces");

    AMREX_ASSERT(crse.nComp() == fine.nComp());
    AMREX_ASSERT(fine.ixType() == crse.ixType());
    const auto type = fine.ixType();
    int dir;
    for (dir = 0; dir < AMREX_SPACEDIM; ++dir) {
        if (type.nodeCentered(dir)) { break; }
    }
    auto tmptype = type;
    tmptype.unset(dir);
    if (dir >= AMREX_SPACEDIM || !tmptype.cellCentered()) {
        amrex::Abort("average_down_faces: not face index type");
    }
    const int ncomp = crse.nComp();
    if (isMFIterSafe(fine, crse))
    {
#ifdef AMREX_USE_GPU
        if (Gpu::inLaunchRegion() && crse.isFusingCandidate()) {
            auto const& crsema = crse.arrays();
            auto const& finema = fine.const_arrays();
            ParallelFor(crse, IntVect(ngcrse), ncomp,
            [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
            {
                amrex_avgdown_faces(i,j,k,n, crsema[box_no], finema[box_no], 0, 0, ratio, dir);
            });
            if (!Gpu::inNoSyncRegion()) {
                Gpu::streamSynchronize();
            }
        } else
#endif
        {
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
            for (MFIter mfi(crse,TilingIfNotGPU()); mfi.isValid(); ++mfi)
            {
                const Box& bx = mfi.growntilebox(ngcrse);
                auto const& crsearr = crse.array(mfi);
                auto const& finearr = fine.const_array(mfi);
                AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
                {
                    amrex_avgdown_faces(i,j,k,n, crsearr, finearr, 0, 0, ratio, dir);
                });
            }
        }
    }
    else
    {
        FabArray<FAB> ctmp(amrex::coarsen(fine.boxArray(),ratio), fine.DistributionMap(),
                           ncomp, ngcrse, MFInfo(), DefaultFabFactory<FAB>());
        average_down_faces(fine, ctmp, ratio, ngcrse);
        crse.ParallelCopy(ctmp,0,0,ncomp,ngcrse,ngcrse);
    }
}

template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int>>
void average_down_faces (const Array<const MF*,AMREX_SPACEDIM>& fine,
                         const Array<MF*,AMREX_SPACEDIM>& crse,
                         const IntVect& ratio, const Geometry& crse_geom)
{
    for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
        average_down_faces(*fine[idim], *crse[idim], ratio, crse_geom);
    }
}

template <typename FAB>
void average_down_faces (const FabArray<FAB>& fine, FabArray<FAB>& crse,
                         const IntVect& ratio, const Geometry& crse_geom)
{
    FabArray<FAB> ctmp(amrex::coarsen(fine.boxArray(),ratio), fine.DistributionMap(),
                       crse.nComp(), 0);
    average_down_faces(fine, ctmp, ratio, 0);
    crse.ParallelCopy(ctmp,0,0,crse.nComp(),0,0,crse_geom.periodicity());
}

template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> FOO>
Vector<typename MF::value_type> get_cell_data (MF const& mf, IntVect const& cell)
{
    using T = typename MF::value_type;
    const int ncomp = mf.nComp();
    Gpu::DeviceVector<T> dv(ncomp);
    auto* dp = dv.data();
    bool found = false;
    auto loc = cell.dim3();
    for (MFIter mfi(mf); mfi.isValid() && !found; ++mfi)
    {
        Box const& box = mfi.validbox();
        if (box.contains(cell)) {
            found = true;
            auto const& fab = mf.const_array(mfi);
            amrex::ParallelFor(1, [=] AMREX_GPU_DEVICE (int) noexcept
            {
                for (int n = 0; n < ncomp; ++n) {
                    dp[n] = fab(loc.x,loc.y,loc.z,n);
                }
            });
        }
    }
    Vector<T> hv;
    if (found) {
        hv.resize(ncomp);
        Gpu::copy(Gpu::deviceToHost, dv.begin(), dv.end(), hv.begin());
    }
    return hv;
}

template <typename MF, std::enable_if_t<IsFabArray<MF>::value,int> FOO>
MF get_line_data (MF const& mf, int dir, IntVect const& cell)
{
    BoxArray const& ba = mf.boxArray();
    DistributionMapping const& dm = mf.DistributionMap();
    const auto nboxes = static_cast<int>(ba.size());

    BoxList bl(ba.ixType());
    Vector<int> procmap;
    Vector<int> index_map;
    for (int i = 0; i < nboxes; ++i) {
        Box const& b = ba[i];
        IntVect lo = cell;
        lo[dir] = b.smallEnd(dir);
        if (b.contains(lo)) {
            IntVect hi = lo;
            hi[dir] = b.bigEnd(dir);
            Box b1d(lo,hi,b.ixType());
            bl.push_back(b1d);
            procmap.push_back(dm[i]);
            index_map.push_back(i);
        }
    }

    if (bl.isEmpty()) {
        return MF();
    } else {
        BoxArray rba(std::move(bl));
        DistributionMapping rdm(std::move(procmap));
        MF rmf(rba, rdm, mf.nComp(), IntVect(0),
               MFInfo().SetArena(mf.arena()));
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
        for (MFIter mfi(rmf); mfi.isValid(); ++mfi) {
            Box const& b = mfi.validbox();
            auto const& dfab = rmf.array(mfi);
            auto const& sfab = mf.const_array(index_map[mfi.index()]);
            amrex::ParallelFor(b, mf.nComp(),
            [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
            {
                dfab(i,j,k,n) = sfab(i,j,k,n);
            });
        }
        return rmf;
    }
}

template <typename Op, typename T, typename FAB, typename F,
          std::enable_if_t<IsBaseFab<FAB>::value
#ifndef AMREX_USE_CUDA
                           && IsCallableR<T,F,int,int,int,int>::value
#endif
                           , int> FOO>
BaseFab<T>
ReduceToPlane (int direction, Box const& domain, FabArray<FAB> const& mf, F const& f)
{
    Box domain2d = domain;
    domain2d.setRange(direction, 0);

    T initval;
    Op().init(initval);

    BaseFab<T> r(domain2d);
    r.template setVal<RunOn::Device>(initval);
    auto const& ar = r.array();

    for (MFIter mfi(mf,MFItInfo().UseDefaultStream().DisableDeviceSync());
         mfi.isValid(); ++mfi)
    {
        Box bx = mfi.validbox() & domain;
        if (bx.ok()) {
            int box_no = mfi.LocalIndex();
#if defined(AMREX_USE_GPU)
            Box b2d = bx;
            b2d.setRange(direction,0);
            const auto blo = amrex::lbound(bx);
            const auto len = amrex::length(bx);
            constexpr int nthreads = 128;
            auto nblocks = static_cast<int>(b2d.numPts());
#ifdef AMREX_USE_SYCL
            constexpr std::size_t shared_mem_bytes = sizeof(T)*Gpu::Device::warp_size;
            amrex::launch<nthreads>(nblocks, shared_mem_bytes, Gpu::gpuStream(),
            [=] AMREX_GPU_DEVICE (Gpu::Handler const& h)
            {
                int bid = h.blockIdx();
                int tid = h.threadIdx();
#else
            amrex::launch<nthreads>(nblocks, Gpu::gpuStream(),
            [=] AMREX_GPU_DEVICE ()
            {
                int bid = blockIdx.x;
                int tid = threadIdx.x;
#endif
                T tmp;
                Op().init(tmp);
                T* p;
                if (direction == 0) {
                    int k = bid / len.y;
                    int j = bid - k*len.y;
                    k += blo.z;
                    j += blo.y;
                    for (int i = blo.x + tid; i < blo.x+len.x; i += nthreads) {
                        Op().local_update(tmp, f(box_no,i,j,k));
                    }
                    p = ar.ptr(0,j,k);
                } else if (direction == 1) {
                    int k = bid / len.x;
                    int i = bid - k*len.x;
                    k += blo.z;
                    i += blo.x;
                    for (int j = blo.y + tid; j < blo.y+len.y; j += nthreads) {
                        Op().local_update(tmp, f(box_no,i,j,k));
                    }
                    p = ar.ptr(i,0,k);
                } else {
                    int j = bid / len.x;
                    int i = bid - j*len.x;
                    j += blo.y;
                    i += blo.x;
                    for (int k = blo.z + tid; k < blo.z+len.z; k += nthreads) {
                        Op().local_update(tmp, f(box_no,i,j,k));
                    }
                    p = ar.ptr(i,j,0);
                }
#ifdef AMREX_USE_SYCL
                Op().template parallel_update<T>(*p, tmp, h);
#else
                Op().template parallel_update<T,nthreads>(*p, tmp);
#endif
            });
#else
            // CPU
            if (direction == 0) {
                AMREX_LOOP_3D(bx, i, j, k,
                {
                    Op().local_update(ar(0,j,k), f(box_no,i,j,k));
                });
            } else if (direction == 1) {
                AMREX_LOOP_3D(bx, i, j, k,
                {
                    Op().local_update(ar(i,0,k), f(box_no,i,j,k));
                });
            } else {
                AMREX_LOOP_3D(bx, i, j, k,
                {
                    Op().local_update(ar(i,j,0), f(box_no,i,j,k));
                });
            }
#endif
        }
    }
    Gpu::streamSynchronize();

    return r;
}

}

#endif
|