Line data Source code
1 :
2 : #ifndef BL_FARRAYBOX_H
3 : #define BL_FARRAYBOX_H
4 : #include <AMReX_Config.H>
5 :
6 : #include <AMReX_BaseFab.H>
7 : #include <AMReX_FabConv.H>
8 : #include <AMReX_FabFactory.H>
9 :
10 : namespace amrex {
11 :
12 : class FArrayBox;
13 :
14 : /**
15 : * \brief A Class Facilitating I/O for Fabs
16 : *
17 : * This data-less class aids I/O for FABs and encapsulates information
18 : * about the floating point format being used in output.
19 : * Note that the "new" format for writing out FABs is self-describing;
20 : * i.e. we can always read in a FAB written in the "new" format. For this
21 : * reason, it is usually preferable to write FABs out in the native
22 : * format on the machine, unless you're doing computations in 64 bit and
23 : * only want to write out 32 bit FABs.
24 : *
25 : * With the exception of the enumeration constants, this class is
26 : * primarily for FArrayBox implementers; i.e. user's shouldn't
27 : * call any of the member functions in this class directly.
28 : */
29 : class FABio // NOLINT(cppcoreguidelines-special-member-functions)
30 : {
31 : public:
32 : /**
33 : * \brief An enum which controls precision of FAB output.
34 : * Valid values are FAB_FLOAT and FAB_DOUBLE. This
35 : * is deprecated; i.e. please don't use it except
36 : * for reading old FABs as it will probably be going
37 : * away in a later release.
38 : */
39 : enum Precision
40 : {
41 : FAB_FLOAT = 0,
42 : FAB_DOUBLE
43 : };
44 : /**
45 : * \brief An enum which controls format of FAB output.
46 : *
47 : * FAB_ASCII: write the FAB out in ASCII format.
48 : *
49 : * FAB_8BIT: write the FAB out with all floating-point
50 : * values scaled to range 0 - 255.
51 : *
52 : * FAB_NATIVE: write out floating-point values in the native
53 : * format. This is usually the "best" choice of formats.
54 : *
55 : * FAB_IEEE_32: write out floating-point values in IEEE 32
56 : * bit normal format. This is recommended for use when your
57 : * internal computations are done in 64 bits and you want to save
58 : * space when writing out the FABs.
59 : *
60 : * FAB_IEEE: this is deprecated. It is identical to
61 : * FAB_IEEE_32.
62 : *
63 : * FAB_NATIVE_32: write out values in the native 32 bit format.
64 : *
65 : */
66 : enum Format
67 : {
68 : FAB_ASCII,
69 : FAB_IEEE,
70 : FAB_NATIVE,
71 : //
72 : // This is set to four so that when reading in an old FAB,
73 : // we don't get confused when we see an old FAB_8BITRLE file.
74 : //
75 : FAB_8BIT = 4,
76 : FAB_IEEE_32,
77 : FAB_NATIVE_32
78 : };
79 : /**
80 : * \brief An enum which controls byte ordering of FAB output.
81 : * Valid values are FAB_NORMAL_ORDER, FAB_REVERSE_ORDER,
82 : * and FAB_REVERSE_ORDER_2. This is deprecated; i.e. please
83 : * don't use it except for reading old FABs as it will probably
84 : * be going away in a later release. These exist solely to
85 : * describe the ordering of "old" FABs that you want to read.
86 : */
87 : enum Ordering
88 : {
89 : FAB_NORMAL_ORDER,
90 : FAB_REVERSE_ORDER,
91 : FAB_REVERSE_ORDER_2
92 : };
93 :
94 : //! The virtual destructor.
95 : virtual ~FABio () = default;
96 : /**
97 : * \brief Pure virtual function. Derived classes MUST override this
98 : * function to read an FArrayBox from the istream, under the
99 : * assumption that the header has already been read.
100 : */
101 : virtual void read (std::istream& is,
102 : FArrayBox& fb) const = 0;
103 : /**
104 : * \brief Pure virtual function. Derived classes MUST override this
105 : * function to write the FArrayBox to the ostream, under the
106 : * assumption that the header for the FAB has already been
107 : * written. Write it out as if it only had num_comp components
108 : * with component comp being the first one.
109 : */
110 : virtual void write (std::ostream& os,
111 : const FArrayBox& fb,
112 : int comp,
113 : int num_comp) const = 0;
114 : /**
115 : * \brief Pure virtual function. Derived classes MUST override this
116 : * function to skip over the next FAB f in the istream, under the
117 : * assumption that the header for the FAB f has already been
118 : * skipped over.
119 : */
120 : virtual void skip (std::istream& is,
121 : FArrayBox& f) const = 0;
122 :
123 : virtual void skip (std::istream& is,
124 : FArrayBox& f,
125 : int nCompToSkip) const = 0;
126 : /**
127 : * \brief Write out a header describing FArrayBox f that contains
128 : * nvar components. It must be the case that nvar <= f.nComp().
129 : */
130 : virtual void write_header (std::ostream& os,
131 : const FArrayBox& f,
132 : int nvar) const;
133 : /**
134 : * \brief Read in the header from the istream.
135 : * Returns a new'd FABio of the written-out type.
136 : * Complements write_header. The user is responsible
137 : * for delete'ing the returned FABio*. The FArrayBox f is
138 : * resized to be on the Box and number of components read
139 : * in from the header file. This is in preparation for
140 : * next doing a read. This is split up so that we can make
141 : * the read functions virtual, while having all the code for
142 : * detailing the type of FArrayBox that was written out in one place.
143 : */
144 : static FABio* read_header (std::istream& is,
145 : FArrayBox& f);
146 :
147 : /**
148 : * \brief Same as above except create a single component fab with
149 : * data from the compIndex component of the istream fab.
150 : * Return the number of available components in the istream fab.
151 : */
152 : static FABio* read_header (std::istream& is,
153 : FArrayBox& f,
154 : int compIndex,
155 : int& nCompAvailable);
156 : };
157 :
158 : //
159 : // Our binary FABio type.
160 : //
161 : class FABio_binary
162 : :
163 : public FABio
164 : {
165 : public:
166 : FABio_binary (RealDescriptor* rd_);
167 :
168 : void read (std::istream& is,
169 : FArrayBox& fb) const override;
170 :
171 : void write (std::ostream& os,
172 : const FArrayBox& fb,
173 : int comp,
174 : int num_comp) const override;
175 :
176 : void skip (std::istream& is,
177 : FArrayBox& f) const override;
178 :
179 : void skip (std::istream& is,
180 : FArrayBox& f,
181 : int nCompToSkip) const override;
182 :
183 : private:
184 : void write_header (std::ostream& os,
185 : const FArrayBox& f,
186 : int nvar) const override;
187 :
188 : std::unique_ptr<RealDescriptor> realDesc;
189 : };
190 :
191 : /**
192 : * \brief A Fortran Array of REALs
193 : *
194 : * Fortran Array Box's (generally called FAB's) are objects constructed
195 : * to emulate the FORTRAN array. Useful operations can be performed
196 : * upon FAB's in C++, and they provide a convenient interface to
197 : * FORTRAN when it is necessary to retreat into that language.
198 : *
199 : * FArrayBox is derived from BaseFab<Real>.
200 : * FArrayBox adds additional useful capabilities which make sense
201 : * for Real types, such as I/O and L**p norms.
202 : *
203 : * FArrayBox's may be output in various formats (see FABio above).
204 : *
205 : * The format and precision may be set in a file read by the ParmParse
206 : * class by the "fab.format" variable. Allowed values are:
207 : * ASCII
208 : * 8BIT
209 : * NATIVE
210 : * NATIVE_32
211 : * IEEE32
212 : *
213 : * FABs written using operator<< are always written in ASCII.
214 : * FABS written using writOn use the FABio::Format specified with
215 : * setFormat or the FABio::Format specified in the ParmParse file
216 : * read by init. If the FABio::Format is not set explicitly by either
217 : * of these two methods, then it defaults to NATIVE.
218 : *
219 : * The C pre-processor macro AMREX_SPACEDIM must be defined to use
220 : * this class. The internal precision of FARRAYBOX objects is
221 : * set by defining either BL_USE_FLOAT or BL_USE_DOUBLE
222 : *
223 : * This class does NOT provide a copy constructor or assignment operator,
224 : * but it has a move constructor.
225 : */
226 : class FArrayBox
227 : :
228 : public BaseFab<Real>
229 : {
230 : //! FABio is a friend of ours.
231 : friend class FABio;
232 : public:
233 : //! Construct an invalid FAB with no memory.
234 : FArrayBox () noexcept = default;
235 :
236 : explicit FArrayBox (Arena* ar) noexcept;
237 :
238 : FArrayBox (const Box& b, int ncomp, Arena* ar);
239 :
240 : /**
241 : * \brief Construct an initial FAB with the data space allocated but
242 : * not initialized. ncomp is the number of components
243 : * (variables) at each data point in the Box.
244 : */
245 : explicit FArrayBox (const Box& b,
246 : int ncomp=1,
247 : bool alloc=true,
248 : bool shared=false,
249 : Arena* ar = nullptr);
250 :
251 : FArrayBox (const FArrayBox& rhs, MakeType make_type, int scomp, int ncomp);
252 :
253 : FArrayBox (const Box& b, int ncomp, Real const* p) noexcept;
254 :
255 : FArrayBox (const Box& b, int ncomp, Real* p) noexcept;
256 :
257 : explicit FArrayBox (Array4<Real> const& a) noexcept : BaseFab<Real>(a) {}
258 :
259 : FArrayBox (Array4<Real> const& a, IndexType t) noexcept : BaseFab<Real>(a,t) {}
260 :
261 : explicit FArrayBox (Array4<Real const> const& a) noexcept : BaseFab<Real>(a) {}
262 :
263 : explicit FArrayBox (Array4<Real const> const& a, IndexType t) noexcept : BaseFab<Real>(a,t) {}
264 :
265 : //! The destructor.
266 6827380 : ~FArrayBox () noexcept override = default;
267 :
268 : FArrayBox (FArrayBox&& rhs) noexcept = default;
269 : FArrayBox& operator= (FArrayBox&&) noexcept = default;
270 :
271 : FArrayBox (const FArrayBox&) = delete;
272 : FArrayBox& operator= (const FArrayBox&) = delete;
273 :
274 : //! Set the fab to the value r.
275 : #if defined(AMREX_USE_GPU)
276 : template <RunOn run_on>
277 : #else
278 : template <RunOn run_on=RunOn::Host>
279 : #endif
280 : FArrayBox& operator= (Real v) noexcept;
281 :
282 : /**
283 : * \brief Are there any NaNs in the FAB?
284 : * This may return false, even if the FAB contains NaNs, if the machine
285 : * doesn't support the appropriate NaN testing functions.
286 : */
287 : #if defined(AMREX_USE_GPU)
288 : template <RunOn run_on>
289 : #else
290 : template <RunOn run_on=RunOn::Host>
291 : #endif
292 : [[nodiscard]] bool contains_nan () const noexcept;
293 :
294 : #if defined(AMREX_USE_GPU)
295 : template <RunOn run_on>
296 : #else
297 : template <RunOn run_on=RunOn::Host>
298 : #endif
299 : [[nodiscard]] bool contains_nan (const Box& bx, int scomp, int ncomp) const noexcept;
300 : /**
301 : * \brief These versions return the cell index (though not the component) of
302 : * the first location of a NaN if there is one.
303 : */
304 : #if defined(AMREX_USE_GPU)
305 : template <RunOn run_on>
306 : #else
307 : template <RunOn run_on=RunOn::Host>
308 : #endif
309 : bool contains_nan (IntVect& where) const noexcept;
310 :
311 : #if defined(AMREX_USE_GPU)
312 : template <RunOn run_on>
313 : #else
314 : template <RunOn run_on=RunOn::Host>
315 : #endif
316 : bool contains_nan (const Box& bx, int scomp, int ncomp, IntVect& where) const noexcept;
317 : /**
318 : * \brief Are there any Infs in the FAB?
319 : * This may return false, even if the FAB contains Infs, if the machine
320 : * doesn't support the appropriate Inf testing functions.
321 : */
322 : #if defined(AMREX_USE_GPU)
323 : template <RunOn run_on>
324 : #else
325 : template <RunOn run_on=RunOn::Host>
326 : #endif
327 : [[nodiscard]] bool contains_inf () const noexcept;
328 :
329 : #if defined(AMREX_USE_GPU)
330 : template <RunOn run_on>
331 : #else
332 : template <RunOn run_on=RunOn::Host>
333 : #endif
334 : [[nodiscard]] bool contains_inf (const Box& bx, int scomp, int ncomp) const noexcept;
335 : /**
336 : * \brief These versions return the cell index (though not the component) of
337 : * the first location of an Inf if there is one.
338 : */
339 : #if defined(AMREX_USE_GPU)
340 : template <RunOn run_on>
341 : #else
342 : template <RunOn run_on=RunOn::Host>
343 : #endif
344 : bool contains_inf (IntVect& where) const noexcept;
345 :
346 : #if defined(AMREX_USE_GPU)
347 : template <RunOn run_on>
348 : #else
349 : template <RunOn run_on=RunOn::Host>
350 : #endif
351 : bool contains_inf (const Box& bx, int scomp, int ncomp, IntVect& where) const noexcept;
352 :
353 : //! For debugging purposes we hide BaseFab version and do some extra work.
354 : void resize (const Box& b, int N = 1, Arena* ar = nullptr);
355 :
356 : [[nodiscard]] FabType getType () const noexcept { return m_type; }
357 :
358 : void initVal () noexcept; // public for cuda
359 :
360 : //! Write FABs in ASCII form.
361 : friend std::ostream& operator<< (std::ostream& os, const FArrayBox& fb);
362 : //! Read FABs in ASCII form.
363 : friend std::istream& operator>> (std::istream& is, FArrayBox& fb);
364 : /**
365 : * \brief Writes out the FAB in whatever format you've set.
366 : * The default format is NATIVE.
367 : */
368 : void writeOn (std::ostream& os) const;
369 :
370 : /**
371 : * \brief Write only selected range of components. comp specifies
372 : * from which component (starting at 0) to write at each
373 : * point in space. num_comp specifies how many data points
374 : * to write out at each point is space -- it defaults to 1.
375 : * It must be the case the comp >= 0 && num_comp >= 1 &&
376 : * (comp+num_comp) <= nComp(). The FAB is written out in
377 : * whatever format you've set, with the default format being
378 : * NATIVE. The FAB that is written to disk will be an
379 : * num_comp component FAB.
380 : */
381 : void writeOn (std::ostream& os,
382 : int comp,
383 : int num_comp=1) const;
384 :
385 : //! Read FAB from istream. Format is as it was written out.
386 : void readFrom (std::istream& is);
387 :
388 : /**
389 : * \brief Read FAB from istream. Format is as it was written out.
390 : * This creates a single component FAB with data from
391 : * compIndex of the FAB from the istream.
392 : * Returns the number of components available in the fab.
393 : */
394 : int readFrom (std::istream& is, int compIndex);
395 :
396 : /**
397 : * \brief Skip over the next FAB from the input stream.
398 : * Return the Box defining the domain of the FAB and the
399 : * number of components.
400 : */
401 : static Box skipFAB (std::istream& is,
402 : int& num_comp);
403 :
404 : //! Skip over the next FAB from the input stream.
405 : static void skipFAB (std::istream& is);
406 :
407 : /**
408 : * \brief Set the FABio::Format in the program.
409 : * This is the preferred way to set the output format
410 : * in "new" FABs. When designing new programs, this should
411 : * be the only function that needs to be called in order
412 : * to set the format.
413 : */
414 : static void setFormat (FABio::Format fmt);
415 :
416 : //! Gets the FABio::Format set in the program.
417 : static FABio::Format getFormat ();
418 :
419 : /**
420 : * \brief Set the FABio::Ordering for reading old FABs. It does
421 : * NOT set the ordering for output.
422 : * This is deprecated. It exists only to facilitate
423 : * reading old FABs. When you're reading in an "old" FAB,
424 : * you must set the Ordering, before attempting
425 : * to read it in. This is because FABs written out in the
426 : * "old" format weren't self-describing; i.e. information
427 : * such as the Ordering was lost when the "old" FAB was
428 : * written out.
429 : */
430 : static void setOrdering (FABio::Ordering ordering);
431 :
432 : /**
433 : * \brief Gets the FABio::Ordering set in the program. This is
434 : * deprecated. It does NOT do the right thing with the
435 : * new FAB I/O format.
436 : */
437 : static FABio::Ordering getOrdering ();
438 :
439 : /**
440 : * \brief Set the FABio::Precision. This is deprecated. It
441 : * is not useful with the "new" FAB I/O format.
442 : */
443 : static void setPrecision (FABio::Precision precision);
444 :
445 : /**
446 : * \brief Returns the FABio::Precision. This is deprecated. It
447 : * is not useful with the "new" FAB I/O format. Always
448 : * returns FABio::Float.
449 : */
450 : static FABio::Precision getPrecision ();
451 :
452 : //! Returns reference to the FABio object used by the program.
453 : static const FABio& getFABio ();
454 :
455 : /**
456 : * \brief Sets the FABio object used by the program. It is an error
457 : * if the passed pointer rd is the null pointer.
458 : */
459 : static void setFABio (FABio* rd);
460 :
461 : static std::unique_ptr<RealDescriptor> getDataDescriptor ();
462 :
463 : static std::string getClassName ();
464 :
465 : static bool set_do_initval (bool tf);
466 : static bool get_do_initval ();
467 : static Real set_initval (Real iv);
468 : static Real get_initval ();
469 : //! Initialize from ParmParse with "fab" prefix.
470 : static void Initialize ();
471 : static void Finalize ();
472 : static bool initialized;
473 :
474 : protected:
475 :
476 : FabType m_type = FabType::regular;
477 :
478 : /**
479 : * \brief Format and ordering for all FAB output.
480 : * This stuff exists solely to support reading old FABs.
481 : */
482 : static FABio::Format format;
483 : static FABio::Ordering ordering;
484 :
485 : //! The FABio pointer describing our output format
486 : static FABio* fabio;
487 :
488 : //! initial value
489 : static bool do_initval;
490 : static Real initval;
491 : static bool init_snan;
492 : };
493 :
494 : using FArrayBoxFactory = DefaultFabFactory<FArrayBox>;
495 :
496 : template <RunOn run_on>
497 : FArrayBox&
498 : FArrayBox::operator= (Real v) noexcept
499 : {
500 : BaseFab<Real>::operator=<run_on>(v);
501 : return *this;
502 : }
503 :
504 : template <RunOn run_on>
505 : bool
506 0 : FArrayBox::contains_nan () const noexcept
507 : {
508 0 : const Real* dp = dptr;
509 0 : const Long n = numPts()*nvar;
510 : #ifdef AMREX_USE_GPU
511 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
512 : ReduceOps<ReduceOpLogicalOr> reduce_op;
513 : ReduceData<int> reduce_data(reduce_op);
514 : using ReduceTuple = ReduceData<int>::Type;
515 : reduce_op.eval(n, reduce_data,
516 : [=] AMREX_GPU_DEVICE (Long i) -> ReduceTuple
517 : {
518 : return { static_cast<int>(amrex::isnan(dp[i])) };
519 : });
520 : ReduceTuple hv = reduce_data.value(reduce_op);
521 : return amrex::get<0>(hv);
522 : } else
523 : #endif
524 : {
525 0 : for (Long i = 0; i < n; i++) {
526 0 : if (amrex::isnan(*dp++)) {
527 0 : return true;
528 : }
529 : }
530 0 : return false;
531 : }
532 : }
533 :
534 : template <RunOn run_on>
535 : bool
536 : FArrayBox::contains_nan (const Box& bx, int scomp, int ncomp) const noexcept
537 : {
538 : BL_ASSERT(scomp >= 0);
539 : BL_ASSERT(ncomp >= 1);
540 : BL_ASSERT(scomp < nComp());
541 : BL_ASSERT(ncomp <= nComp());
542 : BL_ASSERT(domain.contains(bx));
543 :
544 : const auto& a = this->array();
545 :
546 : #ifdef AMREX_USE_GPU
547 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
548 : ReduceOps<ReduceOpLogicalOr> reduce_op;
549 : ReduceData<int> reduce_data(reduce_op);
550 : using ReduceTuple = ReduceData<int>::Type;
551 : reduce_op.eval(bx, reduce_data,
552 : [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple
553 : {
554 : bool t = false;
555 : for (int n = scomp; n < scomp+ncomp; ++n) {
556 : t = t || amrex::isnan(a(i,j,k,n));
557 : }
558 : return { static_cast<int>(t) };
559 : });
560 : ReduceTuple hv = reduce_data.value(reduce_op);
561 : return amrex::get<0>(hv);
562 : } else
563 : #endif
564 : {
565 : const auto lo = amrex::lbound(bx);
566 : const auto hi = amrex::ubound(bx);
567 : for (int n = scomp; n < scomp+ncomp; ++n) {
568 : for (int k = lo.z; k <= hi.z; ++k) {
569 : for (int j = lo.y; j <= hi.y; ++j) {
570 : for (int i = lo.x; i <= hi.x; ++i) {
571 : if (amrex::isnan(a(i,j,k,n))) {
572 : return true;
573 : }
574 : }
575 : }
576 : }
577 : }
578 : return false;
579 : }
580 : }
581 :
582 : template <RunOn run_on>
583 : bool
584 : FArrayBox::contains_nan (IntVect& where) const noexcept
585 : {
586 : return contains_nan<run_on>(domain, 0, nComp(), where);
587 : }
588 :
589 : template <RunOn run_on>
590 : bool
591 : FArrayBox::contains_nan (const Box& bx, int scomp, int ncomp, IntVect& where) const noexcept
592 : {
593 : BL_ASSERT(scomp >= 0);
594 : BL_ASSERT(ncomp >= 1);
595 : BL_ASSERT(scomp < nComp());
596 : BL_ASSERT(ncomp <= nComp());
597 : BL_ASSERT(domain.contains(bx));
598 :
599 : const auto& a = this->array();
600 : #ifdef AMREX_USE_GPU
601 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
602 : Array<int,1+AMREX_SPACEDIM> ha{0,AMREX_D_DECL(std::numeric_limits<int>::lowest(),
603 : std::numeric_limits<int>::lowest(),
604 : std::numeric_limits<int>::lowest())};
605 : Gpu::DeviceVector<int> dv(1+AMREX_SPACEDIM);
606 : int* p = dv.data();
607 : Gpu::htod_memcpy_async(p, ha.data(), sizeof(int)*ha.size());
608 : amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
609 : {
610 : int* flag = p;
611 : bool t = false;
612 : for (int n = scomp; n < scomp+ncomp; ++n) {
613 : t = t || amrex::isnan(a(i,j,k,n));
614 : }
615 : if (t && (*flag == 0)) {
616 : if (Gpu::Atomic::Exch(flag,1) == 0) {
617 : AMREX_D_TERM(p[1] = i;,
618 : p[2] = j;,
619 : p[3] = k;);
620 : }
621 : }
622 : });
623 : Gpu::dtoh_memcpy_async(ha.data(), p, sizeof(int)*ha.size());
624 : Gpu::streamSynchronize();
625 : where = IntVect(AMREX_D_DECL(ha[1],ha[2],ha[3]));
626 : return ha[0];
627 : } else
628 : #endif
629 : {
630 : const auto lo = amrex::lbound(bx);
631 : const auto hi = amrex::ubound(bx);
632 : for (int n = scomp; n < scomp+ncomp; ++n) {
633 : for (int k = lo.z; k <= hi.z; ++k) {
634 : for (int j = lo.y; j <= hi.y; ++j) {
635 : for (int i = lo.x; i <= hi.x; ++i) {
636 : if (amrex::isnan(a(i,j,k,n))) {
637 : where = IntVect(AMREX_D_DECL(i,j,k));
638 : return true;
639 : }
640 : }
641 : }
642 : }
643 : }
644 : return false;
645 : }
646 : }
647 :
648 : template <RunOn run_on>
649 : bool
650 0 : FArrayBox::contains_inf () const noexcept
651 : {
652 0 : const Real* dp = dptr;
653 0 : const Long n = numPts()*nvar;
654 : #ifdef AMREX_USE_GPU
655 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
656 : ReduceOps<ReduceOpLogicalOr> reduce_op;
657 : ReduceData<int> reduce_data(reduce_op);
658 : using ReduceTuple = ReduceData<int>::Type;
659 : reduce_op.eval(n, reduce_data,
660 : [=] AMREX_GPU_DEVICE (Long i) -> ReduceTuple
661 : {
662 : return { static_cast<int>(amrex::isinf(dp[i])) };
663 : });
664 : ReduceTuple hv = reduce_data.value(reduce_op);
665 : return amrex::get<0>(hv);
666 : } else
667 : #endif
668 : {
669 0 : for (Long i = 0; i < n; i++) {
670 0 : if (amrex::isinf(*dp++)) {
671 0 : return true;
672 : }
673 : }
674 0 : return false;
675 : }
676 : }
677 :
678 : template <RunOn run_on>
679 : bool
680 : FArrayBox::contains_inf (const Box& bx, int scomp, int ncomp) const noexcept
681 : {
682 : BL_ASSERT(scomp >= 0);
683 : BL_ASSERT(ncomp >= 1);
684 : BL_ASSERT(scomp < nComp());
685 : BL_ASSERT(ncomp <= nComp());
686 : BL_ASSERT(domain.contains(bx));
687 :
688 : const auto& a = this->array();
689 :
690 : #ifdef AMREX_USE_GPU
691 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
692 : ReduceOps<ReduceOpLogicalOr> reduce_op;
693 : ReduceData<int> reduce_data(reduce_op);
694 : using ReduceTuple = ReduceData<int>::Type;
695 : reduce_op.eval(bx, reduce_data,
696 : [=] AMREX_GPU_DEVICE (int i, int j, int k) ->ReduceTuple
697 : {
698 : bool t = false;
699 : for (int n = scomp; n < scomp+ncomp; ++n) {
700 : t = t || amrex::isinf(a(i,j,k,n));
701 : }
702 : return { static_cast<int>(t) };
703 : });
704 : ReduceTuple hv = reduce_data.value(reduce_op);
705 : return amrex::get<0>(hv);
706 : } else
707 : #endif
708 : {
709 : const auto lo = amrex::lbound(bx);
710 : const auto hi = amrex::ubound(bx);
711 : for (int n = scomp; n < scomp+ncomp; ++n) {
712 : for (int k = lo.z; k <= hi.z; ++k) {
713 : for (int j = lo.y; j <= hi.y; ++j) {
714 : for (int i = lo.x; i <= hi.x; ++i) {
715 : if (amrex::isinf(a(i,j,k,n))) {
716 : return true;
717 : }
718 : }
719 : }
720 : }
721 : }
722 : return false;
723 : }
724 : }
725 :
726 : template <RunOn run_on>
727 : bool
728 : FArrayBox::contains_inf (IntVect& where) const noexcept
729 : {
730 : return contains_inf<run_on>(domain,0,nComp(),where);
731 : }
732 :
733 : template <RunOn run_on>
734 : bool
735 : FArrayBox::contains_inf (const Box& bx, int scomp, int ncomp, IntVect& where) const noexcept
736 : {
737 : BL_ASSERT(scomp >= 0);
738 : BL_ASSERT(ncomp >= 1);
739 : BL_ASSERT(scomp < nComp());
740 : BL_ASSERT(ncomp <= nComp());
741 : BL_ASSERT(domain.contains(bx));
742 :
743 : const auto& a = this->array();
744 : #ifdef AMREX_USE_GPU
745 : if (run_on == RunOn::Device && Gpu::inLaunchRegion()) {
746 : Array<int,1+AMREX_SPACEDIM> ha{0,AMREX_D_DECL(std::numeric_limits<int>::lowest(),
747 : std::numeric_limits<int>::lowest(),
748 : std::numeric_limits<int>::lowest())};
749 : Gpu::DeviceVector<int> dv(1+AMREX_SPACEDIM);
750 : int* p = dv.data();
751 : Gpu::htod_memcpy_async(p, ha.data(), sizeof(int)*ha.size());
752 : amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
753 : {
754 : int* flag = p;
755 : bool t = false;
756 : for (int n = scomp; n < scomp+ncomp; ++n) {
757 : t = t || amrex::isinf(a(i,j,k,n));
758 : }
759 : if (t && (*flag == 0)) {
760 : if (Gpu::Atomic::Exch(flag,1) == 0) {
761 : AMREX_D_TERM(p[1] = i;,
762 : p[2] = j;,
763 : p[3] = k;);
764 : }
765 : }
766 : });
767 : Gpu::dtoh_memcpy_async(ha.data(), p, sizeof(int)*ha.size());
768 : Gpu::streamSynchronize();
769 : where = IntVect(AMREX_D_DECL(ha[1],ha[2],ha[3]));
770 : return ha[0];
771 : } else
772 : #endif
773 : {
774 : const auto lo = amrex::lbound(bx);
775 : const auto hi = amrex::ubound(bx);
776 : for (int n = scomp; n < scomp+ncomp; ++n) {
777 : for (int k = lo.z; k <= hi.z; ++k) {
778 : for (int j = lo.y; j <= hi.y; ++j) {
779 : for (int i = lo.x; i <= hi.x; ++i) {
780 : if (amrex::isinf(a(i,j,k,n))) {
781 : where = IntVect(AMREX_D_DECL(i,j,k));
782 : return true;
783 : }
784 : }
785 : }
786 : }
787 : }
788 : return false;
789 : }
790 : }
791 :
792 : }
793 :
794 : #endif /*BL_FARRAYBOX_H*/
|