LCOV - code coverage report
Current view: top level - ext/amrex/3d-coverage-g++-24.08/include - AMReX_Loop.H (source / functions) Hit Total Coverage
Test: coverage_merged.info Lines: 7 7 100.0 %
Date: 2024-11-18 05:28:54 Functions: 5 225 2.2 %

          Line data    Source code
       1             : #ifndef AMREX_LOOP_H_
       2             : #define AMREX_LOOP_H_
       3             : #include <AMReX_Config.H>
       4             : 
       5             : #include <AMReX_Box.H>
       6             : #include <AMReX_Extension.H>
       7             : 
       8             : namespace amrex {
       9             : 
      10             : template <class F>
                       // Serial 3D loop over the index range [lo, hi], both ends inclusive.
                       // Calls f(i,j,k) once per index triple. The z index (k) is the
                       // outermost loop, then y (j), and x (i) is innermost and varies
                       // fastest. If any component of hi is below the matching component
                       // of lo, f is never called. The function is noexcept, so an
                       // exception escaping f would terminate the program.
                       // Marked AMREX_GPU_HOST_DEVICE; see the note before the OnCpu
                       // variants in this file about callables that are host-only.
      11             : AMREX_GPU_HOST_DEVICE
      12             : AMREX_ATTRIBUTE_FLATTEN_FOR
      13             : void Loop (Dim3 lo, Dim3 hi, F const& f) noexcept
      14             : {
      15             :     for (int k = lo.z; k <= hi.z; ++k) {
      16             :     for (int j = lo.y; j <= hi.y; ++j) {
      17             :     for (int i = lo.x; i <= hi.x; ++i) {
      18             :         f(i,j,k);
      19             :     }}}
      20             : }
      21             : 
      22             : template <class F>
                       // Serial loop over ncomp components and the 3D index range [lo, hi]
                       // (inclusive). Calls f(i,j,k,n) for n = 0 .. ncomp-1. The component
                       // index n is the outermost loop, followed by k (z), j (y) and
                       // finally i (x), which varies fastest. Nothing is called when
                       // ncomp <= 0 or when any component of hi is below that of lo.
                       // The function is noexcept, so f must not let an exception escape.
      23             : AMREX_GPU_HOST_DEVICE
      24             : AMREX_ATTRIBUTE_FLATTEN_FOR
      25             : void Loop (Dim3 lo, Dim3 hi, int ncomp, F const& f) noexcept
      26             : {
      27             :     for (int n = 0; n < ncomp; ++n) {
      28             :     for (int k = lo.z; k <= hi.z; ++k) {
      29             :     for (int j = lo.y; j <= hi.y; ++j) {
      30             :     for (int i = lo.x; i <= hi.x; ++i) {
      31             :         f(i,j,k,n);
      32             :     }}}}
      33             : }
      34             : 
      35             : template <class F>
                       // 3D loop over the inclusive index range [lo, hi] that calls
                       // f(i,j,k) for every index triple, with k (z) outermost and
                       // i (x) innermost. The only difference from the plain nested loop
                       // is that AMREX_PRAGMA_SIMD is placed on the innermost i loop.
                       // NOTE(review): AMREX_PRAGMA_SIMD is defined outside this file;
                       // presumably it is a vectorization hint, in which case calls to f
                       // for different i must be independent -- confirm against the
                       // macro definition.
      36             : AMREX_GPU_HOST_DEVICE
      37             : AMREX_ATTRIBUTE_FLATTEN_FOR
      38             : void LoopConcurrent (Dim3 lo, Dim3 hi, F const& f) noexcept
      39             : {
      40             :     for (int k = lo.z; k <= hi.z; ++k) {
      41             :     for (int j = lo.y; j <= hi.y; ++j) {
      42             :     AMREX_PRAGMA_SIMD
      43             :     for (int i = lo.x; i <= hi.x; ++i) {
      44             :         f(i,j,k);
      45             :     }}}
      46             : }
      47             : 
      48             : template <class F>
                       // Loop over ncomp components and the inclusive 3D range [lo, hi],
                       // calling f(i,j,k,n) for n = 0 .. ncomp-1. Loop nesting from
                       // outermost to innermost is n, k (z), j (y), i (x).
                       // AMREX_PRAGMA_SIMD is applied to the innermost i loop only.
                       // NOTE(review): the macro is defined outside this file; presumably
                       // a vectorization hint that requires independent iterations over
                       // i -- confirm against the macro definition.
      49             : AMREX_GPU_HOST_DEVICE
      50             : AMREX_ATTRIBUTE_FLATTEN_FOR
      51             : void LoopConcurrent (Dim3 lo, Dim3 hi, int ncomp, F const& f) noexcept
      52             : {
      53             :     for (int n = 0; n < ncomp; ++n) {
      54             :     for (int k = lo.z; k <= hi.z; ++k) {
      55             :     for (int j = lo.y; j <= hi.y; ++j) {
      56             :     AMREX_PRAGMA_SIMD
      57             :     for (int i = lo.x; i <= hi.x; ++i) {
      58             :         f(i,j,k,n);
      59             :     }}}}
      60             : }
      61             : 
      62             : template <class F>
                       // Box overload of the serial 3D loop. The bounds are taken from
                       // amrex::lbound(bx) and amrex::ubound(bx) and both are treated as
                       // inclusive. Calls f(i,j,k) for every index triple, with k (z)
                       // outermost and i (x) innermost. The function is noexcept, so f
                       // must not let an exception escape.
      63             : AMREX_GPU_HOST_DEVICE
      64             : AMREX_ATTRIBUTE_FLATTEN_FOR
      65             : void Loop (Box const& bx, F const& f) noexcept
      66             : {
                       //  Read the bounds once, before the loops start.
      67             :     const auto lo = amrex::lbound(bx);
      68             :     const auto hi = amrex::ubound(bx);
      69             :     for (int k = lo.z; k <= hi.z; ++k) {
      70             :     for (int j = lo.y; j <= hi.y; ++j) {
      71             :     for (int i = lo.x; i <= hi.x; ++i) {
      72             :         f(i,j,k);
      73             :     }}}
      74             : }
      75             : 
      76             : template <class F>
                       // Box overload of the serial component loop. Bounds come from
                       // amrex::lbound(bx) and amrex::ubound(bx), both inclusive. Calls
                       // f(i,j,k,n) for n = 0 .. ncomp-1 and every index triple in the
                       // bounds. Nesting from outermost to innermost is n, k (z), j (y),
                       // i (x). Nothing is called when ncomp <= 0.
      77             : AMREX_GPU_HOST_DEVICE
      78             : AMREX_ATTRIBUTE_FLATTEN_FOR
      79             : void Loop (Box const& bx, int ncomp, F const& f) noexcept
      80             : {
      81             :     const auto lo = amrex::lbound(bx);
      82             :     const auto hi = amrex::ubound(bx);
      83             :     for (int n = 0; n < ncomp; ++n) {
      84             :     for (int k = lo.z; k <= hi.z; ++k) {
      85             :     for (int j = lo.y; j <= hi.y; ++j) {
      86             :     for (int i = lo.x; i <= hi.x; ++i) {
      87             :         f(i,j,k,n);
      88             :     }}}}
      89             : }
      90             : 
      91             : template <class F>
                       // Box overload of the 3D loop with AMREX_PRAGMA_SIMD on the
                       // innermost i loop. Bounds come from amrex::lbound(bx) and
                       // amrex::ubound(bx), both inclusive. Calls f(i,j,k) for every
                       // index triple, with k (z) outermost and i (x) innermost.
                       // NOTE(review): AMREX_PRAGMA_SIMD is defined outside this file;
                       // presumably a vectorization hint that requires independent
                       // iterations over i -- confirm against the macro definition.
      92             : AMREX_GPU_HOST_DEVICE
      93             : AMREX_ATTRIBUTE_FLATTEN_FOR
      94             : void LoopConcurrent (Box const& bx, F const& f) noexcept
      95             : {
      96             :     const auto lo = amrex::lbound(bx);
      97             :     const auto hi = amrex::ubound(bx);
      98             :     for (int k = lo.z; k <= hi.z; ++k) {
      99             :     for (int j = lo.y; j <= hi.y; ++j) {
     100             :     AMREX_PRAGMA_SIMD
     101             :     for (int i = lo.x; i <= hi.x; ++i) {
     102             :         f(i,j,k);
     103             :     }}}
     104             : }
     105             : 
     106             : template <class F>
                       // Box overload of the component loop with AMREX_PRAGMA_SIMD on the
                       // innermost i loop. Bounds come from amrex::lbound(bx) and
                       // amrex::ubound(bx), both inclusive. Calls f(i,j,k,n) for
                       // n = 0 .. ncomp-1; nesting from outermost to innermost is
                       // n, k (z), j (y), i (x).
                       // NOTE(review): AMREX_PRAGMA_SIMD is defined outside this file;
                       // presumably a vectorization hint that requires independent
                       // iterations over i -- confirm against the macro definition.
     107             : AMREX_GPU_HOST_DEVICE
     108             : AMREX_ATTRIBUTE_FLATTEN_FOR
     109             : void LoopConcurrent (Box const& bx, int ncomp, F const& f) noexcept
     110             : {
     111             :     const auto lo = amrex::lbound(bx);
     112             :     const auto hi = amrex::ubound(bx);
     113             :     for (int n = 0; n < ncomp; ++n) {
     114             :     for (int k = lo.z; k <= hi.z; ++k) {
     115             :     for (int j = lo.y; j <= hi.y; ++j) {
     116             :     AMREX_PRAGMA_SIMD
     117             :     for (int i = lo.x; i <= hi.x; ++i) {
     118             :         f(i,j,k,n);
     119             :     }}}}
     120             : }
     121             : 
     122             : // The functions above are __host__ __device__ functions.  If f is not a
     123             : // __host__ __device__ function, we will get a warning about calling a
     124             : // __host__ function from a __host__ __device__ function.  This is ugly.
     125             : // To get rid of the warning, use the functions below in those situations.
     126             : 
     127             : template <class F>
                       // Serial 3D loop over the inclusive index range [lo, hi] that is
                       // declared without the AMREX_GPU_HOST_DEVICE marker. Per the note
                       // preceding this group of functions, it is meant for callables f
                       // that are not __host__ __device__ functions. Calls f(i,j,k) with
                       // k (z) outermost and i (x) innermost; f is never called when any
                       // component of hi is below that of lo. The function is noexcept,
                       // so f must not let an exception escape.
     128             : AMREX_ATTRIBUTE_FLATTEN_FOR
     129             : void LoopOnCpu (Dim3 lo, Dim3 hi, F const& f) noexcept
     130             : {
     131             :     for (int k = lo.z; k <= hi.z; ++k) {
     132             :     for (int j = lo.y; j <= hi.y; ++j) {
     133             :     for (int i = lo.x; i <= hi.x; ++i) {
     134             :         f(i,j,k);
     135             :     }}}
     136             : }
     137             : 
     138             : template <class F>
                       // Serial component loop over the inclusive 3D range [lo, hi],
                       // declared without the AMREX_GPU_HOST_DEVICE marker (intended for
                       // callables that are not __host__ __device__ functions, per the
                       // note preceding this group). Calls f(i,j,k,n) for
                       // n = 0 .. ncomp-1; nesting from outermost to innermost is
                       // n, k (z), j (y), i (x). Nothing is called when ncomp <= 0.
     139             : AMREX_ATTRIBUTE_FLATTEN_FOR
     140             : void LoopOnCpu (Dim3 lo, Dim3 hi, int ncomp, F const& f) noexcept
     141             : {
     142             :     for (int n = 0; n < ncomp; ++n) {
     143             :     for (int k = lo.z; k <= hi.z; ++k) {
     144             :     for (int j = lo.y; j <= hi.y; ++j) {
     145             :     for (int i = lo.x; i <= hi.x; ++i) {
     146             :         f(i,j,k,n);
     147             :     }}}}
     148             : }
     149             : 
     150             : template <class F>
                       // 3D loop over the inclusive index range [lo, hi] with
                       // AMREX_PRAGMA_SIMD on the innermost i loop, declared without the
                       // AMREX_GPU_HOST_DEVICE marker (intended for callables that are
                       // not __host__ __device__ functions, per the note preceding this
                       // group). Calls f(i,j,k) with k (z) outermost and i (x) innermost.
                       // NOTE(review): AMREX_PRAGMA_SIMD is defined outside this file;
                       // presumably a vectorization hint that requires independent
                       // iterations over i -- confirm against the macro definition.
     151             : AMREX_ATTRIBUTE_FLATTEN_FOR
     152             : void LoopConcurrentOnCpu (Dim3 lo, Dim3 hi, F const& f) noexcept
     153             : {
     154             :     for (int k = lo.z; k <= hi.z; ++k) {
     155             :     for (int j = lo.y; j <= hi.y; ++j) {
     156             :     AMREX_PRAGMA_SIMD
     157             :     for (int i = lo.x; i <= hi.x; ++i) {
     158             :         f(i,j,k);
     159             :     }}}
     160             : }
     161             : 
     162             : template <class F>
                       // Component loop over the inclusive 3D range [lo, hi] with
                       // AMREX_PRAGMA_SIMD on the innermost i loop, declared without the
                       // AMREX_GPU_HOST_DEVICE marker. Calls f(i,j,k,n) for
                       // n = 0 .. ncomp-1; nesting from outermost to innermost is
                       // n, k (z), j (y), i (x).
                       // NOTE(review): AMREX_PRAGMA_SIMD is defined outside this file;
                       // presumably a vectorization hint that requires independent
                       // iterations over i -- confirm against the macro definition.
     163             : AMREX_ATTRIBUTE_FLATTEN_FOR
     164             : void LoopConcurrentOnCpu (Dim3 lo, Dim3 hi, int ncomp, F const& f) noexcept
     165             : {
     166             :     for (int n = 0; n < ncomp; ++n) {
     167             :     for (int k = lo.z; k <= hi.z; ++k) {
     168             :     for (int j = lo.y; j <= hi.y; ++j) {
     169             :     AMREX_PRAGMA_SIMD
     170             :     for (int i = lo.x; i <= hi.x; ++i) {
     171             :         f(i,j,k,n);
     172             :     }}}}
     173             : }
     174             : 
     175             : template <class F>
                       // Box overload of the serial 3D loop, declared without the
                       // AMREX_GPU_HOST_DEVICE marker (intended for callables that are
                       // not __host__ __device__ functions, per the note preceding this
                       // group). Bounds come from amrex::lbound(bx) and
                       // amrex::ubound(bx), both inclusive. Calls f(i,j,k) with k (z)
                       // outermost and i (x) innermost.
     176             : AMREX_ATTRIBUTE_FLATTEN_FOR
     177             : void LoopOnCpu (Box const& bx, F const& f) noexcept
     178             : {
     179             :     const auto lo = amrex::lbound(bx);
     180             :     const auto hi = amrex::ubound(bx);
     181             :     for (int k = lo.z; k <= hi.z; ++k) {
     182             :     for (int j = lo.y; j <= hi.y; ++j) {
     183             :     for (int i = lo.x; i <= hi.x; ++i) {
     184             :         f(i,j,k);
     185             :     }}}
     186             : }
     187             : 
     188             : template <class F>
                       // Box overload of the serial component loop, declared without the
                       // AMREX_GPU_HOST_DEVICE marker. Bounds come from amrex::lbound(bx)
                       // and amrex::ubound(bx), both inclusive. Calls f(i,j,k,n) for
                       // n = 0 .. ncomp-1; nesting from outermost to innermost is
                       // n, k (z), j (y), i (x). Nothing is called when ncomp <= 0.
     189             : AMREX_ATTRIBUTE_FLATTEN_FOR
     190             : void LoopOnCpu (Box const& bx, int ncomp, F const& f) noexcept
     191             : {
     192             :     const auto lo = amrex::lbound(bx);
     193             :     const auto hi = amrex::ubound(bx);
     194             :     for (int n = 0; n < ncomp; ++n) {
     195             :     for (int k = lo.z; k <= hi.z; ++k) {
     196             :     for (int j = lo.y; j <= hi.y; ++j) {
     197             :     for (int i = lo.x; i <= hi.x; ++i) {
     198             :         f(i,j,k,n);
     199             :     }}}}
     200             : }
     201             : 
     202             : template <class F>
                       // Box overload of the 3D loop with AMREX_PRAGMA_SIMD on the
                       // innermost i loop, declared without the AMREX_GPU_HOST_DEVICE
                       // marker. Bounds come from amrex::lbound(bx) and
                       // amrex::ubound(bx), both inclusive. Calls f(i,j,k) with k (z)
                       // outermost and i (x) innermost.
                       // NOTE(review): AMREX_PRAGMA_SIMD is defined outside this file;
                       // presumably a vectorization hint that requires independent
                       // iterations over i -- confirm against the macro definition.
     203             : AMREX_ATTRIBUTE_FLATTEN_FOR
     204             : void LoopConcurrentOnCpu (Box const& bx, F const& f) noexcept
     205             : {
     206             :     const auto lo = amrex::lbound(bx);
     207             :     const auto hi = amrex::ubound(bx);
     208             :     for (int k = lo.z; k <= hi.z; ++k) {
     209             :     for (int j = lo.y; j <= hi.y; ++j) {
     210             :     AMREX_PRAGMA_SIMD
     211             :     for (int i = lo.x; i <= hi.x; ++i) {
     212             :         f(i,j,k);
     213             :     }}}
     214             : }
     215             : 
     216             : template <class F>
                       // Box overload of the component loop with AMREX_PRAGMA_SIMD on the
                       // innermost i loop, declared without the AMREX_GPU_HOST_DEVICE
                       // marker. Bounds come from amrex::lbound(bx) and
                       // amrex::ubound(bx), both inclusive. Calls f(i,j,k,n) for
                       // n = 0 .. ncomp-1; nesting from outermost to innermost is
                       // n, k (z), j (y), i (x). Nothing is called when ncomp <= 0.
                       // NOTE(review): AMREX_PRAGMA_SIMD is defined outside this file;
                       // presumably a vectorization hint that requires independent
                       // iterations over i -- confirm against the macro definition.
                       // The numbers in the second gutter column of this listing are
                       // coverage hit counts from the report, not part of the code.
     217             : AMREX_ATTRIBUTE_FLATTEN_FOR
     218       23528 : void LoopConcurrentOnCpu (Box const& bx, int ncomp, F const& f) noexcept
     219             : {
     220             :     const auto lo = amrex::lbound(bx);
     221             :     const auto hi = amrex::ubound(bx);
     222       66136 :     for (int n = 0; n < ncomp; ++n) {
     223      294492 :     for (int k = lo.z; k <= hi.z; ++k) {
     224     1710584 :     for (int j = lo.y; j <= hi.y; ++j) {
     225             :     AMREX_PRAGMA_SIMD
     226     9178700 :     for (int i = lo.x; i <= hi.x; ++i) {
     227     7719990 :         f(i,j,k,n);
     228             :     }}}}
     229       23528 : }
     230             : 
     231             : // Implementation of "constexpr for" based on
     232             : // https://artificial-mind.net/blog/2020/10/31/constexpr-for
     233             : //
     234             : // Approximates what one would get from a compile-time
     235             : // unrolling of the loop
     236             : // for (int i = 0; i < N; ++i) {
     237             : //    f(i);
     238             : // }
     239             : //
     240             : // The mechanism is recursive: we evaluate f(i) at the current
     241             : // i and then call the for loop at i+1. f() is a lambda function
     242             : // that provides the body of the loop and takes only the index i
     243             : // (passed as a std::integral_constant) as its argument.
     244             : 
     245             : template<auto I, auto N, class F>
                       // Compile-time loop over the half-open range [I, N).
                       // I is the current (starting) index, N is the exclusive upper
                       // bound, and f is the loop body. For each index f is called with
                       // a value-initialized std::integral_constant<decltype(I), I>, so
                       // the index is available to f as a compile-time constant.
                       // When I >= N the function does nothing.
     246             : AMREX_GPU_HOST_DEVICE AMREX_INLINE
     247             : constexpr void constexpr_for (F const& f)
     248             : {
                       //  if constexpr discards the branch once I reaches N, which ends
                       //  the chain of recursive instantiations.
     249             :     if constexpr (I < N) {
     250             :         f(std::integral_constant<decltype(I), I>());
                       //  Continue with the next index; each step is a separate
                       //  template instantiation.
     251             :         constexpr_for<I+1, N>(f);
     252             :     }
     253             : }
     254             : 
     255             : #include <AMReX_Loop.nolint.H>
     256             : 
     257             : }
     258             : 
     259             : #endif

Generated by: LCOV version 1.14