#ifndef AMREX_LOOP_H_
#define AMREX_LOOP_H_
#include <AMReX_Config.H>

#include <AMReX_Box.H>
#include <AMReX_Extension.H>

#include <type_traits>

namespace amrex {

template <class F>
AMREX_GPU_HOST_DEVICE
AMREX_ATTRIBUTE_FLATTEN_FOR
void Loop (Dim3 lo, Dim3 hi, F const& f) noexcept
{
    for (int k = lo.z; k <= hi.z; ++k) {
    for (int j = lo.y; j <= hi.y; ++j) {
    for (int i = lo.x; i <= hi.x; ++i) {
        f(i,j,k);
    }}}
}
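
// Example usage (an illustrative sketch, not part of this header's
// interface): call f(i,j,k) at every cell of the inclusive index range
// [lo, hi].  The name `a` below is a placeholder for an amrex::Array4<Real>
// covering that range.
//
//     Dim3 lo{0, 0, 0};
//     Dim3 hi{15, 15, 15};
//     amrex::Loop(lo, hi, [=] (int i, int j, int k) noexcept
//     {
//         a(i,j,k) = 0.0;
//     });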

template <class F>
AMREX_GPU_HOST_DEVICE
AMREX_ATTRIBUTE_FLATTEN_FOR
void Loop (Dim3 lo, Dim3 hi, int ncomp, F const& f) noexcept
{
    for (int n = 0; n < ncomp; ++n) {
    for (int k = lo.z; k <= hi.z; ++k) {
    for (int j = lo.y; j <= hi.y; ++j) {
    for (int i = lo.x; i <= hi.x; ++i) {
        f(i,j,k,n);
    }}}}
}

template <class F>
AMREX_GPU_HOST_DEVICE
AMREX_ATTRIBUTE_FLATTEN_FOR
void LoopConcurrent (Dim3 lo, Dim3 hi, F const& f) noexcept
{
    for (int k = lo.z; k <= hi.z; ++k) {
    for (int j = lo.y; j <= hi.y; ++j) {
    AMREX_PRAGMA_SIMD
    for (int i = lo.x; i <= hi.x; ++i) {
        f(i,j,k);
    }}}
}

template <class F>
AMREX_GPU_HOST_DEVICE
AMREX_ATTRIBUTE_FLATTEN_FOR
void LoopConcurrent (Dim3 lo, Dim3 hi, int ncomp, F const& f) noexcept
{
    for (int n = 0; n < ncomp; ++n) {
    for (int k = lo.z; k <= hi.z; ++k) {
    for (int j = lo.y; j <= hi.y; ++j) {
    AMREX_PRAGMA_SIMD
    for (int i = lo.x; i <= hi.x; ++i) {
        f(i,j,k,n);
    }}}}
}
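
// LoopConcurrent differs from Loop only in the AMREX_PRAGMA_SIMD annotation
// on the innermost loop, so the body must be safe to vectorize: iterations
// over i should not depend on one another.  A sketch, with `a` and `b`
// standing in for Array4<Real> data on the same index range:
//
//     amrex::LoopConcurrent(lo, hi, [=] (int i, int j, int k) noexcept
//     {
//         a(i,j,k) += b(i,j,k);   // each (i,j,k) touches a distinct element
//     });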

template <class F>
AMREX_GPU_HOST_DEVICE
AMREX_ATTRIBUTE_FLATTEN_FOR
void Loop (Box const& bx, F const& f) noexcept
{
    const auto lo = amrex::lbound(bx);
    const auto hi = amrex::ubound(bx);
    for (int k = lo.z; k <= hi.z; ++k) {
    for (int j = lo.y; j <= hi.y; ++j) {
    for (int i = lo.x; i <= hi.x; ++i) {
        f(i,j,k);
    }}}
}

template <class F>
AMREX_GPU_HOST_DEVICE
AMREX_ATTRIBUTE_FLATTEN_FOR
void Loop (Box const& bx, int ncomp, F const& f) noexcept
{
    const auto lo = amrex::lbound(bx);
    const auto hi = amrex::ubound(bx);
    for (int n = 0; n < ncomp; ++n) {
    for (int k = lo.z; k <= hi.z; ++k) {
    for (int j = lo.y; j <= hi.y; ++j) {
    for (int i = lo.x; i <= hi.x; ++i) {
        f(i,j,k,n);
    }}}}
}
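
// Example usage (sketch): visit every cell of a Box and every component of
// a multi-component field.  `bx`, `ncomp`, and `fab` are assumed to be
// provided by the caller; `fab` is a placeholder for an Array4<Real>, as
// obtained from, e.g., a FArrayBox or a MultiFab.
//
//     amrex::Loop(bx, ncomp, [=] (int i, int j, int k, int n) noexcept
//     {
//         fab(i,j,k,n) *= 2.0;
//     });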

template <class F>
AMREX_GPU_HOST_DEVICE
AMREX_ATTRIBUTE_FLATTEN_FOR
void LoopConcurrent (Box const& bx, F const& f) noexcept
{
    const auto lo = amrex::lbound(bx);
    const auto hi = amrex::ubound(bx);
    for (int k = lo.z; k <= hi.z; ++k) {
    for (int j = lo.y; j <= hi.y; ++j) {
    AMREX_PRAGMA_SIMD
    for (int i = lo.x; i <= hi.x; ++i) {
        f(i,j,k);
    }}}
}

template <class F>
AMREX_GPU_HOST_DEVICE
AMREX_ATTRIBUTE_FLATTEN_FOR
void LoopConcurrent (Box const& bx, int ncomp, F const& f) noexcept
{
    const auto lo = amrex::lbound(bx);
    const auto hi = amrex::ubound(bx);
    for (int n = 0; n < ncomp; ++n) {
    for (int k = lo.z; k <= hi.z; ++k) {
    for (int j = lo.y; j <= hi.y; ++j) {
    AMREX_PRAGMA_SIMD
    for (int i = lo.x; i <= hi.x; ++i) {
        f(i,j,k,n);
    }}}}
}

// The functions above are __host__ __device__ functions.  If f is not a
// __host__ __device__ function, the compiler will warn about calling a
// __host__ function from a __host__ __device__ function.  The warning is
// ugly; to get rid of it, use the *OnCpu functions below in those
// situations.

template <class F>
AMREX_ATTRIBUTE_FLATTEN_FOR
void LoopOnCpu (Dim3 lo, Dim3 hi, F const& f) noexcept
{
    for (int k = lo.z; k <= hi.z; ++k) {
    for (int j = lo.y; j <= hi.y; ++j) {
    for (int i = lo.x; i <= hi.x; ++i) {
        f(i,j,k);
    }}}
}
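
// Example usage (sketch): LoopOnCpu is appropriate when f is host-only,
// for instance when it touches host containers such as std::vector.  `lo`,
// `hi`, and `a` are assumed to be provided by the caller; `a` stands in for
// host-accessible Array4<Real> data.
//
//     std::vector<Real> flagged;
//     amrex::LoopOnCpu(lo, hi, [&] (int i, int j, int k)
//     {
//         if (a(i,j,k) > 0.0) { flagged.push_back(a(i,j,k)); }
//     });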

template <class F>
AMREX_ATTRIBUTE_FLATTEN_FOR
void LoopOnCpu (Dim3 lo, Dim3 hi, int ncomp, F const& f) noexcept
{
    for (int n = 0; n < ncomp; ++n) {
    for (int k = lo.z; k <= hi.z; ++k) {
    for (int j = lo.y; j <= hi.y; ++j) {
    for (int i = lo.x; i <= hi.x; ++i) {
        f(i,j,k,n);
    }}}}
}

template <class F>
AMREX_ATTRIBUTE_FLATTEN_FOR
void LoopConcurrentOnCpu (Dim3 lo, Dim3 hi, F const& f) noexcept
{
    for (int k = lo.z; k <= hi.z; ++k) {
    for (int j = lo.y; j <= hi.y; ++j) {
    AMREX_PRAGMA_SIMD
    for (int i = lo.x; i <= hi.x; ++i) {
        f(i,j,k);
    }}}
}

template <class F>
AMREX_ATTRIBUTE_FLATTEN_FOR
void LoopConcurrentOnCpu (Dim3 lo, Dim3 hi, int ncomp, F const& f) noexcept
{
    for (int n = 0; n < ncomp; ++n) {
    for (int k = lo.z; k <= hi.z; ++k) {
    for (int j = lo.y; j <= hi.y; ++j) {
    AMREX_PRAGMA_SIMD
    for (int i = lo.x; i <= hi.x; ++i) {
        f(i,j,k,n);
    }}}}
}

template <class F>
AMREX_ATTRIBUTE_FLATTEN_FOR
void LoopOnCpu (Box const& bx, F const& f) noexcept
{
    const auto lo = amrex::lbound(bx);
    const auto hi = amrex::ubound(bx);
    for (int k = lo.z; k <= hi.z; ++k) {
    for (int j = lo.y; j <= hi.y; ++j) {
    for (int i = lo.x; i <= hi.x; ++i) {
        f(i,j,k);
    }}}
}

template <class F>
AMREX_ATTRIBUTE_FLATTEN_FOR
void LoopOnCpu (Box const& bx, int ncomp, F const& f) noexcept
{
    const auto lo = amrex::lbound(bx);
    const auto hi = amrex::ubound(bx);
    for (int n = 0; n < ncomp; ++n) {
    for (int k = lo.z; k <= hi.z; ++k) {
    for (int j = lo.y; j <= hi.y; ++j) {
    for (int i = lo.x; i <= hi.x; ++i) {
        f(i,j,k,n);
    }}}}
}

template <class F>
AMREX_ATTRIBUTE_FLATTEN_FOR
void LoopConcurrentOnCpu (Box const& bx, F const& f) noexcept
{
    const auto lo = amrex::lbound(bx);
    const auto hi = amrex::ubound(bx);
    for (int k = lo.z; k <= hi.z; ++k) {
    for (int j = lo.y; j <= hi.y; ++j) {
    AMREX_PRAGMA_SIMD
    for (int i = lo.x; i <= hi.x; ++i) {
        f(i,j,k);
    }}}
}

template <class F>
AMREX_ATTRIBUTE_FLATTEN_FOR
void LoopConcurrentOnCpu (Box const& bx, int ncomp, F const& f) noexcept
{
    const auto lo = amrex::lbound(bx);
    const auto hi = amrex::ubound(bx);
    for (int n = 0; n < ncomp; ++n) {
    for (int k = lo.z; k <= hi.z; ++k) {
    for (int j = lo.y; j <= hi.y; ++j) {
    AMREX_PRAGMA_SIMD
    for (int i = lo.x; i <= hi.x; ++i) {
        f(i,j,k,n);
    }}}}
}

// Implementation of "constexpr for" based on
// https://artificial-mind.net/blog/2020/10/31/constexpr-for
//
// Approximates what one would get from compile-time unrolling of the loop
//
//     for (int i = 0; i < N; ++i) {
//         f(i);
//     }
//
// The mechanism is recursive: f is invoked at the current index I, and then
// constexpr_for is instantiated again at I+1 until I reaches N.  f provides
// the loop body (typically as a lambda) and is called with a single
// std::integral_constant argument carrying the index, so the index can be
// used where a compile-time constant is required (e.g. as a template
// argument) or converted to a plain integer.

template<auto I, auto N, class F>
AMREX_GPU_HOST_DEVICE AMREX_INLINE
constexpr void constexpr_for (F const& f)
{
    if constexpr (I < N) {
        f(std::integral_constant<decltype(I), I>());
        constexpr_for<I+1, N>(f);
    }
}
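
// Example usage (sketch): unroll a small loop at compile time so the index
// can be used as a template argument.  `do_component` is a hypothetical
// function template and `state` a hypothetical argument; `n()` yields the
// index as a constant expression.
//
//     constexpr_for<0, AMREX_SPACEDIM>([&] (auto n)
//     {
//         do_component<n()>(state);
//     });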

#include <AMReX_Loop.nolint.H>

}

#endif