Line data Source code
1 : #ifndef AMREX_GPU_UTILITY_H_
2 : #define AMREX_GPU_UTILITY_H_
3 : #include <AMReX_Config.H>
4 :
5 : #include <AMReX_GpuQualifiers.H>
6 : #include <AMReX_GpuTypes.H>
7 : #include <AMReX_GpuControl.H>
8 : #include <AMReX_GpuDevice.H>
9 : #include <AMReX_Extension.H>
10 : #include <AMReX_REAL.H>
11 : #include <AMReX_INT.H>
12 : #include <AMReX_Array.H>
13 : #include <AMReX_Array4.H>
14 : #include <iosfwd>
15 : #include <cmath>
16 : #include <cstring>
17 :
18 : #ifdef AMREX_USE_CUDA
19 : #include <cuda.h>
20 : #include <curand_kernel.h> // Is this needed here?
21 : #endif
22 :
23 : namespace amrex {
24 : namespace Gpu {
25 :
26 : template <typename T>
27 : AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
28 : T LDG (Array4<T> const& a, int i, int j, int k) noexcept {
29 : #if defined(AMREX_USE_CUDA)
30 : AMREX_IF_ON_DEVICE((return __ldg(a.ptr(i,j,k));))
31 : AMREX_IF_ON_HOST((return a(i,j,k);))
32 : #else
33 : return a(i,j,k);
34 : #endif
35 : }
36 :
37 : template <typename T>
38 : AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
39 : T LDG (Array4<T> const& a, int i, int j, int k, int n) noexcept {
40 : #if defined(AMREX_USE_CUDA)
41 : AMREX_IF_ON_DEVICE((return __ldg(a.ptr(i,j,k,n));))
42 : AMREX_IF_ON_HOST((return a(i,j,k,n);))
43 : #else
44 : return a(i,j,k,n);
45 : #endif
46 : }
47 :
48 : inline bool isManaged (void const* p) noexcept {
49 : #ifdef AMREX_USE_CUDA
50 : CUpointer_attribute attrib = CU_POINTER_ATTRIBUTE_IS_MANAGED;
51 : unsigned int is_managed = 0;
52 : void* data[] = { (void*)(&is_managed) };
53 : CUresult r = cuPointerGetAttributes(1, &attrib, data, (CUdeviceptr)p);
54 : return r == CUDA_SUCCESS && is_managed;
55 : #elif defined(AMREX_USE_SYCL)
56 : auto type = sycl::get_pointer_type(p,Device::syclContext());
57 : return type == sycl::usm::alloc::shared;
58 : #else
59 : amrex::ignore_unused(p);
60 : return false;
61 : #endif
62 : }
63 :
64 : inline bool isDevicePtr (void const* p) noexcept {
65 : #if defined(AMREX_USE_HIP)
66 : hipPointerAttribute_t attrib;
67 : hipError_t r = hipPointerGetAttributes(&attrib, p);
68 : #if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)
69 : return r == hipSuccess && attrib.memoryType == hipMemoryTypeDevice;
70 : #else
71 : return r == hipSuccess && attrib.type == hipMemoryTypeDevice;
72 : #endif // (HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)
73 : #elif defined(AMREX_USE_CUDA)
74 : CUpointer_attribute attrib = CU_POINTER_ATTRIBUTE_MEMORY_TYPE;
75 : CUmemorytype mem_type = static_cast<CUmemorytype>(0);
76 : void* data[] = { (void*)(&mem_type) };
77 : CUresult r = cuPointerGetAttributes(1, &attrib, data, (CUdeviceptr)p);
78 : return r == CUDA_SUCCESS && mem_type == CU_MEMORYTYPE_DEVICE;
79 : #elif defined(AMREX_USE_SYCL)
80 : auto type = sycl::get_pointer_type(p,Device::syclContext());
81 : return type == sycl::usm::alloc::device;
82 : #else
83 : amrex::ignore_unused(p);
84 : return false;
85 : #endif
86 : }
87 :
88 : inline bool isPinnedPtr (void const* p) noexcept {
89 : #if defined(AMREX_USE_HIP)
90 : hipPointerAttribute_t attrib;
91 : hipError_t r = hipPointerGetAttributes(&attrib, p);
92 : #if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)
93 : return r == hipSuccess && attrib.memoryType == hipMemoryTypeHost;
94 : #else
95 : return r == hipSuccess && attrib.type == hipMemoryTypeHost;
96 : #endif // (HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)
97 : #elif defined(AMREX_USE_CUDA)
98 : CUpointer_attribute attrib = CU_POINTER_ATTRIBUTE_MEMORY_TYPE;
99 : CUmemorytype mem_type = static_cast<CUmemorytype>(0);
100 : void* data[] = { (void*)(&mem_type) };
101 : CUresult r = cuPointerGetAttributes(1, &attrib, data, (CUdeviceptr)p);
102 : return r == CUDA_SUCCESS && mem_type == CU_MEMORYTYPE_HOST;
103 : #elif defined(AMREX_USE_SYCL)
104 : auto type = sycl::get_pointer_type(p,Device::syclContext());
105 : return type == sycl::usm::alloc::host;
106 : #else
107 : amrex::ignore_unused(p);
108 : return false;
109 : #endif
110 : }
111 :
112 : inline bool isGpuPtr (void const* p) noexcept {
113 : #if defined(AMREX_USE_HIP)
114 : if (isManaged(p)) { // We might be using CUDA/NVCC
115 : return true;
116 : } else {
117 : hipPointerAttribute_t attrib;
118 : hipError_t r = hipPointerGetAttributes(&attrib, p);
119 : #if defined(HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)
120 : return r == hipSuccess &&
121 : (attrib.memoryType == hipMemoryTypeHost ||
122 : attrib.memoryType == hipMemoryTypeDevice);
123 : #else
124 : return r == hipSuccess &&
125 : (attrib.type == hipMemoryTypeHost ||
126 : attrib.type == hipMemoryTypeDevice);
127 : #endif // (HIP_VERSION_MAJOR) && (HIP_VERSION_MAJOR < 6)
128 : }
129 : #elif defined(AMREX_USE_CUDA)
130 : CUpointer_attribute attrib = CU_POINTER_ATTRIBUTE_MEMORY_TYPE;
131 : CUmemorytype mem_type = static_cast<CUmemorytype>(0);
132 : void* data[] = { (void*)(&mem_type) };
133 : CUresult r = cuPointerGetAttributes(1, &attrib, data, (CUdeviceptr)p);
134 : return r == CUDA_SUCCESS &&
135 : (mem_type == CU_MEMORYTYPE_HOST ||
136 : mem_type == CU_MEMORYTYPE_DEVICE ||
137 : mem_type == CU_MEMORYTYPE_ARRAY ||
138 : mem_type == CU_MEMORYTYPE_UNIFIED);
139 : #elif defined(AMREX_USE_SYCL)
140 : auto type = sycl::get_pointer_type(p,Device::syclContext());
141 : return type != sycl::usm::alloc::unknown;
142 : #else
143 : amrex::ignore_unused(p);
144 : return false;
145 : #endif
146 : }
147 :
148 : template <class T>
149 : AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
150 : bool isnan (T m) noexcept
151 : {
152 : #if defined(AMREX_USE_SYCL)
153 : return sycl::isnan(m);
154 : #else
155 0 : return std::isnan(m);
156 : #endif
157 : }
158 :
159 : template <class T>
160 : AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
161 : bool isinf (T m) noexcept
162 : {
163 : #if defined(AMREX_USE_SYCL)
164 : return sycl::isinf(m);
165 : #else
166 0 : return std::isinf(m);
167 : #endif
168 : }
169 :
170 : struct StreamItInfo
171 : {
172 : bool device_sync;
173 : StreamItInfo () noexcept
174 : : device_sync(!Gpu::inNoSyncRegion()) {}
175 : StreamItInfo& DisableDeviceSync () noexcept {
176 : device_sync = false;
177 : return *this;
178 : }
179 : };
180 :
181 : class StreamIter
182 : {
183 : public:
184 : StreamIter (int n, bool is_thread_safe=true) noexcept;
185 : StreamIter (int n, const StreamItInfo& info, bool is_thread_safe=true) noexcept;
186 :
187 : ~StreamIter ();
188 :
189 : StreamIter (StreamIter const&) = delete;
190 : StreamIter (StreamIter &&) = delete;
191 : void operator= (StreamIter const&) = delete;
192 : void operator= (StreamIter &&) = delete;
193 :
194 : int operator() () const noexcept { return m_i; }
195 :
196 : [[nodiscard]] bool isValid () const noexcept { return m_i < m_n; }
197 :
198 : #if !defined(AMREX_USE_GPU)
199 : void operator++ () noexcept { ++m_i; }
200 : #else
201 : void operator++ () noexcept;
202 : #endif
203 :
204 : private:
205 : void init () noexcept; // NOLINT
206 :
207 : int m_n;
208 : int m_i;
209 : bool m_threadsafe;
210 : bool m_sync;
211 : };
212 :
213 : AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
214 : void* memcpy (void* dest, const void* src, std::size_t count)
215 : {
216 : #if defined(__HIP_DEVICE_COMPILE__) && defined(AMREX_USE_HIP)
217 : return ::memcpy(dest, src, count);
218 : #else
219 : return std::memcpy(dest, src, count);
220 : #endif
221 : }
222 :
223 : } // namespace Gpu
224 :
225 : #ifdef AMREX_USE_GPU
226 : std::ostream& operator<< (std::ostream& os, const dim3& d);
227 : #endif
228 :
229 : using Gpu::isnan;
230 : using Gpu::isinf;
231 :
232 : } // namespace amrex
233 :
234 : #endif
|