Line data Source code
1 : #ifndef AMREX_GPU_CONTROL_H_ 2 : #define AMREX_GPU_CONTROL_H_ 3 : #include <AMReX_Config.H> 4 : 5 : #include <AMReX_GpuQualifiers.H> 6 : #include <AMReX_GpuTypes.H> 7 : 8 : #include <utility> 9 : 10 : #if defined(AMREX_USE_CUDA) && (__CUDACC_VER_MAJOR__ > 11 || ((__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ >= 2))) 11 : #define AMREX_CUDA_GE_11_2 1 12 : #endif 13 : 14 : #if defined(AMREX_USE_HIP) || defined(AMREX_CUDA_GE_11_2) 15 : #define AMREX_GPU_STREAM_ALLOC_SUPPORT 1 16 : #endif 17 : 18 : #if defined(AMREX_USE_HIP) 19 : #define AMREX_HIP_OR_CUDA(a,b) a 20 : #elif defined(AMREX_USE_CUDA) 21 : #define AMREX_HIP_OR_CUDA(a,b) b 22 : #else 23 : #define AMREX_HIP_OR_CUDA(a,b) ((void)0); 24 : #endif 25 : 26 : #if defined(AMREX_USE_HIP) 27 : #define AMREX_HIP_OR_CUDA_OR_SYCL(a,b,c) a 28 : #elif defined(AMREX_USE_CUDA) 29 : #define AMREX_HIP_OR_CUDA_OR_SYCL(a,b,c) b 30 : #elif defined(AMREX_USE_SYCL) 31 : #define AMREX_HIP_OR_CUDA_OR_SYCL(a,b,c) c 32 : #else 33 : #define AMREX_HIP_OR_CUDA_OR_SYCL(a,b,c) ((void)0); 34 : #endif 35 : 36 : #ifdef AMREX_USE_GPU 37 : #define AMREX_GPU_OR_CPU(a,b) a 38 : #else 39 : #define AMREX_GPU_OR_CPU(a,b) b 40 : #endif 41 : 42 : #ifdef AMREX_USE_SYCL 43 : #define AMREX_SYCL_ONLY(a) a 44 : #else 45 : #define AMREX_SYCL_ONLY(a) ((void)0) 46 : #endif 47 : 48 : #ifdef AMREX_USE_SYCL 49 : #if (AMREX_SPACEDIM == 1) 50 : # define AMREX_SYCL_1D_ONLY(a) a 51 : # define AMREX_SYCL_2D_ONLY(a) ((void)0) 52 : # define AMREX_SYCL_3D_ONLY(a) ((void)0) 53 : #elif (AMREX_SPACEDIM == 2) 54 : # define AMREX_SYCL_1D_ONLY(a) ((void)0) 55 : # define AMREX_SYCL_2D_ONLY(a) a 56 : # define AMREX_SYCL_3D_ONLY(a) ((void)0) 57 : #elif (AMREX_SPACEDIM == 3) 58 : # define AMREX_SYCL_1D_ONLY(a) ((void)0) 59 : # define AMREX_SYCL_2D_ONLY(a) ((void)0) 60 : # define AMREX_SYCL_3D_ONLY(a) a 61 : #endif 62 : #else 63 : # define AMREX_SYCL_1D_ONLY(a) ((void)0) 64 : # define AMREX_SYCL_2D_ONLY(a) ((void)0) 65 : # define AMREX_SYCL_3D_ONLY(a) ((void)0) 66 : #endif 67 : 68 : namespace amrex { 69 : enum struct RunOn { Gpu, Cpu, Device=Gpu, Host=Cpu }; 70 : } 71 : 72 : namespace amrex { // NOLINT(modernize-concat-nested-namespaces) 73 : 74 : #ifdef AMREX_USE_HIP 75 : using gpuStream_t = hipStream_t; 76 : #elif defined(AMREX_USE_CUDA) 77 : using gpuStream_t = cudaStream_t; 78 : #endif 79 : 80 : namespace Gpu { 81 : 82 : #if defined(AMREX_USE_GPU) 83 : 84 : extern bool in_launch_region; 85 : 86 : [[nodiscard]] inline bool inLaunchRegion () noexcept { return in_launch_region; } 87 : [[nodiscard]] inline bool notInLaunchRegion () noexcept { return !in_launch_region; } 88 : 89 : /** 90 : * Enable/disable GPU kernel launches. 91 : * 92 : * \note This will only switch from GPU to CPU for kernels launched 93 : * with macros. Functions like `amrex::ParallelFor` will be unaffected. 94 : * Therefore it should not be used for comparing GPU to non-GPU performance 95 : * or behavior. 96 : * 97 : * \code 98 : * Gpu::setLaunchRegion(0); 99 : * 100 : * //... 101 : * 102 : * Gpu::setLaunchRegion(1); 103 : * \endcode 104 : * 105 : * Will disable the launching of GPU kernels between the calls. 106 : */ 107 : 108 : inline bool setLaunchRegion (bool launch) noexcept { 109 : bool r = in_launch_region; 110 : in_launch_region = launch; 111 : return r; 112 : } 113 : 114 : extern bool in_graph_region; 115 : [[nodiscard]] inline bool inGraphRegion() { return (in_graph_region && in_launch_region); } 116 : [[nodiscard]] inline bool notInGraphRegion() { return (!in_graph_region || !in_launch_region); } 117 : 118 : inline bool setGraphRegion (bool graph) { 119 : bool r = in_graph_region; 120 : in_graph_region = graph; 121 : return r; 122 : } 123 : 124 : struct [[nodiscard]] LaunchSafeGuard 125 : { 126 : explicit LaunchSafeGuard (bool flag) noexcept 127 : : m_old(setLaunchRegion(flag)) {} 128 : ~LaunchSafeGuard () { setLaunchRegion(m_old); } 129 : private: 130 : bool m_old; 131 : }; 132 : 133 : struct [[nodiscard]] GraphSafeGuard 134 : { 135 : explicit GraphSafeGuard (bool flag) noexcept 136 : : m_old(setGraphRegion(flag)) {} 137 : ~GraphSafeGuard () { setGraphRegion(m_old); } 138 : private: 139 : bool m_old; 140 : }; 141 : 142 : extern bool in_single_stream_region; 143 : extern bool in_nosync_region; 144 : 145 : [[nodiscard]] inline bool inSingleStreamRegion () noexcept { return in_single_stream_region; } 146 : [[nodiscard]] inline bool inNoSyncRegion () noexcept { return in_nosync_region; } 147 : 148 : inline bool setSingleStreamRegion (bool b) noexcept { 149 : return std::exchange(in_single_stream_region, b); 150 : } 151 : 152 : inline bool setNoSyncRegion (bool b) noexcept { 153 : return std::exchange(in_nosync_region, b); 154 : } 155 : 156 : /** 157 : * This struct provides a RAII-style mechanism for changing the number 158 : * of streams returned by Gpu::numStreams() to a single stream. 159 : */ 160 : struct [[nodiscard]] SingleStreamRegion 161 : { 162 : SingleStreamRegion () noexcept 163 : : m_prev_flag(std::exchange(in_single_stream_region,true)) 164 : {} 165 : 166 : ~SingleStreamRegion () { in_single_stream_region = m_prev_flag; } 167 : 168 : private: 169 : bool m_prev_flag; 170 : }; 171 : 172 : /** 173 : * This struct provides a RAII-style mechanism for disabling GPU 174 : * synchronization in MFITer by default. Note that explicit calls to 175 : * Gpu::steramSynchronize and Gpu::deviceSynchronize still work. 176 : */ 177 : struct [[nodiscard]] NoSyncRegion 178 : { 179 : NoSyncRegion () noexcept 180 : : m_prev_flag(std::exchange(in_nosync_region,true)) 181 : {} 182 : 183 : ~NoSyncRegion () { in_nosync_region = m_prev_flag; } 184 : 185 : private: 186 : bool m_prev_flag; 187 : }; 188 : 189 : #else 190 : 191 : [[nodiscard]] inline constexpr bool inLaunchRegion () { return false; } 192 : [[nodiscard]] inline constexpr bool notInLaunchRegion () { return true; } 193 : [[nodiscard]] inline constexpr bool setLaunchRegion (bool) { return false; } 194 : 195 : [[nodiscard]] inline constexpr bool inGraphRegion () { return false; } 196 : [[nodiscard]] inline constexpr bool notInGraphRegion () { return true; } 197 : [[nodiscard]] inline constexpr bool setGraphRegion (bool) { return false; } 198 : 199 : struct [[nodiscard]] LaunchSafeGuard 200 : { 201 : explicit LaunchSafeGuard (bool) {} 202 : }; 203 : 204 : struct [[nodiscard]] GraphSafeGuard 205 : { 206 : explicit GraphSafeGuard (bool) {} 207 : }; 208 : 209 0 : [[nodiscard]] inline constexpr bool inSingleStreamRegion () { return false; } 210 0 : [[nodiscard]] inline constexpr bool inNoSyncRegion () { return true; } 211 : [[nodiscard]] inline constexpr bool setSingleStreamRegion (bool) { return false; } 212 : [[nodiscard]] inline constexpr bool setNoSyncRegion (bool) { return true; } 213 : struct [[nodiscard]] SingleStreamRegion {}; 214 : struct [[nodiscard]] NoSyncRegion {}; 215 : 216 : #endif 217 : 218 : } 219 : } 220 : 221 : #endif