LCOV - code coverage report
Current view: top level - ext/amrex/2d-coverage-g++-24.08/include - AMReX_GpuControl.H (source / functions) Hit Total Coverage
Test: coverage_merged.info Lines: 0 2 0.0 %
Date: 2024-11-18 05:28:54 Functions: 0 2 0.0 %

          Line data    Source code
       1             : #ifndef AMREX_GPU_CONTROL_H_
       2             : #define AMREX_GPU_CONTROL_H_
       3             : #include <AMReX_Config.H>
       4             : 
       5             : #include <AMReX_GpuQualifiers.H>
       6             : #include <AMReX_GpuTypes.H>
       7             : 
       8             : #include <utility>
       9             : 
      10             : #if defined(AMREX_USE_CUDA) && (__CUDACC_VER_MAJOR__ > 11 || ((__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ >= 2)))
      11             : #define AMREX_CUDA_GE_11_2 1
      12             : #endif
      13             : 
      14             : #if defined(AMREX_USE_HIP) || defined(AMREX_CUDA_GE_11_2)
      15             : #define AMREX_GPU_STREAM_ALLOC_SUPPORT 1
      16             : #endif
      17             : 
      18             : #if defined(AMREX_USE_HIP)
      19             : #define AMREX_HIP_OR_CUDA(a,b) a
      20             : #elif defined(AMREX_USE_CUDA)
      21             : #define AMREX_HIP_OR_CUDA(a,b) b
      22             : #else
      23             : #define AMREX_HIP_OR_CUDA(a,b) ((void)0);
      24             : #endif
      25             : 
      26             : #if defined(AMREX_USE_HIP)
      27             : #define AMREX_HIP_OR_CUDA_OR_SYCL(a,b,c) a
      28             : #elif defined(AMREX_USE_CUDA)
      29             : #define AMREX_HIP_OR_CUDA_OR_SYCL(a,b,c) b
      30             : #elif defined(AMREX_USE_SYCL)
      31             : #define AMREX_HIP_OR_CUDA_OR_SYCL(a,b,c) c
      32             : #else
      33             : #define AMREX_HIP_OR_CUDA_OR_SYCL(a,b,c) ((void)0);
      34             : #endif
      35             : 
      36             : #ifdef AMREX_USE_GPU
      37             : #define AMREX_GPU_OR_CPU(a,b) a
      38             : #else
      39             : #define AMREX_GPU_OR_CPU(a,b) b
      40             : #endif
      41             : 
      42             : #ifdef AMREX_USE_SYCL
      43             : #define AMREX_SYCL_ONLY(a) a
      44             : #else
      45             : #define AMREX_SYCL_ONLY(a) ((void)0)
      46             : #endif
      47             : 
      48             : #ifdef AMREX_USE_SYCL
      49             : #if (AMREX_SPACEDIM == 1)
      50             : #  define AMREX_SYCL_1D_ONLY(a) a
      51             : #  define AMREX_SYCL_2D_ONLY(a) ((void)0)
      52             : #  define AMREX_SYCL_3D_ONLY(a) ((void)0)
      53             : #elif (AMREX_SPACEDIM == 2)
      54             : #  define AMREX_SYCL_1D_ONLY(a) ((void)0)
      55             : #  define AMREX_SYCL_2D_ONLY(a) a
      56             : #  define AMREX_SYCL_3D_ONLY(a) ((void)0)
      57             : #elif (AMREX_SPACEDIM == 3)
      58             : #  define AMREX_SYCL_1D_ONLY(a) ((void)0)
      59             : #  define AMREX_SYCL_2D_ONLY(a) ((void)0)
      60             : #  define AMREX_SYCL_3D_ONLY(a) a
      61             : #endif
      62             : #else
      63             : #  define AMREX_SYCL_1D_ONLY(a) ((void)0)
      64             : #  define AMREX_SYCL_2D_ONLY(a) ((void)0)
      65             : #  define AMREX_SYCL_3D_ONLY(a) ((void)0)
      66             : #endif
      67             : 
      68             : namespace amrex {
      69             :     enum struct RunOn { Gpu, Cpu, Device=Gpu, Host=Cpu };
      70             : }
      71             : 
      72             : namespace amrex { // NOLINT(modernize-concat-nested-namespaces)
      73             : 
      74             : #ifdef AMREX_USE_HIP
      75             : using gpuStream_t = hipStream_t;
      76             : #elif defined(AMREX_USE_CUDA)
      77             : using gpuStream_t = cudaStream_t;
      78             : #endif
      79             : 
      80             : namespace Gpu {
      81             : 
      82             : #if defined(AMREX_USE_GPU)
      83             : 
      84             :     extern bool in_launch_region;
      85             : 
      86             :     [[nodiscard]] inline bool inLaunchRegion () noexcept { return in_launch_region; }
      87             :     [[nodiscard]] inline bool notInLaunchRegion () noexcept { return !in_launch_region; }
      88             : 
      89             :     /**
      90             :      * Enable/disable GPU kernel launches.
      91             :      *
      92             :      * \note This will only switch from GPU to CPU for kernels launched
      93             :      * with macros. Functions like `amrex::ParallelFor` will be unaffected.
      94             :      * Therefore it should not be used for comparing GPU to non-GPU performance
      95             :      * or behavior.
      96             :      *
      97             :      * \code
      98             :      *  Gpu::setLaunchRegion(0);
      99             :      *
     100             :      *  //...
     101             :      *
     102             :      *  Gpu::setLaunchRegion(1);
     103             :      * \endcode
     104             :      *
     105             :      * Will disable the launching of GPU kernels between the calls.
     106             :      */
     107             : 
     108             :     inline bool setLaunchRegion (bool launch) noexcept {
     109             :         bool r = in_launch_region;
     110             :         in_launch_region = launch;
     111             :         return r;
     112             :     }
     113             : 
     114             :     extern bool in_graph_region;
     115             :     [[nodiscard]] inline bool inGraphRegion() { return (in_graph_region && in_launch_region); }
     116             :     [[nodiscard]] inline bool notInGraphRegion() { return (!in_graph_region || !in_launch_region); }
     117             : 
     118             :     inline bool setGraphRegion (bool graph) {
     119             :         bool r = in_graph_region;
     120             :         in_graph_region = graph;
     121             :         return r;
     122             :     }
     123             : 
     124             :     struct [[nodiscard]] LaunchSafeGuard
     125             :     {
     126             :         explicit LaunchSafeGuard (bool flag) noexcept
     127             :             : m_old(setLaunchRegion(flag)) {}
     128             :         ~LaunchSafeGuard () { setLaunchRegion(m_old); }
     129             :     private:
     130             :         bool m_old;
     131             :     };
     132             : 
     133             :     struct [[nodiscard]] GraphSafeGuard
     134             :     {
     135             :         explicit GraphSafeGuard (bool flag) noexcept
     136             :             : m_old(setGraphRegion(flag)) {}
     137             :         ~GraphSafeGuard () { setGraphRegion(m_old); }
     138             :     private:
     139             :         bool m_old;
     140             :     };
     141             : 
     142             :     extern bool in_single_stream_region;
     143             :     extern bool in_nosync_region;
     144             : 
     145             :     [[nodiscard]] inline bool inSingleStreamRegion () noexcept { return in_single_stream_region; }
     146             :     [[nodiscard]] inline bool inNoSyncRegion () noexcept { return in_nosync_region; }
     147             : 
     148             :     inline bool setSingleStreamRegion (bool b) noexcept {
     149             :         return std::exchange(in_single_stream_region, b);
     150             :     }
     151             : 
     152             :     inline bool setNoSyncRegion (bool b) noexcept {
     153             :         return std::exchange(in_nosync_region, b);
     154             :     }
     155             : 
     156             :     /**
     157             :      * This struct provides a RAII-style mechanism for changing the number
     158             :      * of streams returned by Gpu::numStreams() to a single stream.
     159             :      */
     160             :     struct [[nodiscard]] SingleStreamRegion
     161             :     {
     162             :         SingleStreamRegion () noexcept
     163             :             : m_prev_flag(std::exchange(in_single_stream_region,true))
     164             :         {}
     165             : 
     166             :         ~SingleStreamRegion () { in_single_stream_region = m_prev_flag; }
     167             : 
     168             :     private:
     169             :         bool m_prev_flag;
     170             :     };
     171             : 
     172             :     /**
     173             :      * This struct provides a RAII-style mechanism for disabling GPU
     174             :      * synchronization in MFIter by default.  Note that explicit calls to
     175             :      * Gpu::streamSynchronize and Gpu::deviceSynchronize still work.
     176             :      */
     177             :     struct [[nodiscard]] NoSyncRegion
     178             :     {
     179             :         NoSyncRegion () noexcept
     180             :             : m_prev_flag(std::exchange(in_nosync_region,true))
     181             :         {}
     182             : 
     183             :         ~NoSyncRegion () { in_nosync_region = m_prev_flag; }
     184             : 
     185             :     private:
     186             :         bool m_prev_flag;
     187             :     };
     188             : 
     189             : #else
     190             : 
     191             :     [[nodiscard]] inline constexpr bool inLaunchRegion () { return false; }
     192             :     [[nodiscard]] inline constexpr bool notInLaunchRegion () { return true; }
     193             :     [[nodiscard]] inline constexpr bool setLaunchRegion (bool) { return false; }
     194             : 
     195             :     [[nodiscard]] inline constexpr bool inGraphRegion () { return false; }
     196             :     [[nodiscard]] inline constexpr bool notInGraphRegion () { return true; }
     197             :     [[nodiscard]] inline constexpr bool setGraphRegion (bool) { return false; }
     198             : 
     199             :     struct [[nodiscard]] LaunchSafeGuard
     200             :     {
     201             :         explicit LaunchSafeGuard (bool) {}
     202             :     };
     203             : 
     204             :     struct [[nodiscard]] GraphSafeGuard
     205             :     {
     206             :         explicit GraphSafeGuard (bool) {}
     207             :     };
     208             : 
     209           0 :     [[nodiscard]] inline constexpr bool inSingleStreamRegion () { return false; }
     210           0 :     [[nodiscard]] inline constexpr bool inNoSyncRegion () { return true; }
     211             :     [[nodiscard]] inline constexpr bool setSingleStreamRegion (bool) { return false; }
     212             :     [[nodiscard]] inline constexpr bool setNoSyncRegion (bool) { return true; }
     213             :     struct [[nodiscard]] SingleStreamRegion {};
     214             :     struct [[nodiscard]] NoSyncRegion {};
     215             : 
     216             : #endif
     217             : 
     218             : }
     219             : }
     220             : 
     221             : #endif

Generated by: LCOV version 1.14