LCOV - code coverage report
Current view: top level - ext/amrex/3d-coverage-g++-24.08/include - AMReX_DistributionMapping.H (source / functions) Hit Total Coverage
Test: coverage_merged.info Lines: 4 8 50.0 %
Date: 2024-11-18 05:28:54 Functions: 4 8 50.0 %

          Line data    Source code
       1             : #ifndef BL_DISTRIBUTIONMAPPING_H
       2             : #define BL_DISTRIBUTIONMAPPING_H
       3             : #include <AMReX_Config.H>
       4             : 
       5             : #include <AMReX.H>
       6             : #include <AMReX_Array.H>
       7             : #include <AMReX_Vector.H>
       8             : #include <AMReX_Box.H>
       9             : #include <AMReX_REAL.H>
      10             : #include <AMReX_ParallelDescriptor.H>
      11             : 
      12             : #include <map>
      13             : #include <limits>
      14             : #include <memory>
      15             : #include <cstddef>
      16             : #include <iosfwd>
      17             : 
      18             : namespace amrex {
      19             : 
      20             : class BoxArray;
      21             : class MultiFab;
      22             : template <typename T> class FabArray;
      23             : template <typename T> class LayoutData;
      24             : class FabArrayBase;
      25             : 
      26             : /**
      27             : * \brief Calculates the distribution of FABs to MPI processes.
      28             : *
      29             : *  This class calculates the distribution of FABs to MPI processes in a
      30             : *  FabArray in a multi-processor environment.  By distribution is meant what
      31             : *  MPI process in the multi-processor environment owns what FAB.  Only the BoxArray
      32             : *  on which the FabArray is built is used in determining the distribution.
      33             : *  The three types of distributions supported are round-robin, knapsack, and SFC.
      34             : *  In the round-robin distribution FAB i is owned by CPU i%N where N is total
      35             : *  number of CPUs.  In the knapsack distribution the FABs are partitioned
      36             : *  across CPUs such that the total volume of the Boxes in the underlying
      37             : *  BoxArray are as equal across CPUs as is possible.  The SFC distribution is
      38             : *  based on a space filling curve.
      39             : */
      40       11120 : class DistributionMapping
      41             : {
      42             :   public:
      43             : 
      44             :     template <typename T> friend class FabArray;
      45             :     friend class FabArrayBase;
      46             : 
      47             :     //! The distribution strategies
      48             :     enum Strategy { UNDEFINED = -1, ROUNDROBIN, KNAPSACK, SFC, RRSFC };
      49             : 
      50             :     //! The default constructor.
      51             :     DistributionMapping () noexcept;
      52             : 
      53             :     //! The copy constructor.
      54         600 :     DistributionMapping (const DistributionMapping& rhs) = default;
      55             : 
      56             :     //! The copy assignment operator.
      57             :     DistributionMapping& operator= (const DistributionMapping& rhs) = default;
      58             : 
      59             :     //! The move constructor.
      60           0 :     DistributionMapping (DistributionMapping&& rhs) noexcept = default;
      61             : 
      62             :     //! The move assignment operator.
      63             :     DistributionMapping& operator= (DistributionMapping&& rhs) noexcept = default;
      64             : 
      65             :    //! The destructor.
      66      454073 :     ~DistributionMapping() noexcept = default;
      67             : 
      68             :     /**
      69             :     * \brief Create an object with the specified mapping.
      70             :     */
      71             :     explicit DistributionMapping (const Vector<int>& pmap);
      72             :     explicit DistributionMapping (Vector<int>&& pmap) noexcept;
      73             :     //! Build mapping out of BoxArray over nprocs processors.
      74             :     explicit DistributionMapping (const BoxArray& boxes,
      75             :                                   int nprocs = ParallelDescriptor::NProcs());
      76             :     /**
      77             :     * \brief This is a very specialized distribution map.
      78             :     * Do NOT use it unless you really understand what it does.
      79             :     */
      80             :     DistributionMapping (const DistributionMapping& d1,
      81             :                          const DistributionMapping& d2);
      82             : 
      83             :     /**
      84             :     * \brief Build mapping out of BoxArray over nprocs processors.
      85             :     * You need to call this if you built your DistributionMapping
      86             :     * with the default constructor.
      87             :     */
      88             :     void define (const BoxArray& boxes, int nprocs = ParallelDescriptor::NProcs());
      89             :     /**
      90             :     * \brief Build mapping out of an Array of ints. You need to call this if you
      91             :     * built your DistributionMapping with the default constructor.
      92             :     */
      93             :     void define (const Vector<int>& pmap);
      94             :     void define (Vector<int>&& pmap) noexcept;
      95             :     /**
      96             :     * \brief Returns a constant reference to the mapping of boxes in the
      97             :     * underlying BoxArray to the CPU that holds the FAB on that Box.
      98             :     * ProcessorMap()[i] is an integer in the interval [0, NCPU) where
      99             :     * NCPU is the number of CPUs being used.
     100             :     */
     101             :     [[nodiscard]] const Vector<int>& ProcessorMap () const noexcept;
     102             : 
     103             :     //! Length of the underlying processor map.
     104             :     [[nodiscard]] Long size () const noexcept { return Long(m_ref->m_pmap.size()); }
     105             :     [[nodiscard]] Long capacity () const noexcept { return Long(m_ref->m_pmap.capacity()); }
     106             :     [[nodiscard]] bool empty () const noexcept { return m_ref->m_pmap.empty(); }
     107             : 
     108             :     //! Number of references to this DistributionMapping
     109             :     [[nodiscard]] Long linkCount () const noexcept { return m_ref.use_count(); }
     110             : 
     111             :     //! Equivalent to ProcessorMap()[index].
     112             :     [[nodiscard]] int operator[] (int index) const noexcept { return m_ref->m_pmap[index]; }
     113             : 
     114             :     std::istream& readFrom (std::istream& is);
     115             : 
     116             :     std::ostream& writeOn (std::ostream& os) const;
     117             : 
     118             :     //! Set/get the distribution strategy.
     119             :     static void strategy (Strategy how);
     120             : 
     121             :     static Strategy strategy ();
     122             : 
     123             :     //! Set/get the space filling curve threshold.
     124             :     static void SFC_Threshold (int n);
     125             : 
     126             :     static int SFC_Threshold ();
     127             : 
     128             :     //! Are the distributions equal?
     129             :     bool operator== (const DistributionMapping& rhs) const noexcept;
     130             : 
     131             :     //! Are the distributions different?
     132             :     bool operator!= (const DistributionMapping& rhs) const noexcept;
     133             : 
     134             :     void SFCProcessorMap (const BoxArray& boxes, const std::vector<Long>& wgts, int nprocs,
     135             :                           bool sort=true);
     136             :     void SFCProcessorMap (const BoxArray& boxes, const std::vector<Long>& wgts, int nprocs,
     137             :                           Real& efficiency, bool sort=true);
     138             :     void KnapSackProcessorMap (const std::vector<Long>& wgts, int nprocs,
     139             :                                Real* efficiency=nullptr,
     140             :                                bool do_full_knapsack=true,
     141             :                                int nmax=std::numeric_limits<int>::max(),
     142             :                                bool sort=true);
     143             :     void KnapSackProcessorMap (const DistributionMapping& olddm,
     144             :                                const std::vector<Long>& wgts, Real keep_ratio,
     145             :                                Real& old_efficiency, Real& new_efficiency,
     146             :                                int nmax=std::numeric_limits<int>::max());
     147             :     void RoundRobinProcessorMap (int nboxes, int nprocs, bool sort=true);
     148             :     void RoundRobinProcessorMap (const std::vector<Long>& wgts, int nprocs, bool sort=true);
     149             : 
     150             :     /**
     151             :     * \brief Initializes distribution strategy from ParmParse.
     152             :     *
     153             :     * ParmParse options are:
     154             :     *
     155             :     *   DistributionMapping.strategy = ROUNDROBIN
     156             :     *   DistributionMapping.strategy = KNAPSACK
     157             :     *   DistributionMapping.strategy = SFC
     158             :     *   DistributionMapping.strategy = RRFC
     159             :     */
     160             :     static void Initialize ();
     161             : 
     162             :     static void Finalize ();
     163             : 
     164             :     static bool SameRefs (const DistributionMapping& lhs,
     165             :                           const DistributionMapping& rhs)
     166             :           { return lhs.m_ref == rhs.m_ref; }
     167             : 
     168             :     static DistributionMapping makeKnapSack (const MultiFab& weight,
     169             :                                              int nmax=std::numeric_limits<int>::max());
     170             :     static DistributionMapping makeKnapSack (const MultiFab& weight, Real& eff,
     171             :                                              int nmax=std::numeric_limits<int>::max());
     172             :     static DistributionMapping makeKnapSack (const Vector<Real>& rcost,
     173             :                                              int nmax=std::numeric_limits<int>::max());
     174             :     static DistributionMapping makeKnapSack (const Vector<Real>& rcost, Real& eff,
     175             :                                              int nmax=std::numeric_limits<int>::max(),
     176             :                                              bool sort=true);
     177             : 
     178             :     /** \brief Computes a new distribution mapping by distributing input costs
     179             :      * according to the `knapsack` algorithm.
     180             :      * @param[in] rcost_local LayoutData of costs; contains, e.g., costs for the
     181             :      *            local boxes in the FAB array, corresponding indices in the global
     182             :      *            indices in the FAB array, and the distribution mapping
     183             :      * @param[in,out] currentEfficiency writes the efficiency (i.e., mean cost over
     184             :      *                all MPI ranks, normalized to the max cost) given the current
     185             :      *                distribution mapping
     186             :      * @param[in,out] proposedEfficiency writes the efficiency for the proposed
     187             :      *                distribution mapping
     188             :      * @param[in] nmax the maximum number of boxes that can be assigned to any
     189             :      *            MPI rank by the knapsack algorithm
     190             :      * @param[in] broadcastToAll controls whether to transmit the proposed
     191             :      *            distribution mapping to all other processes; setting this to
     192             :      *            false allows to, e.g., test whether the proposed distribution
     193             :      *            mapping is an improvement relative to the current distribution
     194             :      *            mapping, before deciding to broadcast the proposed distribution
     195             :      *            mapping
     196             :      * @param[in] root which process to collect the local costs from others and
     197             :      *            compute the proposed distribution mapping
     198             :      * @param[in] keep_ratio controls the fraction of load that should be kept on
     199             :      *            the original process.
     200             :      * @return the proposed load-balanced distribution mapping
     201             :      */
     202             :     static DistributionMapping makeKnapSack (const LayoutData<Real>& rcost_local,
     203             :                                              Real& currentEfficiency, Real& proposedEfficiency,
     204             :                                              int nmax=std::numeric_limits<int>::max(),
     205             :                                              bool broadcastToAll=true,
     206             :                                              int root=ParallelDescriptor::IOProcessorNumber(),
     207             :                                              Real keep_ratio = Real(0.0));
     208             : 
     209             :     static DistributionMapping makeRoundRobin (const MultiFab& weight);
     210             :     static DistributionMapping makeSFC (const MultiFab& weight, bool sort=true);
     211             :     static DistributionMapping makeSFC (const MultiFab& weight, Real& eff, bool sort=true);
     212             :     static DistributionMapping makeSFC (const Vector<Real>& rcost,
     213             :                                         const BoxArray& ba, bool sort=true);
     214             :     static DistributionMapping makeSFC (const Vector<Real>& rcost,
     215             :                                         const BoxArray& ba, Real& eff, bool sort=true);
     216             : 
     217             :     /** \brief Computes a new distribution mapping by distributing input costs
     218             :      * according to a `space filling curve` (SFC) algorithm.
     219             :      * @param[in] rcost_local LayoutData of costs; contains, e.g., costs for the
     220             :      *            local boxes in the FAB array, corresponding indices in the global
     221             :      *            indices in the FAB array, and the distribution mapping
     222             :      * @param[in,out] currentEfficiency writes the efficiency (i.e., mean cost over
     223             :      *                all MPI ranks, normalized to the max cost) given the current
     224             :      *                distribution mapping
     225             :      * @param[in,out] proposedEfficiency writes the efficiency for the proposed
     226             :      *                distribution mapping
     227             :      * @param[in] broadcastToAll controls whether to transmit the proposed
     228             :      *            distribution mapping to all other processes; setting this to
     229             :      *            false allows to, e.g., test whether the proposed distribution
     230             :      *            mapping is an improvement relative to the current distribution
     231             :      *            mapping, before deciding to broadcast the proposed distribution
     232             :      *            mapping
     233             :      * @param[in] root which process to collect the local costs from others and
     234             :      *            compute the proposed distribution mapping
     235             :      * @return the proposed load-balanced distribution mapping
     236             :      */
     237             :     static DistributionMapping makeSFC (const LayoutData<Real>& rcost_local,
     238             :                                         Real& currentEfficiency, Real& proposedEfficiency,
     239             :                                         bool broadcastToAll=true,
     240             :                                         int root=ParallelDescriptor::IOProcessorNumber());
     241             : 
     242             :     /**
     243             :     * if use_box_vol is true, weight boxes by their volume in Distribute
     244             :     * otherwise, all boxes will be treated with equal weight
     245             :     */
     246             :     static std::vector<std::vector<int> > makeSFC (const BoxArray& ba,
     247             :                                                    bool use_box_vol=true,
     248             :                                                    int nprocs=ParallelContext::NProcsSub() );
     249             : 
     250             :     /** \brief Computes the average cost per MPI rank given a distribution mapping
     251             :      * global cost vector.
     252             :      * @param[in] dm distribution mapping (mapping from FAB to MPI processes)
     253             :      * @param[in] cost vector giving mapping from FAB to the corresponding cost
     254             :      * @param[in,out] efficiency average cost per MPI process, as computed from
     255             :      *                the given distribution mapping and cost
     256             :      */
     257             :     template <typename T>
     258             :     static void ComputeDistributionMappingEfficiency (const DistributionMapping& dm,
     259             :                                                       const std::vector<T>& cost,
     260             :                                                       Real* efficiency);
     261             : 
     262             : private:
     263             : 
     264             :     const Vector<int>& getIndexArray ();
     265             :     const std::vector<bool>& getOwnerShip ();
     266             : 
     267             :     //! Ways to create the processor map.
     268             :     void RoundRobinProcessorMap (const BoxArray& boxes, int nprocs);
     269             :     void KnapSackProcessorMap   (const BoxArray& boxes, int nprocs);
     270             :     void SFCProcessorMap        (const BoxArray& boxes, int nprocs);
     271             :     void RRSFCProcessorMap      (const BoxArray& boxes, int nprocs);
     272             : 
     273             :     using LIpair = std::pair<Long,int>;
     274             : 
     275             :     struct LIpairLT
     276             :     {
     277             :         bool operator () (const LIpair& lhs,
     278             :                           const LIpair& rhs) const noexcept
     279             :             {
     280             :                 return lhs.first < rhs.first;
     281             :             }
     282             :     };
     283             : 
     284             :     struct LIpairGT
     285             :     {
     286             :         bool operator () (const LIpair& lhs,
     287             :                           const LIpair& rhs) const noexcept
     288             :             {
     289             :                 return lhs.first > rhs.first;
     290             :             }
     291             :     };
     292             : 
     293             :     static void Sort (std::vector<LIpair>& vec, bool reverse);
     294             : 
     295             :     void RoundRobinDoIt (int                  nboxes,
     296             :                          int                  nprocs,
     297             :                          std::vector<LIpair>* LIpairV = nullptr,
     298             :                          bool                 sort = true);
     299             : 
     300             :     void KnapSackDoIt (const std::vector<Long>& wgts,
     301             :                        int                      nprocs,
     302             :                        Real&                    efficiency,
     303             :                        bool                     do_full_knapsack,
     304             :                        int                      nmax=std::numeric_limits<int>::max(),
     305             :                        bool                     sort=true);
     306             : 
     307             :     void SFCProcessorMapDoIt (const BoxArray&          boxes,
     308             :                               const std::vector<Long>& wgts,
     309             :                               int                      nprocs,
     310             :                               bool                     sort=true,
     311             :                               Real*                    efficiency=nullptr);
     312             : 
     313             :     void RRSFCDoIt           (const BoxArray&          boxes,
     314             :                               int                      nprocs);
     315             : 
     316             :     //! Least used ordering of CPUs (by # of bytes of FAB data).
     317             :     static void LeastUsedCPUs (int nprocs, Vector<int>& result);
     318             :     /**
     319             :     * \brief rteam: Least used ordering of Teams
     320             :     * rworker[i]: Least used ordering of team workers for Team i
     321             :     */
     322             :     static void LeastUsedTeams (Vector<int>& rteam, Vector<Vector<int> >& rworker, int nteams, int nworkers);
     323             : 
     324             :     //! A useful typedef.
     325             :     using PVMF = void (DistributionMapping::*)(const BoxArray &, int);
     326             : 
     327             :     //! Everyone uses the same Strategy -- defaults to SFC.
     328             :     static Strategy m_Strategy;
     329             :     /**
     330             :     * \brief Pointer to one of the CreateProcessorMap() functions.
     331             :     * Corresponds to the one specified by m_Strategy.
     332             :     */
     333             :     static PVMF m_BuildMap;
     334             : 
     335             :     struct Ref
     336             :     {
     337             :         friend class DistributionMapping;
     338             : 
     339             :         //! Constructors to match those in DistributionMapping ....
     340             :         Ref () = default;
     341             : 
     342             :         explicit Ref (int len) : m_pmap(len) {}
     343             : 
     344             :         explicit Ref (const Vector<int>& pmap) : m_pmap(pmap) {}
     345             : 
     346             :         explicit Ref (Vector<int>&& pmap) noexcept : m_pmap(std::move(pmap)) {}
     347             : 
     348             :         //! dtor, copy-ctor, copy-op=, move-ctor, and move-op= are compiler generated.
     349             : 
     350             :         void clear () { m_pmap.clear();  m_index_array.clear();   m_ownership.clear(); }
     351             : 
     352             :         Vector<int> m_pmap; //!< index array for all boxes
     353             :         Vector<int> m_index_array;  //!< index array for local boxes owned by the team
     354             :         std::vector<bool> m_ownership; //!< true ownership
     355             :     };
     356             :     //
     357             :     //! The data -- a reference-counted pointer to a Ref.
     358             :     std::shared_ptr<Ref> m_ref;
     359             : 
     360             : public:
     361             :     struct RefID {
     362      403505 :         constexpr RefID () noexcept {} // =default does not work due to a clang bug // NOLINT
     363           0 :         explicit RefID (Ref* data_) noexcept : data(data_) {}
     364             :         bool operator<  (const RefID& rhs) const noexcept { return std::less<>{}(data,rhs.data); }
     365             :         bool operator== (const RefID& rhs) const noexcept { return data == rhs.data; }
     366           0 :         bool operator!= (const RefID& rhs) const noexcept { return data != rhs.data; }
     367             :         [[nodiscard]] const Ref *dataPtr() const noexcept { return data; }
     368             :         void PrintPtr(std::ostream &os) const { os << data << '\n'; }
     369             :         friend std::ostream& operator<< (std::ostream& os, const RefID& id);
     370             :    private:
     371             :         Ref* data = nullptr;
     372             :     };
     373             : 
     374             :     //! This gives a unique ID of the reference, which is different from dmID above.
     375           0 :     [[nodiscard]] RefID getRefID () const noexcept { return RefID { m_ref.get() }; }
     376             : };
     377             : 
     378             : //! Our output operator.
     379             : std::ostream& operator<< (std::ostream& os, const DistributionMapping& pmap);
     380             : 
     381             : std::ostream& operator<< (std::ostream& os, const DistributionMapping::RefID& id);
     382             : 
     383             : /**
     384             :  *  \brief Function that creates a DistributionMapping "similar" to that of a MultiFab.
     385             :  *
     386             :  *  "Similar" means that, if a box in "ba" intersects with any of the
     387             :  *  boxes in the BoxArray associated with "mf", taking "ngrow" ghost cells into account,
     388             :  *  then that box will be assigned to the proc owning the one it has the maximum amount
     389             :  *  of overlap with.
     390             :  *
     391             :  *  @param[in] ba The BoxArray we want to generate a DistributionMapping for.
     392             :  *  @param[in] mf The MultiFab we want said DistributionMapping to be similar to.
     393             :  *  @param[in] ng The number of grow cells to use when computing intersection / overlap
     394             :  *  @return The computed DistributionMapping.
     395             :  */
     396             : DistributionMapping MakeSimilarDM (const BoxArray& ba, const MultiFab& mf, const IntVect& ng);
     397             : 
     398             : /**
     399             :  *  \brief Function that creates a DistributionMapping "similar" to that of a MultiFab.
     400             :  *
     401             :  *  "Similar" means that, if a box in "ba" intersects with any of the
     402             :  *  boxes in the BoxArray associated with "mf", taking "ngrow" ghost cells into account,
     403             :  *  then that box will be assigned to the proc owning the one it has the maximum amount
     404             :  *  of overlap with.
     405             :  *
     406             :  *  @param[in] ba The BoxArray we want to generate a DistributionMapping for.
     407             :  *  @param[in] src_ba The BoxArray associated with the src DistributionMapping.
     408             :  *  @param[in] src_dm The input DistributionMapping we want the output to be similar to.
     409             :  *  @param[in] ng The number of grow cells to use when computing intersection / overlap
     410             :  *  @return The computed DistributionMapping.
     411             :  */
     412             : DistributionMapping MakeSimilarDM (const BoxArray& ba, const BoxArray& src_ba,
     413             :                                    const DistributionMapping& src_dm, const IntVect& ng);
     414             : 
     415             : template <typename T>
     416             : void DistributionMapping::ComputeDistributionMappingEfficiency (
     417             :     const DistributionMapping& dm, const std::vector<T>& cost, Real* efficiency)
     418             : {
     419             :     const int nprocs = ParallelDescriptor::NProcs();
     420             :     Vector<T> wgts(nprocs, T(0));
     421             : 
     422             :     const auto nboxes = int(dm.size());
     423             :     for (int ibox = 0; ibox < nboxes; ++ibox) {
     424             :         wgts[dm[ibox]] += cost[ibox];
     425             :     }
     426             : 
     427             :     T max_weight = 0;
     428             :     T sum_weight = 0;
     429             :     for (auto const& w : wgts) {
     430             :         max_weight = std::max(w, max_weight);
     431             :         sum_weight += w;
     432             :     }
     433             : 
     434             :     *efficiency = static_cast<Real>(sum_weight) /
     435             :         (static_cast<Real>(nprocs) * static_cast<Real>(max_weight));
     436             : }
     437             : 
     438             : }
     439             : 
     440             : #endif /*BL_DISTRIBUTIONMAPPING_H*/

Generated by: LCOV version 1.14