LCOV - code coverage report
Current view: top level - src - qs_fb_distribution_methods.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:42dac4a) Lines: 76.8 % 272 209
Test Date: 2025-07-25 12:55:17 Functions: 55.6 % 18 10

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : MODULE qs_fb_distribution_methods
       9              : 
      10              :    USE cell_types,                      ONLY: cell_type
      11              :    USE cp_dbcsr_api,                    ONLY: dbcsr_distribution_get,&
      12              :                                               dbcsr_distribution_type,&
      13              :                                               dbcsr_get_info,&
      14              :                                               dbcsr_p_type,&
      15              :                                               dbcsr_type
      16              :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      17              :                                               cp_logger_type
      18              :    USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
      19              :                                               cp_print_key_unit_nr
      20              :    USE input_section_types,             ONLY: section_vals_type
      21              :    USE kinds,                           ONLY: dp
      22              :    USE message_passing,                 ONLY: mp_para_env_type
      23              :    USE particle_types,                  ONLY: particle_type
      24              :    USE qs_environment_types,            ONLY: get_qs_env,&
      25              :                                               qs_environment_type
      26              :    USE qs_fb_atomic_halo_types,         ONLY: &
      27              :         fb_atomic_halo_build_halo_atoms, fb_atomic_halo_cost, fb_atomic_halo_create, &
      28              :         fb_atomic_halo_init, fb_atomic_halo_nullify, fb_atomic_halo_obj, fb_atomic_halo_release, &
      29              :         fb_atomic_halo_set, fb_build_pair_radii
      30              :    USE qs_fb_env_types,                 ONLY: fb_env_get,&
      31              :                                               fb_env_obj,&
      32              :                                               fb_env_set
      33              :    USE qs_kind_types,                   ONLY: qs_kind_type
      34              :    USE util,                            ONLY: sort
      35              : #include "./base/base_uses.f90"
      36              : 
      37              :    IMPLICIT NONE
      38              : 
      39              :    PRIVATE
      40              : 
      41              :    PUBLIC :: fb_distribution_build
      42              : 
      43              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_fb_distribution_methods'
      44              : 
      45              : ! **************************************************************************************************
      46              : !> \brief derived type containing cost data used for process distribution
      47              : !> \param id               : global atomic index
      48              : !> \param cost             : computational cost for the atomic matrix associated
      49              : !>                           to this atom
      50              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
      51              : ! **************************************************************************************************
      52              :    TYPE fb_distribution_element
      53              :       INTEGER :: id = -1
      54              :       REAL(KIND=dp) :: cost = -1.0_dp
      55              :    END TYPE fb_distribution_element
      56              : 
      57              : ! **************************************************************************************************
      58              : !> \brief derived type containing the list of atoms currently allocated to a
      59              : !>        processor
      60              : !> \param list             : list of atoms and their associated costs
      61              : !> \param cost             : total cost of the list
      62              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
      63              : ! **************************************************************************************************
      64              :    TYPE fb_distribution_list
      65              :       TYPE(fb_distribution_element), DIMENSION(:), POINTER :: list => NULL()
      66              :       INTEGER :: nelements = -1
      67              :       REAL(KIND=dp) :: cost = -1.0_dp
      68              :    END TYPE fb_distribution_list
      69              : 
      70              : ! **************************************************************************************************
      71              : !> \brief In filter matrix algorithm, each atomic matrix contributes to a
      72              : !>        column in the filter matrix, which is stored in DBCSR format.
      73              : !>        When distributing the atoms (and hence the atomic matrics) to the
      74              : !>        processors, we want the processors to have atoms that would be
      75              : !>        correspond to the block columns in the DBCSR format local to them.
      76              : !>        This derived type stores this information. For each atom, it
      77              : !>        corresponds to a DBCSR block column, and the list of processors
      78              : !>        in the 2D processor grid responsible for this column will be the
      79              : !>        preferred processors for this atom.
      80              : !> \param list             : list of preferred processors for an atom
      81              : !>                           note that here the processors are indexed from
      82              : !>                           1, i.e. = MPI_RANK+1
      83              : !> \param nprocs           : number of processors in the list
      84              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
      85              : ! **************************************************************************************************
      86              :    TYPE fb_preferred_procs_list
      87              :       INTEGER, DIMENSION(:), POINTER :: list => NULL()
      88              :       INTEGER :: nprocs = -1
      89              :    END TYPE fb_preferred_procs_list
      90              : 
      91              : ! Parameters related to automatic resizing of the hash_table:
      92              : ! Resize by EXPAND_FACTOR if total no. slots / no. of filled slots < ENLARGE_RATIO
      93              :    INTEGER, PARAMETER, PRIVATE :: ENLARGE_RATIO = 1
      94              :    INTEGER, PARAMETER, PRIVATE :: REDUCE_RATIO = 3
      95              :    INTEGER, PARAMETER, PRIVATE :: EXPAND_FACTOR = 2
      96              :    INTEGER, PARAMETER, PRIVATE :: SHRINK_FACTOR = 2
      97              : 
      98              :    INTERFACE fb_distribution_remove
      99              :       MODULE PROCEDURE fb_distribution_remove_ind, &
     100              :          fb_distribution_remove_el
     101              :    END INTERFACE fb_distribution_remove
     102              : 
     103              :    INTERFACE fb_distribution_move
     104              :       MODULE PROCEDURE fb_distribution_move_ind, &
     105              :          fb_distribution_move_el
     106              :    END INTERFACE fb_distribution_move
     107              : 
     108              : CONTAINS
     109              : 
     110              : ! **************************************************************************************************
     111              : !> \brief Build local atoms associated to filter matrix algorithm for each
     112              : !>        MPI process, trying to balance the load for calculating the
     113              : !>        filter matrix
     114              : !> \param fb_env : the filter matrix environment
     115              : !> \param qs_env : quickstep environment
     116              : !> \param scf_section : SCF input section
     117              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     118              : ! **************************************************************************************************
     119           10 :    SUBROUTINE fb_distribution_build(fb_env, qs_env, scf_section)
     120              :       TYPE(fb_env_obj), INTENT(INOUT)                    :: fb_env
     121              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     122              :       TYPE(section_vals_type), POINTER                   :: scf_section
     123              : 
     124              :       CHARACTER(len=*), PARAMETER :: routineN = 'fb_distribution_build'
     125              : 
     126              :       INTEGER :: handle, i_common_set, iatom, ii, ipe, lb, lowest_cost_ind, my_pe, n_common_sets, &
     127              :          natoms, nhalo_atoms, nkinds, nprocs, owner_id_in_halo, pref_pe, ub
     128           10 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: common_set_ids, local_atoms_all, &
     129           10 :                                                             local_atoms_sizes, local_atoms_starts, &
     130           10 :                                                             pe, pos_in_preferred_list
     131           10 :       INTEGER, DIMENSION(:), POINTER                     :: halo_atoms, local_atoms
     132              :       LOGICAL                                            :: acceptable_move, move_happened
     133              :       REAL(KIND=dp)                                      :: average_cost
     134           10 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: cost_per_atom, cost_per_proc
     135              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: pair_radii
     136           10 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: rcut
     137              :       TYPE(cell_type), POINTER                           :: cell
     138           10 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: mat_ks
     139              :       TYPE(fb_atomic_halo_obj)                           :: atomic_halo
     140              :       TYPE(fb_distribution_element)                      :: element
     141              :       TYPE(fb_distribution_list), ALLOCATABLE, &
     142           10 :          DIMENSION(:)                                    :: dist
     143              :       TYPE(fb_preferred_procs_list), ALLOCATABLE, &
     144           10 :          DIMENSION(:)                                    :: preferred_procs_set
     145              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     146           10 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     147           10 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     148              : 
     149           10 :       CALL timeset(routineN, handle)
     150              : 
     151           10 :       NULLIFY (mat_ks, rcut, cell, para_env, particle_set, qs_kind_set, &
     152           10 :                halo_atoms, local_atoms)
     153           10 :       CALL fb_atomic_halo_nullify(atomic_halo)
     154              : 
     155              :       ! obtain relevant data from fb_env, qs_env
     156              :       CALL fb_env_get(fb_env=fb_env, &
     157           10 :                       rcut=rcut)
     158              :       CALL get_qs_env(qs_env=qs_env, &
     159              :                       natom=natoms, &
     160              :                       particle_set=particle_set, &
     161              :                       qs_kind_set=qs_kind_set, &
     162              :                       nkind=nkinds, &
     163              :                       cell=cell, &
     164              :                       para_env=para_env, &
     165           10 :                       matrix_ks=mat_ks)
     166           10 :       nprocs = para_env%num_pe
     167           10 :       my_pe = para_env%mepos + 1 ! counting from 1
     168              : 
     169              :       ! for each global atom, build atomic halo and get the associated cost
     170           40 :       ALLOCATE (pair_radii(nkinds, nkinds))
     171           10 :       CALL fb_build_pair_radii(rcut, nkinds, pair_radii)
     172           10 :       CALL fb_atomic_halo_create(atomic_halo)
     173           30 :       ALLOCATE (cost_per_atom(natoms))
     174           90 :       DO iatom = 1, natoms
     175           80 :          CALL fb_atomic_halo_init(atomic_halo)
     176              :          CALL fb_atomic_halo_build_halo_atoms(iatom, &
     177              :                                               particle_set, &
     178              :                                               cell, &
     179              :                                               pair_radii, &
     180              :                                               halo_atoms, &
     181              :                                               nhalo_atoms, &
     182           80 :                                               owner_id_in_halo)
     183              :          CALL fb_atomic_halo_set(atomic_halo=atomic_halo, &
     184              :                                  owner_atom=iatom, &
     185              :                                  natoms=nhalo_atoms, &
     186           80 :                                  halo_atoms=halo_atoms)
     187           80 :          NULLIFY (halo_atoms)
     188          170 :          cost_per_atom(iatom) = fb_atomic_halo_cost(atomic_halo, particle_set, qs_kind_set)
     189              :       END DO
     190           10 :       DEALLOCATE (pair_radii)
     191           10 :       CALL fb_atomic_halo_release(atomic_halo)
     192              : 
     193              :       ! build the preferred_procs_set according to DBCSR mat H
     194          110 :       ALLOCATE (preferred_procs_set(natoms))
     195           30 :       ALLOCATE (common_set_ids(natoms))
     196              :       CALL fb_build_preferred_procs(mat_ks(1)%matrix, &
     197              :                                     natoms, &
     198              :                                     preferred_procs_set, &
     199              :                                     common_set_ids, &
     200           10 :                                     n_common_sets)
     201              : 
     202              :       ! for each atomic halo, construct distribution_element, and assign
     203              :       ! the element to a processors using preferred_procs_set in a
     204              :       ! round-robin manner
     205           50 :       ALLOCATE (dist(nprocs))
     206           30 :       DO ipe = 1, nprocs
     207           30 :          CALL fb_distribution_init(dist=dist(ipe))
     208              :       END DO
     209           30 :       ALLOCATE (pos_in_preferred_list(n_common_sets))
     210           20 :       pos_in_preferred_list(:) = 0
     211           90 :       DO iatom = 1, natoms
     212           80 :          element%id = iatom
     213           80 :          element%cost = cost_per_atom(iatom)
     214           80 :          i_common_set = common_set_ids(iatom)
     215              :          pos_in_preferred_list(i_common_set) = &
     216              :             MOD(pos_in_preferred_list(i_common_set), &
     217           80 :                 preferred_procs_set(iatom)%nprocs) + 1
     218           80 :          ipe = preferred_procs_set(iatom)%list(pos_in_preferred_list(i_common_set))
     219           90 :          CALL fb_distribution_add(dist(ipe), element)
     220              :       END DO
     221              : 
     222           10 :       DEALLOCATE (pos_in_preferred_list)
     223           10 :       DEALLOCATE (common_set_ids)
     224           10 :       DEALLOCATE (cost_per_atom)
     225              : 
     226              :       ! sort processors according to the overall cost of their assigned
     227              :       ! corresponding distribution
     228           30 :       ALLOCATE (cost_per_proc(nprocs))
     229           30 :       DO ipe = 1, nprocs
     230           30 :          cost_per_proc(ipe) = dist(ipe)%cost
     231              :       END DO
     232           30 :       ALLOCATE (pe(nprocs))
     233           10 :       CALL sort(cost_per_proc, nprocs, pe)
     234              :       ! now that cost_per_proc is sorted, ipe's no longer give mpi
     235              :       ! ranks, the correct one to use should be pe(ipe)
     236              : 
     237              :       ! work out the ideal average cost per proc if work load is evenly
     238              :       ! distributed
     239           30 :       average_cost = SUM(cost_per_proc)/REAL(nprocs, dp)
     240              : 
     241           10 :       DEALLOCATE (cost_per_proc)
     242              : 
     243              :       ! loop over the processors, starting with the highest cost, move
     244              :       ! atoms one by one:
     245              :       !   1. FIRST to the next processor in the preferred list that has
     246              :       !      cost below average. IF no such proc is found, THEN
     247              :       !   2. to the next procesor in the overall list that has cost
     248              :       !      below average.
     249              :       ! repeat until the cost on this processor is less than or equal
     250              :       ! to the average cost
     251           10 :       lowest_cost_ind = 1
     252           30 :       DO ipe = nprocs, 1, -1
     253           30 :          redistribute: DO WHILE (dist(pe(ipe))%cost .GT. average_cost)
     254            0 :             iatom = dist(pe(ipe))%list(lowest_cost_ind)%id
     255            0 :             move_happened = .FALSE.
     256              :             ! first try to move to a preferred process
     257            0 :             preferred: DO ii = 1, preferred_procs_set(iatom)%nprocs
     258            0 :                pref_pe = preferred_procs_set(iatom)%list(ii)
     259              :                acceptable_move = &
     260              :                   fb_distribution_acceptable_move(dist(pe(ipe)), &
     261              :                                                   dist(pe(ipe))%list(lowest_cost_ind), &
     262              :                                                   dist(pref_pe), &
     263            0 :                                                   average_cost)
     264            0 :                IF ((pref_pe .NE. pe(ipe)) .AND. acceptable_move) THEN
     265              :                   CALL fb_distribution_move(dist(pe(ipe)), &
     266              :                                             lowest_cost_ind, &
     267            0 :                                             dist(pref_pe))
     268              :                   move_happened = .TRUE.
     269              :                   EXIT preferred
     270              :                END IF
     271              :             END DO preferred
     272              :             ! if no preferred process is available, move to a proc in
     273              :             ! the sorted list that has cost less than average.  remember
     274              :             ! that some of the proc may have already taken redistributed
     275              :             ! atoms, and thus may become unavailable (full)
     276              :             IF (.NOT. move_happened) THEN
     277              :                ! searching from the proc with the least initial cost
     278            0 :                next_in_line: DO ii = 1, nprocs
     279              :                   acceptable_move = &
     280              :                      fb_distribution_acceptable_move(dist(pe(ipe)), &
     281              :                                                      dist(pe(ipe))%list(lowest_cost_ind), &
     282              :                                                      dist(pe(ii)), &
     283            0 :                                                      average_cost)
     284            0 :                   IF ((pe(ii) .NE. pe(ipe)) .AND. acceptable_move) THEN
     285              :                      CALL fb_distribution_move(dist(pe(ipe)), &
     286              :                                                lowest_cost_ind, &
     287            0 :                                                dist(pe(ii)))
     288            0 :                      move_happened = .TRUE.
     289            0 :                      EXIT next_in_line
     290              :                   END IF
     291              :                END DO next_in_line
     292              :             END IF
     293              :             ! if the atom cannot be moved, then this means it is too
     294              :             ! costly for all other processes to accept. When this
     295              :             ! happens we must stop the redistribution process for this
     296              :             ! processor---as all other of its atoms will be even more
     297              :             ! costly
     298           20 :             IF (.NOT. move_happened) THEN
     299              :                EXIT redistribute
     300              :             END IF
     301              :          END DO redistribute ! while
     302              :       END DO ! ipe
     303              : 
     304           10 :       DEALLOCATE (pe)
     305           90 :       DO ii = 1, SIZE(preferred_procs_set)
     306           90 :          CALL fb_preferred_procs_list_release(preferred_procs_set(ii))
     307              :       END DO
     308           10 :       DEALLOCATE (preferred_procs_set)
     309              : 
     310              :       ! generate local atoms from dist
     311           30 :       ALLOCATE (local_atoms_all(natoms))
     312           20 :       ALLOCATE (local_atoms_starts(nprocs))
     313           20 :       ALLOCATE (local_atoms_sizes(nprocs))
     314              :       CALL fb_distribution_to_local_atoms(dist, &
     315              :                                           local_atoms_all, &
     316              :                                           local_atoms_starts, &
     317           10 :                                           local_atoms_sizes)
     318           30 :       ALLOCATE (local_atoms(local_atoms_sizes(my_pe)))
     319           10 :       lb = local_atoms_starts(my_pe)
     320           10 :       ub = local_atoms_starts(my_pe) + local_atoms_sizes(my_pe) - 1
     321           50 :       local_atoms(1:local_atoms_sizes(my_pe)) = local_atoms_all(lb:ub)
     322              :       CALL fb_env_set(fb_env=fb_env, &
     323              :                       local_atoms=local_atoms, &
     324           10 :                       nlocal_atoms=local_atoms_sizes(my_pe))
     325              : 
     326              :       ! write out info
     327           10 :       CALL fb_distribution_write_info(dist, scf_section)
     328              : 
     329           10 :       DEALLOCATE (local_atoms_all)
     330           10 :       DEALLOCATE (local_atoms_starts)
     331           10 :       DEALLOCATE (local_atoms_sizes)
     332           30 :       DO ipe = 1, SIZE(dist)
     333           30 :          CALL fb_distribution_release(dist(ipe))
     334              :       END DO
     335           10 :       DEALLOCATE (dist)
     336              : 
     337           10 :       CALL timestop(handle)
     338              : 
     339           20 :    END SUBROUTINE fb_distribution_build
     340              : 
     341              : ! **************************************************************************************************
     342              : !> \brief Checks if moving an element from one distribution to another is
     343              : !>        allowed in mind of load balancing.
     344              : !> \param dist_from : the source distribution
     345              : !> \param element   : the element in source distribution considered for the
     346              : !>                    move
     347              : !> \param dist_to   : the destination distribution
     348              : !> \param threshold ...
     349              : !> \return : TRUE or FALSE
     350              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     351              : ! **************************************************************************************************
     352            0 :    PURE FUNCTION fb_distribution_acceptable_move(dist_from, &
     353              :                                                  element, &
     354              :                                                  dist_to, &
     355              :                                                  threshold) &
     356              :       RESULT(acceptable)
     357              :       TYPE(fb_distribution_list), INTENT(IN)             :: dist_from
     358              :       TYPE(fb_distribution_element), INTENT(IN)          :: element
     359              :       TYPE(fb_distribution_list), INTENT(IN)             :: dist_to
     360              :       REAL(KIND=dp), INTENT(IN)                          :: threshold
     361              :       LOGICAL                                            :: acceptable
     362              : 
     363              :       acceptable = (dist_to%cost + element%cost .LT. dist_from%cost) .AND. &
     364            0 :                    (dist_to%cost .LT. threshold)
     365            0 :    END FUNCTION fb_distribution_acceptable_move
     366              : 
     367              : ! **************************************************************************************************
     368              : !> \brief Write out information on the load distribution on processors
     369              : !> \param dist_set    : set of distributions for the processors
     370              : !> \param scf_section : SCF input section
     371              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     372              : ! **************************************************************************************************
     373           10 :    SUBROUTINE fb_distribution_write_info(dist_set, scf_section)
     374              :       TYPE(fb_distribution_list), DIMENSION(:), &
     375              :          INTENT(IN)                                      :: dist_set
     376              :       TYPE(section_vals_type), POINTER                   :: scf_section
     377              : 
     378              :       INTEGER                                            :: ii, max_natoms, min_natoms, natoms, &
     379              :                                                             nprocs, unit_nr
     380              :       REAL(KIND=dp)                                      :: ave_cost, ave_natoms, max_cost, &
     381              :                                                             min_cost, total_cost
     382              :       TYPE(cp_logger_type), POINTER                      :: logger
     383              : 
     384           10 :       nprocs = SIZE(dist_set)
     385           10 :       natoms = 0
     386           10 :       total_cost = 0.0_dp
     387           30 :       DO ii = 1, nprocs
     388           20 :          natoms = natoms + dist_set(ii)%nelements
     389           30 :          total_cost = total_cost + dist_set(ii)%cost
     390              :       END DO
     391           10 :       ave_natoms = REAL(natoms, dp)/REAL(nprocs, dp)
     392           10 :       ave_cost = total_cost/REAL(nprocs, dp)
     393           10 :       max_natoms = 0
     394           10 :       max_cost = 0._dp
     395           30 :       DO ii = 1, nprocs
     396           20 :          max_natoms = MAX(max_natoms, dist_set(ii)%nelements)
     397           30 :          max_cost = MAX(max_cost, dist_set(ii)%cost)
     398              :       END DO
     399           10 :       min_natoms = natoms
     400           10 :       min_cost = total_cost
     401           30 :       DO ii = 1, nprocs
     402           20 :          min_natoms = MIN(min_natoms, dist_set(ii)%nelements)
     403           30 :          min_cost = MIN(min_cost, dist_set(ii)%cost)
     404              :       END DO
     405              : 
     406           10 :       logger => cp_get_default_logger()
     407              :       unit_nr = cp_print_key_unit_nr(logger, scf_section, &
     408              :                                      "PRINT%FILTER_MATRIX", &
     409           10 :                                      extension="")
     410              : 
     411           10 :       IF (unit_nr > 0) THEN
     412              :          WRITE (UNIT=unit_nr, FMT="(/,A,I6,A)") &
     413            5 :             " FILTER_MAT_DIAG| Load distribution across ", nprocs, " processors:"
     414              :          WRITE (UNIT=unit_nr, &
     415              :                 FMT="(A,T40,A,T55,A,T70,A,T85,A)") &
     416            5 :             " FILTER_MAT_DIAG| ", "Total", "Average", "Max", "Min"
     417              :          WRITE (UNIT=unit_nr, &
     418              :                 FMT="(A,T40,I12,T55,F12.1,T70,I12,T85,I10)") &
     419            5 :             " FILTER_MAT_DIAG|   Atomic Matrices", &
     420           10 :             natoms, ave_natoms, max_natoms, min_natoms
     421              :          WRITE (UNIT=unit_nr, &
     422              :                 FMT="(A,T40,D12.7,T55,D12.7,T70,D12.7,T85,D12.7)") &
     423            5 :             " FILTER_MAT_DIAG|   Cost*", &
     424           10 :             total_cost, ave_cost, max_cost, min_cost
     425              :          WRITE (UNIT=unit_nr, FMT="(A)") &
     426            5 :             " FILTER_MAT_DIAG| (* cost is calculated as sum of cube of atomic matrix sizes)"
     427              :       END IF
     428              :       CALL cp_print_key_finished_output(unit_nr, logger, scf_section, &
     429           10 :                                         "PRINT%FILTER_MATRIX")
     430           10 :    END SUBROUTINE fb_distribution_write_info
     431              : 
     432              : ! **************************************************************************************************
     433              : !> \brief Build the preferred list of processors for atoms
     434              : !> \param dbcsr_mat   : the reference DBCSR matrix, from which the local block
     435              : !>                      cols and the processor maps are obtained
     436              : !> \param natoms      : total number of atoms globally
     437              : !> \param preferred_procs_set : set of preferred procs list for each atom
     438              : !> \param common_set_ids : atoms (block cols) local to the same processor grid
     439              : !>                         col will have the same preferred list. This list
     440              : !>                         maps each atom to their corresponding group
     441              : !> \param n_common_sets  : number of unique preferred lists (groups)
     442              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     443              : ! **************************************************************************************************
     444           40 :    SUBROUTINE fb_build_preferred_procs(dbcsr_mat, &
     445              :                                        natoms, &
     446           10 :                                        preferred_procs_set, &
     447           10 :                                        common_set_ids, &
     448              :                                        n_common_sets)
     449              :       TYPE(dbcsr_type), POINTER                          :: dbcsr_mat
     450              :       INTEGER, INTENT(IN)                                :: natoms
     451              :       TYPE(fb_preferred_procs_list), DIMENSION(:), &
     452              :          INTENT(INOUT)                                   :: preferred_procs_set
     453              :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: common_set_ids
     454              :       INTEGER, INTENT(OUT)                               :: n_common_sets
     455              : 
     456              :       INTEGER                                            :: icol, nblkcols_tot, nprows, pcol, prow
     457           10 :       INTEGER, DIMENSION(:), POINTER                     :: col_dist
     458           10 :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
     459              :       TYPE(dbcsr_distribution_type)                      :: dbcsr_dist
     460              : 
     461           10 :       CALL dbcsr_get_info(dbcsr_mat, nblkcols_total=nblkcols_tot)
     462           10 :       CPASSERT(natoms <= nblkcols_tot)
     463           10 :       CPASSERT(SIZE(preferred_procs_set) >= natoms)
     464           10 :       CPASSERT(SIZE(common_set_ids) >= natoms)
     465              : 
     466           10 :       CALL dbcsr_get_info(dbcsr_mat, distribution=dbcsr_dist, proc_col_dist=col_dist)
     467           10 :       CALL dbcsr_distribution_get(dbcsr_dist, pgrid=pgrid, nprows=nprows, npcols=n_common_sets)
     468              : 
     469           90 :       DO icol = 1, natoms
     470           80 :          IF (ASSOCIATED(preferred_procs_set(icol)%list)) THEN
     471            0 :             DEALLOCATE (preferred_procs_set(icol)%list)
     472              :          END IF
     473          240 :          ALLOCATE (preferred_procs_set(icol)%list(nprows))
     474           80 :          pcol = col_dist(icol)
     475              :          ! dbcsr prow and pcol counts from 0
     476          240 :          DO prow = 0, nprows - 1
     477              :             ! here, we count processes from 1, so +1 from mpirank
     478          240 :             preferred_procs_set(icol)%list(prow + 1) = pgrid(prow, pcol) + 1
     479              :          END DO
     480           90 :          preferred_procs_set(icol)%nprocs = nprows
     481              :       END DO
     482              : 
     483           90 :       common_set_ids(:) = 0
     484           90 :       common_set_ids(1:natoms) = col_dist(1:natoms) + 1
     485              : 
     486           10 :    END SUBROUTINE fb_build_preferred_procs
     487              : 
     488              : ! **************************************************************************************************
     489              : !> \brief Release a preferred_procs_list
     490              : !> \param preferred_procs_list  : the preferred procs list in question
     491              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     492              : ! **************************************************************************************************
     493           80 :    SUBROUTINE fb_preferred_procs_list_release(preferred_procs_list)
     494              :       TYPE(fb_preferred_procs_list), INTENT(INOUT)       :: preferred_procs_list
     495              : 
     496           80 :       IF (ASSOCIATED(preferred_procs_list%list)) THEN
     497           80 :          DEALLOCATE (preferred_procs_list%list)
     498              :       END IF
     499           80 :    END SUBROUTINE fb_preferred_procs_list_release
     500              : 
     501              : ! **************************************************************************************************
     502              : !> \brief Convert distribution data to 1D array containing information of
     503              : !>        which atoms are distributed to which processor
     504              : !> \param dist_set    : set of distributions for the processors
     505              : !> \param local_atoms : continuous array of atoms arranged in order
     506              : !>                      corresponding their allocated processors
     507              : !> \param local_atoms_starts : starting position in local_atoms array for
     508              : !>                             each processor
     509              : !> \param local_atoms_sizes  : number of atoms local to each processor
     510              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     511              : ! **************************************************************************************************
     512           10 :    SUBROUTINE fb_distribution_to_local_atoms(dist_set, &
     513           20 :                                              local_atoms, &
     514           10 :                                              local_atoms_starts, &
     515           10 :                                              local_atoms_sizes)
     516              :       TYPE(fb_distribution_list), DIMENSION(:), &
     517              :          INTENT(IN)                                      :: dist_set
     518              :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: local_atoms, local_atoms_starts, &
     519              :                                                             local_atoms_sizes
     520              : 
     521              :       INTEGER                                            :: iatom, ipe, n_procs, pos
     522              :       LOGICAL                                            :: check_ok
     523              : 
     524           10 :       n_procs = SIZE(dist_set)
     525              : 
     526           10 :       check_ok = SIZE(local_atoms_starts) .GE. n_procs
     527           10 :       CPASSERT(check_ok)
     528           10 :       check_ok = SIZE(local_atoms_sizes) .GE. n_procs
     529           10 :       CPASSERT(check_ok)
     530              : 
     531           90 :       local_atoms(:) = 0
     532           30 :       local_atoms_starts(:) = 0
     533           30 :       local_atoms_sizes(:) = 0
     534              : 
     535              :       pos = 1
     536           30 :       DO ipe = 1, n_procs
     537           20 :          local_atoms_starts(ipe) = pos
     538          110 :          DO iatom = 1, dist_set(ipe)%nelements
     539           80 :             local_atoms(pos) = dist_set(ipe)%list(iatom)%id
     540           80 :             pos = pos + 1
     541          100 :             local_atoms_sizes(ipe) = local_atoms_sizes(ipe) + 1
     542              :          END DO
     543              :       END DO
     544           10 :    END SUBROUTINE fb_distribution_to_local_atoms
     545              : 
     546              : ! **************************************************************************************************
     547              : !> \brief Initialise a distribution
     548              : !> \param dist        : the distribution in question
     549              : !> \param nmax        : [OPTIONAL] size of the list array to be allocated
     550              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     551              : ! **************************************************************************************************
     552           20 :    SUBROUTINE fb_distribution_init(dist, nmax)
     553              :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
     554              :       INTEGER, INTENT(IN), OPTIONAL                      :: nmax
     555              : 
     556              :       INTEGER                                            :: ii, my_nmax
     557              : 
     558           20 :       my_nmax = 0
     559           20 :       IF (PRESENT(nmax)) my_nmax = nmax
     560           20 :       IF (ASSOCIATED(dist%list)) THEN
     561            0 :          DEALLOCATE (dist%list)
     562              :       END IF
     563           20 :       NULLIFY (dist%list)
     564           20 :       IF (my_nmax .GT. 0) THEN
     565            0 :          ALLOCATE (dist%list(my_nmax))
     566            0 :          DO ii = 1, SIZE(dist%list)
     567            0 :             dist%list(ii)%id = 0
     568            0 :             dist%list(ii)%cost = 0.0_dp
     569              :          END DO
     570              :       END IF
     571           20 :       dist%nelements = 0
     572           20 :       dist%cost = 0.0_dp
     573           20 :    END SUBROUTINE fb_distribution_init
     574              : 
     575              : ! **************************************************************************************************
     576              : !> \brief Resize the list array in a distribution
     577              : !> \param dist        : The distribution in question
     578              : !> \param nmax        : new size of the list array
     579              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     580              : ! **************************************************************************************************
     581           60 :    SUBROUTINE fb_distribution_resize(dist, nmax)
     582              :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
     583              :       INTEGER, INTENT(IN)                                :: nmax
     584              : 
     585              :       INTEGER                                            :: ii, my_nmax
     586              :       TYPE(fb_distribution_element), DIMENSION(:), &
     587           60 :          POINTER                                         :: new_list
     588              : 
     589           60 :       IF (.NOT. ASSOCIATED(dist%list)) THEN
     590           20 :          my_nmax = MAX(nmax, 1)
     591           80 :          ALLOCATE (dist%list(my_nmax))
     592              :       ELSE
     593           40 :          my_nmax = MAX(nmax, dist%nelements)
     594          240 :          ALLOCATE (new_list(my_nmax))
     595          160 :          DO ii = 1, SIZE(new_list)
     596          120 :             new_list(ii)%id = 0
     597          160 :             new_list(ii)%cost = 0.0_dp
     598              :          END DO
     599          100 :          DO ii = 1, dist%nelements
     600          100 :             new_list(ii) = dist%list(ii)
     601              :          END DO
     602           40 :          DEALLOCATE (dist%list)
     603           40 :          dist%list => new_list
     604              :       END IF
     605           60 :    END SUBROUTINE fb_distribution_resize
     606              : 
     607              : ! **************************************************************************************************
     608              : !> \brief Add an atom (element) to a distribution
     609              : !> \param dist        : the distribution in question
     610              : !> \param element     : the element to be added
     611              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     612              : ! **************************************************************************************************
     613           80 :    SUBROUTINE fb_distribution_add(dist, element)
     614              :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
     615              :       TYPE(fb_distribution_element), INTENT(IN)          :: element
     616              : 
     617              :       INTEGER                                            :: ii, new_nelements, pos
     618              : 
     619           80 :       new_nelements = dist%nelements + 1
     620              : 
     621              :       ! resize list if necessary
     622           80 :       IF (.NOT. ASSOCIATED(dist%list)) THEN
     623           20 :          CALL fb_distribution_resize(dist, new_nelements)
     624           60 :       ELSE IF (new_nelements*ENLARGE_RATIO .GT. SIZE(dist%list)) THEN
     625           40 :          CALL fb_distribution_resize(dist, SIZE(dist%list)*EXPAND_FACTOR)
     626              :       END IF
     627              :       ! assuming the list of elements is always sorted with respect to cost
     628              :       ! slot the new element into the appropriate spot
     629           80 :       IF (new_nelements == 1) THEN
     630           20 :          dist%list(1) = element
     631              :       ELSE
     632           60 :          pos = fb_distribution_find_slot(dist, element)
     633           60 :          DO ii = dist%nelements, pos, -1
     634           60 :             dist%list(ii + 1) = dist%list(ii)
     635              :          END DO
     636           60 :          dist%list(pos) = element
     637              :       END IF
     638           80 :       dist%nelements = new_nelements
     639           80 :       dist%cost = dist%cost + element%cost
     640           80 :    END SUBROUTINE fb_distribution_add
     641              : 
     642              : ! **************************************************************************************************
     643              : !> \brief Find the correct slot in the list array to add a new element, so that
     644              : !>        the list will always be ordered with respect to cost
     645              : !> \param dist        : the distribution in question
     646              : !> \param element     : element to be added
     647              : !> \return : the correct position to add the new element
     648              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     649              : ! **************************************************************************************************
     650           60 :    PURE FUNCTION fb_distribution_find_slot(dist, element) RESULT(pos)
     651              :       TYPE(fb_distribution_list), INTENT(IN)             :: dist
     652              :       TYPE(fb_distribution_element), INTENT(IN)          :: element
     653              :       INTEGER                                            :: pos
     654              : 
     655              :       INTEGER                                            :: lower, middle, N, upper
     656              : 
     657           60 :       N = dist%nelements
     658           60 :       IF (element%cost .LT. dist%list(1)%cost) THEN
     659           60 :          pos = 1
     660              :          RETURN
     661              :       END IF
     662           60 :       IF (element%cost .GE. dist%list(N)%cost) THEN
     663           60 :          pos = N + 1
     664           60 :          RETURN
     665              :       END IF
     666              :       lower = 1
     667              :       upper = N
     668            0 :       DO WHILE ((upper - lower) .GT. 1)
     669            0 :          middle = (lower + upper)/2
     670            0 :          IF (element%cost .LT. dist%list(middle)%cost) THEN
     671              :             upper = middle
     672              :          ELSE
     673            0 :             lower = middle
     674              :          END IF
     675              :       END DO
     676           60 :       pos = upper
     677              :    END FUNCTION fb_distribution_find_slot
     678              : 
     679              : ! **************************************************************************************************
     680              : !> \brief Remove the pos-th element from a distribution
     681              : !> \param dist        : the distribution in question
     682              : !> \param pos         : index of the element in the list array
     683              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     684              : ! **************************************************************************************************
     685            0 :    SUBROUTINE fb_distribution_remove_ind(dist, pos)
     686              :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
     687              :       INTEGER, INTENT(IN)                                :: pos
     688              : 
     689              :       INTEGER                                            :: ii
     690              :       LOGICAL                                            :: check_ok
     691              : 
     692            0 :       check_ok = pos .GT. 0
     693            0 :       CPASSERT(check_ok)
     694            0 :       IF (pos .LE. dist%nelements) THEN
     695            0 :          dist%cost = dist%cost - dist%list(pos)%cost
     696            0 :          DO ii = pos, dist%nelements - 1
     697            0 :             dist%list(ii) = dist%list(ii + 1)
     698              :          END DO
     699            0 :          dist%list(dist%nelements)%id = 0
     700            0 :          dist%list(dist%nelements)%cost = 0.0_dp
     701            0 :          dist%nelements = dist%nelements - 1
     702              :          ! auto resize if required
     703            0 :          IF (dist%nelements*REDUCE_RATIO .LT. SIZE(dist%list)) THEN
     704            0 :             CALL fb_distribution_resize(dist, dist%nelements/SHRINK_FACTOR)
     705              :          END IF
     706              :       END IF
     707            0 :    END SUBROUTINE fb_distribution_remove_ind
     708              : 
     709              : ! **************************************************************************************************
     710              : !> \brief Remove a given element from a distribution
     711              : !> \param dist        : the distribution in question
     712              : !> \param element     : the element in question
     713              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     714              : ! **************************************************************************************************
     715            0 :    SUBROUTINE fb_distribution_remove_el(dist, element)
     716              :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
     717              :       TYPE(fb_distribution_element), INTENT(IN)          :: element
     718              : 
     719              :       INTEGER                                            :: ii, pos
     720              : 
     721            0 :       pos = dist%nelements + 1
     722            0 :       DO ii = 1, dist%nelements
     723            0 :          IF (element%id == dist%list(ii)%id) THEN
     724            0 :             pos = ii
     725            0 :             EXIT
     726              :          END IF
     727              :       END DO
     728            0 :       CALL fb_distribution_remove_ind(dist, pos)
     729            0 :    END SUBROUTINE fb_distribution_remove_el
     730              : 
     731              : ! **************************************************************************************************
     732              : !> \brief Move the pos-th element from a distribution to another
     733              : !> \param dist_from   : the source distribution
     734              : !> \param pos         : index of the element in the source distribution
     735              : !> \param dist_to     : the destination distribution
     736              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     737              : ! **************************************************************************************************
     738            0 :    SUBROUTINE fb_distribution_move_ind(dist_from, pos, dist_to)
     739              :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist_from
     740              :       INTEGER, INTENT(IN)                                :: pos
     741              :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist_to
     742              : 
     743              :       LOGICAL                                            :: check_ok
     744              :       TYPE(fb_distribution_element)                      :: element
     745              : 
     746            0 :       check_ok = ASSOCIATED(dist_from%list)
     747            0 :       CPASSERT(check_ok)
     748            0 :       check_ok = pos .LE. dist_from%nelements
     749            0 :       CPASSERT(check_ok)
     750            0 :       element = dist_from%list(pos)
     751            0 :       CALL fb_distribution_add(dist_to, element)
     752            0 :       CALL fb_distribution_remove(dist_from, pos)
     753            0 :    END SUBROUTINE fb_distribution_move_ind
     754              : 
     755              : ! **************************************************************************************************
     756              : !> \brief Move a given element from a distribution to another
     757              : !> \param dist_from   : the source distribution
     758              : !> \param element     : the element in question
     759              : !> \param dist_to     : the destination distribution
     760              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     761              : ! **************************************************************************************************
     762            0 :    SUBROUTINE fb_distribution_move_el(dist_from, element, dist_to)
     763              :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist_from
     764              :       TYPE(fb_distribution_element), INTENT(IN)          :: element
     765              :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist_to
     766              : 
     767              :       LOGICAL                                            :: check_ok
     768              : 
     769            0 :       check_ok = ASSOCIATED(dist_from%list)
     770            0 :       CPASSERT(check_ok)
     771            0 :       CALL fb_distribution_add(dist_to, element)
     772            0 :       CALL fb_distribution_remove(dist_from, element)
     773            0 :    END SUBROUTINE fb_distribution_move_el
     774              : 
     775              : ! **************************************************************************************************
     776              : !> \brief Release a distribution
     777              : !> \param dist  : the distribution in question
     778              : !> \author Lianheng Tong (LT) lianheng.tong@kcl.ac.uk
     779              : ! **************************************************************************************************
     780           20 :    SUBROUTINE fb_distribution_release(dist)
     781              :       TYPE(fb_distribution_list), INTENT(INOUT)          :: dist
     782              : 
     783           20 :       IF (ASSOCIATED(dist%list)) THEN
     784           20 :          DEALLOCATE (dist%list)
     785              :       END IF
     786           20 :    END SUBROUTINE fb_distribution_release
     787              : 
     788            0 : END MODULE qs_fb_distribution_methods
        

Generated by: LCOV version 2.0-1