LCOV - code coverage report
Current view: top level - src/fm - cp_fm_diag_utils.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:42dac4a) Lines: 71.9 % 135 97
Test Date: 2025-07-25 12:55:17 Functions: 66.7 % 9 6

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Auxiliary tools to redistribute cp_fm_type matrices before and after diagonalization.
      10              : !>        Heuristics are used to determine the optimal number of CPUs for diagonalization and the
      11              : !>        input matrices are redistributed if necessary
      12              : !> \par History
      13              : !>      - [01.2018] moved redistribution related code from cp_fm_syevd here
      14              : !> \author Nico Holmberg [01.2018]
      15              : ! **************************************************************************************************
      16              : MODULE cp_fm_diag_utils
      17              :    USE cp_blacs_env,                    ONLY: cp_blacs_env_create,&
      18              :                                               cp_blacs_env_release,&
      19              :                                               cp_blacs_env_type
      20              :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      21              :                                               cp_fm_struct_release,&
      22              :                                               cp_fm_struct_type
      23              :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      24              :                                               cp_fm_get_info,&
      25              :                                               cp_fm_release,&
      26              :                                               cp_fm_type
      27              :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      28              :                                               cp_logger_get_default_io_unit,&
      29              :                                               cp_logger_type
      30              :    USE kinds,                           ONLY: dp
      31              :    USE mathlib,                         ONLY: gcd
      32              :    USE message_passing,                 ONLY: mp_para_env_type
      33              : #include "../base/base_uses.f90"
      34              : 
      35              :    IMPLICIT NONE
      36              : 
      37              :    PRIVATE
      38              : 
      39              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_fm_diag_utils'
      40              : 
      41              :    ! Information on redistribution
      42              :    TYPE, PUBLIC :: cp_fm_redistribute_info
      43              :       INTEGER :: matrix_order = -1
      44              :       INTEGER :: num_pe_old = -1 ! number of processes before a potential redistribute
      45              :       INTEGER :: num_pe_new = -1 ! number of processes after a potential redistribute
      46              :       INTEGER :: num_pe_opt = -1 ! optimal number of processes for the given matrix
      47              :       INTEGER :: num_pe_max_nz_col = -1 ! the maximal number of processes s.t. no column has zero width, may be < 0 if ignored
      48              :       LOGICAL :: redistribute = .FALSE. ! whether or not the matrix was actually redistributed
      49              :    CONTAINS
      50              :       PROCEDURE, PASS(self) :: write => cp_fm_redistribute_info_write
      51              :    END TYPE
      52              : 
      53              :    ! Container for redistribution settings and temporary work structs
      54              :    TYPE cp_fm_redistribute_type
      55              :       ! Settings
      56              :       INTEGER                                  :: a = -1, x = -1
      57              :       LOGICAL                                  :: should_print = .FALSE.
      58              :       LOGICAL                                  :: elpa_force_redistribute = .FALSE.
      59              :       ! Temporaries
      60              :       INTEGER, DIMENSION(:), POINTER           :: group_distribution => NULL(), &
      61              :                                                   group_partition => NULL()
      62              :       TYPE(cp_blacs_env_type), POINTER         :: blacs_env_new => NULL()
      63              :       TYPE(mp_para_env_type), POINTER          :: para_env_new => NULL()
      64              :    END TYPE cp_fm_redistribute_type
      65              : 
      66              :    ! Permanent instance of the redistribute type
      67              :    TYPE(cp_fm_redistribute_type), PRIVATE, &
      68              :       SAVE                                     :: work_redistribute
      69              : 
      70              :    ! Public subroutines
      71              : 
      72              :    PUBLIC :: cp_fm_redistribute_start, &
      73              :              cp_fm_redistribute_end, &
      74              :              cp_fm_redistribute_init
      75              : 
      76              : CONTAINS
      77              : 
      78              : ! **************************************************************************************************
      79              : !> \brief Write the redistribute info nicely formatted to the given I/O unit
      80              : !> \param self reference to the cp_fm_redistribute_info instance
      81              : !> \param io_unit I/O unit to use for writing
      82              : ! **************************************************************************************************
      83            0 :    SUBROUTINE cp_fm_redistribute_info_write(self, io_unit)
      84              :       CLASS(cp_fm_redistribute_info), INTENT(IN) :: self
      85              :       INTEGER, INTENT(IN) :: io_unit
      86              : 
      87            0 :       WRITE (UNIT=io_unit, FMT="(A)") ""
      88              :       WRITE (UNIT=io_unit, FMT="(T2,A,T71,I10)") &
      89            0 :          "CP_FM_DIAG| Number of processes over which the matrix is distributed ", self%num_pe_old, &
      90            0 :          "CP_FM_DIAG| Matrix order ", self%matrix_order
      91              :       WRITE (UNIT=io_unit, FMT="(T2,A,T71,I10)") &
      92            0 :          "CP_FM_DIAG| Optimal number of CPUs ", self%num_pe_opt
      93            0 :       IF (self%num_pe_max_nz_col < 0) THEN
      94              :          WRITE (UNIT=io_unit, FMT="(T2,A,T71,A10)") &
      95            0 :             "CP_FM_DIAG| Maximum number of CPUs (with non-zero columns) ", "<N/A>"
      96              :       ELSE
      97              :          WRITE (UNIT=io_unit, FMT="(T2,A,T71,I10)") &
      98            0 :             "CP_FM_DIAG| Maximum number of CPUs (with non-zero columns): ", self%num_pe_max_nz_col
      99              :       END IF
     100            0 :       IF (self%redistribute) THEN
     101              :          WRITE (UNIT=io_unit, FMT="(T2,A,T71,I10)") &
     102            0 :             "CP_FM_DIAG| Number of processes for the redistribution ", self%num_pe_new
     103              :       ELSE
     104              :          WRITE (UNIT=io_unit, FMT="(T2,A)") &
     105            0 :             "CP_FM_DIAG| The matrix will NOT be redistributed"
     106              :       END IF
     107            0 :       WRITE (UNIT=io_unit, FMT="(A)") ""
     108              : 
     109            0 :    END SUBROUTINE cp_fm_redistribute_info_write
     110              : 
     111              : ! **************************************************************************************************
     112              : !> \brief  Releases the temporary storage needed when redistributing arrays
     113              : !> \param  has_redistributed flag that determines if the processors holds a part of the
     114              : !>                           redistributed array
     115              : !> \author Nico Holmberg [01.2018]
     116              : ! **************************************************************************************************
     117       160982 :    SUBROUTINE cp_fm_redistribute_work_finalize(has_redistributed)
     118              :       LOGICAL, INTENT(IN)                                :: has_redistributed
     119              : 
     120       160982 :       IF (ASSOCIATED(work_redistribute%group_distribution)) THEN
     121       160982 :          IF (has_redistributed) THEN
     122        82200 :             CALL cp_blacs_env_release(work_redistribute%blacs_env_new)
     123              :          END IF
     124       160982 :          CALL work_redistribute%para_env_new%free()
     125       160982 :          DEALLOCATE (work_redistribute%para_env_new)
     126       160982 :          DEALLOCATE (work_redistribute%group_distribution)
     127       160982 :          DEALLOCATE (work_redistribute%group_partition)
     128              :       END IF
     129              :       ! Return work to its initial state
     130       160982 :       work_redistribute = cp_fm_redistribute_type()
     131              : 
     132       160982 :    END SUBROUTINE cp_fm_redistribute_work_finalize
     133              : 
     134              : ! **************************************************************************************************
     135              : !> \brief  Initializes the parameters that determine how to calculate the optimal number of CPUs
     136              : !>         for diagonalizing a matrix. The parameters are read from the GLOBAL input section.
     137              : !> \param a                integer parameter used to define the rule for determining the optimal
     138              : !>                         number of CPUs for diagonalization
     139              : !> \param x                integer parameter used to define the rule for determining the optimal
     140              : !>                         number of CPUs for diagonalization
     141              : !> \param should_print     flag that determines if information about the redistribution process
     142              : !>                         should be printed
     143              : !> \param elpa_force_redistribute  flag that if redistribution should always be performed when
     144              : !>                                 the ELPA diagonalization library is in use
     145              : !> \author Nico Holmberg [01.2018]
     146              : ! **************************************************************************************************
     147         9835 :    SUBROUTINE cp_fm_redistribute_init(a, x, should_print, elpa_force_redistribute)
     148              :       INTEGER, INTENT(IN)                                :: a, x
     149              :       LOGICAL, INTENT(IN)                                :: should_print, elpa_force_redistribute
     150              : 
     151              :       work_redistribute%a = a
     152              :       work_redistribute%x = x
     153              :       work_redistribute%should_print = should_print
     154              :       work_redistribute%elpa_force_redistribute = elpa_force_redistribute
     155              :       ! Init work
     156         9835 :       work_redistribute = cp_fm_redistribute_type()
     157              : 
     158         9835 :    END SUBROUTINE cp_fm_redistribute_init
     159              : 
     160              : ! **************************************************************************************************
     161              : !> \brief  Calculates the optimal number of CPUs for diagonalizing a matrix.
     162              : !> \param  size  the size of the diagonalized matrix
     163              : !> \return the optimal number of CPUs
     164              : !> \author Nico Holmberg [01.2018]
     165              : ! **************************************************************************************************
     166       181144 :    PURE FUNCTION cp_fm_diag_get_optimal_ncpu(size) RESULT(ncpu)
     167              :       INTEGER, INTENT(IN)                                :: size
     168              :       INTEGER                                            :: ncpu
     169              : 
     170              :       ncpu = ((size + work_redistribute%a*work_redistribute%x - 1)/ &
     171       181144 :               (work_redistribute%a*work_redistribute%x))*work_redistribute%a
     172              : 
     173       181144 :    END FUNCTION cp_fm_diag_get_optimal_ncpu
     174              : 
     175              : #if defined(__parallel)
     176              : ! **************************************************************************************************
     177              : !> \brief  Determines the largest number of CPUs a matrix can be distributed on without any of the
     178              : !>         processors getting a zero-width column (currently only needed for ELPA).
     179              : !> \param  matrix the matrix that will be diagonalized
     180              : !> \return the maximum number of CPUs for ELPA
     181              : !> \author Nico Holmberg [01.2018]
     182              : ! **************************************************************************************************
     183        64091 :    FUNCTION cp_fm_max_ncpu_non_zero_column(matrix) RESULT(ncpu)
     184              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
     185              :       INTEGER                                            :: ncpu
     186              : 
     187              :       INTEGER                                            :: gcd_max, ipe, jpe, ncol_block, &
     188              :                                                             ncol_global, npcol, nrow_block, &
     189              :                                                             nrow_global, num_pe_old, nzero
     190        64091 :       INTEGER, DIMENSION(:), POINTER                     :: ncol_locals
     191              :       INTEGER, EXTERNAL                                  :: numroc
     192              : 
     193        64091 :       NULLIFY (ncol_locals)
     194              :       ! First check if there are any zero width columns in current layout
     195              :       CALL cp_fm_get_info(matrix, ncol_locals=ncol_locals, &
     196              :                           nrow_global=nrow_global, ncol_global=ncol_global, &
     197        64091 :                           nrow_block=nrow_block, ncol_block=ncol_block)
     198       128182 :       nzero = COUNT(ncol_locals == 0)
     199        64091 :       num_pe_old = matrix%matrix_struct%para_env%num_pe
     200        64091 :       ncpu = num_pe_old - nzero
     201              : 
     202              :       ! Avoid layouts with odd number of CPUs (blacs grid layout will be square)
     203        64091 :       IF (ncpu > 2) &
     204            0 :          ncpu = ncpu - MODULO(ncpu, 2)
     205              : 
     206              :       ! if there are no zero-width columns and the number of processors was even, leave it at that
     207        64091 :       IF (ncpu == num_pe_old) &
     208              :          RETURN
     209              : 
     210              :       ! Iteratively search for the maximum number of CPUs for ELPA
     211              :       ! On each step, we test whether the blacs grid created with ncpu processes
     212              :       ! contains any columns with zero width
     213            0 :       DO WHILE (ncpu > 1)
     214              :          ! Determine layout of new blacs grid with ncpu CPUs
     215              :          ! (snippet copied from cp_blacs_env.F:cp_blacs_env_create)
     216            0 :          gcd_max = -1
     217            0 :          DO ipe = 1, CEILING(SQRT(REAL(ncpu, dp)))
     218            0 :             jpe = ncpu/ipe
     219            0 :             IF (ipe*jpe .NE. ncpu) &
     220              :                CYCLE
     221            0 :             IF (gcd(ipe, jpe) >= gcd_max) THEN
     222            0 :                npcol = jpe
     223            0 :                gcd_max = gcd(ipe, jpe)
     224              :             END IF
     225              :          END DO
     226              : 
     227              :          ! Count the number of processors without any columns
     228              :          ! (snippet copied from cp_fm_struct.F:cp_fm_struct_create)
     229            0 :          nzero = 0
     230            0 :          DO ipe = 0, npcol - 1
     231            0 :             IF (numroc(ncol_global, ncol_block, ipe, 0, npcol) == 0) &
     232            0 :                nzero = nzero + 1
     233              :          END DO
     234              : 
     235            0 :          IF (nzero == 0) &
     236              :             EXIT
     237              : 
     238            0 :          ncpu = ncpu - nzero
     239              : 
     240            0 :          IF (ncpu > 2) &
     241            0 :             ncpu = ncpu - MODULO(ncpu, 2)
     242              :       END DO
     243              : 
     244        64091 :    END FUNCTION cp_fm_max_ncpu_non_zero_column
     245              : #endif
     246              : 
     247              : ! **************************************************************************************************
     248              : !> \brief   Determines the optimal number of CPUs for matrix diagonalization and redistributes
     249              : !>          the input matrices if necessary
     250              : !> \param matrix           the input cp_fm_type matrix to be diagonalized
     251              : !> \param eigenvectors     the cp_fm_type matrix that will hold the eigenvectors of the input matrix
     252              : !> \param matrix_new       the redistributed input matrix which will subsequently be diagonalized,
     253              : !>                         or a pointer to the original matrix if no redistribution is required
     254              : !> \param eigenvectors_new the redistributed eigenvectors matrix, or a pointer to the original
     255              : !>                         matrix if no redistribution is required
     256              : !> \param caller_is_elpa   flag that determines if ELPA is used for diagonalization
     257              : !> \param redist_info      get info about the redistribution
     258              : !> \par History
     259              : !>      - [01.2018] created by moving redistribution related code from cp_fm_syevd here
     260              : !> \author Nico Holmberg [01.2018]
     261              : ! **************************************************************************************************
     262        64091 :    SUBROUTINE cp_fm_redistribute_start(matrix, eigenvectors, matrix_new, eigenvectors_new, &
     263              :                                        caller_is_elpa, redist_info)
     264              : 
     265              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix, eigenvectors
     266              :       TYPE(cp_fm_type), INTENT(OUT)            :: matrix_new, eigenvectors_new
     267              :       LOGICAL, OPTIONAL, INTENT(IN)            :: caller_is_elpa
     268              : 
     269              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_redistribute_start'
     270              : 
     271              :       INTEGER                                  :: handle
     272              :       LOGICAL                                  :: is_elpa
     273              :       TYPE(cp_fm_redistribute_info), OPTIONAL, INTENT(OUT) :: redist_info
     274              : 
     275              : #if defined(__parallel)
     276              :       REAL(KIND=dp)                            :: fake_local_data(1, 1)
     277              :       INTEGER                                  :: fake_descriptor(9), mepos_old, &
     278              :                                                   io_unit, ngroups, ncol_block, blksize, nrow_block
     279              :       TYPE(cp_fm_struct_type), POINTER         :: fm_struct_new
     280              :       TYPE(mp_para_env_type), POINTER          :: para_env
     281              :       TYPE(cp_logger_type), POINTER            :: logger
     282              :       TYPE(cp_fm_redistribute_info)          :: rdinfo
     283              : #endif
     284              : 
     285       181144 :       CALL timeset(routineN, handle)
     286       181144 :       is_elpa = .FALSE.
     287       181144 :       IF (PRESENT(caller_is_elpa)) THEN
     288              : #if defined(__ELPA)
     289        64091 :          is_elpa = caller_is_elpa
     290              : #else
     291              :          CPABORT("CP2K compiled without the ELPA library.")
     292              : #endif
     293              :       END IF
     294              : 
     295              : #if defined(__parallel)
     296              : 
     297       181144 :       logger => cp_get_default_logger()
     298       181144 :       io_unit = cp_logger_get_default_io_unit(logger)
     299              : 
     300              :       ! first figure out the optimal number of cpus
     301              :       ! this is pure heuristics, the defaults are based on rosa timings
     302              :       ! that demonstrate that timings go up sharply if too many tasks are used
     303              :       ! we take a multiple of 4, and approximately n/60
     304       181144 :       para_env => matrix%matrix_struct%para_env
     305       181144 :       mepos_old = para_env%mepos
     306       181144 :       ncol_block = -1 ! normally we also want to adjust the block size according to the optimal # of CPUs
     307       181144 :       nrow_block = -1
     308       181144 :       blksize = -1
     309              : 
     310       181144 :       rdinfo%matrix_order = matrix%matrix_struct%nrow_global
     311       181144 :       rdinfo%num_pe_old = para_env%num_pe
     312       181144 :       rdinfo%num_pe_opt = cp_fm_diag_get_optimal_ncpu(rdinfo%matrix_order)
     313       181144 :       rdinfo%num_pe_new = rdinfo%num_pe_opt
     314              :       rdinfo%num_pe_max_nz_col = -1
     315              :       rdinfo%redistribute = .FALSE.
     316              : 
     317       181144 :       IF (is_elpa) THEN
     318              :          ! with ELPA we don't have to redistribute if not necessary (scales, unlike ScaLAPACK)
     319        64091 :          rdinfo%num_pe_new = rdinfo%num_pe_old
     320              : 
     321              :          ! BUT: Diagonalization with ELPA fails when a processor column has zero width
     322              :          ! Determine the maximum number of CPUs the matrix can be distributed without zero-width columns
     323              :          ! for the current block size.
     324        64091 :          rdinfo%num_pe_max_nz_col = cp_fm_max_ncpu_non_zero_column(matrix)
     325              : 
     326              :          ! if the user wants to redistribute to the ScaLAPACK optimal number of CPUs anyway, let him if it's safe.
     327        64091 :          IF (work_redistribute%elpa_force_redistribute .AND. rdinfo%num_pe_opt < rdinfo%num_pe_max_nz_col) THEN
     328              :             ! Use heuristics to determine the need for redistribution (when num_pe_opt is smaller than the safe maximum)
     329              :             ! in this case we can also take the block size used for ScaLAPACK
     330            0 :             rdinfo%num_pe_new = rdinfo%num_pe_opt
     331        64091 :          ELSE IF (rdinfo%num_pe_old > rdinfo%num_pe_max_nz_col) THEN
     332              :             ! Otherwise, only redistribute if we have to
     333            0 :             rdinfo%num_pe_new = rdinfo%num_pe_max_nz_col
     334              :             ! do NOT let cp_fm_struct_create automatically adjust the block size because the
     335              :             ! calculated number of processors such that no block has 0 columns wouldn't match (see #578):
     336              :             ! if the automatically chosen block size is larger than the present one we would still end
     337              :             ! up with empty processors
     338              :          END IF
     339              : 
     340        64091 :          CALL cp_fm_get_info(matrix, ncol_block=ncol_block, nrow_block=nrow_block)
     341              : 
     342              :          ! On GPUs, ELPA requires the block size to be a power of 2
     343        64091 :          blksize = 1
     344       292208 :          DO WHILE (2*blksize <= MIN(nrow_block, ncol_block))
     345        64091 :             blksize = blksize*2
     346              :          END DO
     347        64091 :          nrow_block = blksize
     348        64091 :          ncol_block = blksize
     349              :       END IF
     350              : 
     351              :       ! finally, only redistribute if we're going to use less CPUs than before or changed the block size
     352              :       rdinfo%redistribute = (rdinfo%num_pe_old > rdinfo%num_pe_new) .OR. (blksize >= 0 .AND. &
     353       181144 :                                    ((blksize /= matrix%matrix_struct%ncol_block) .OR. (blksize /= matrix%matrix_struct%nrow_block)))
     354              : 
     355       181144 :       IF (work_redistribute%should_print .AND. io_unit > 0) THEN
     356            0 :          IF (is_elpa) THEN
     357            0 :             IF (work_redistribute%elpa_force_redistribute) THEN
     358              :                WRITE (UNIT=io_unit, FMT="(T2,A,T78,A3)") &
     359            0 :                   "CP_FM_DIAG| Force redistribute (ELPA):", "YES"
     360              :             ELSE
     361              :                WRITE (UNIT=io_unit, FMT="(T2,A,T79,A2)") &
     362            0 :                   "CP_FM_DIAG| Force redistribute (ELPA):", "NO"
     363              :             END IF
     364              :          END IF
     365            0 :          CALL rdinfo%write(io_unit)
     366              :       END IF
     367       181144 :       CALL para_env%sync()
     368              : 
     369              :       ! if the optimal is smaller than num_pe, we will redistribute the input matrix
     370       181144 :       IF (rdinfo%redistribute) THEN
     371              :          ! split comm, the first num_pe_new tasks will do the work
     372       482946 :          ALLOCATE (work_redistribute%group_distribution(0:rdinfo%num_pe_old - 1))
     373       160982 :          ALLOCATE (work_redistribute%group_partition(0:1))
     374       482946 :          work_redistribute%group_partition = (/rdinfo%num_pe_new, rdinfo%num_pe_old - rdinfo%num_pe_new/)
     375       160982 :          ALLOCATE (work_redistribute%para_env_new)
     376              :          CALL work_redistribute%para_env_new%from_split( &
     377              :             comm=para_env, ngroups=ngroups, group_distribution=work_redistribute%group_distribution, &
     378       160982 :             n_subgroups=2, group_partition=work_redistribute%group_partition)
     379              : 
     380       160982 :          IF (work_redistribute%group_distribution(mepos_old) == 0) THEN
     381              : 
     382              :             ! create blacs, should inherit the preferences for the layout and so on, from the higher level
     383        82200 :             NULLIFY (work_redistribute%blacs_env_new)
     384        82200 :             CALL cp_blacs_env_create(blacs_env=work_redistribute%blacs_env_new, para_env=work_redistribute%para_env_new)
     385              : 
     386              :             ! create new matrix
     387        82200 :             NULLIFY (fm_struct_new)
     388        82200 :             IF (nrow_block == -1 .OR. ncol_block == -1) THEN
     389              :                CALL cp_fm_struct_create(fmstruct=fm_struct_new, &
     390              :                                         para_env=work_redistribute%para_env_new, &
     391              :                                         context=work_redistribute%blacs_env_new, &
     392              :                                         nrow_global=rdinfo%matrix_order, ncol_global=rdinfo%matrix_order, &
     393        59220 :                                         ncol_block=ncol_block, nrow_block=nrow_block)
     394              :             ELSE
     395              :                CALL cp_fm_struct_create(fmstruct=fm_struct_new, &
     396              :                                         para_env=work_redistribute%para_env_new, &
     397              :                                         context=work_redistribute%blacs_env_new, &
     398              :                                         nrow_global=rdinfo%matrix_order, ncol_global=rdinfo%matrix_order, &
     399        22980 :                                         ncol_block=ncol_block, nrow_block=nrow_block, force_block=.TRUE.)
     400              :             END IF
     401        82200 :             CALL cp_fm_create(matrix_new, matrix_struct=fm_struct_new, name="yevd_new_mat")
     402        82200 :             CALL cp_fm_create(eigenvectors_new, matrix_struct=fm_struct_new, name="yevd_new_vec")
     403        82200 :             CALL cp_fm_struct_release(fm_struct_new)
     404              : 
     405              :             ! redistribute old
     406              :             CALL pdgemr2d(rdinfo%matrix_order, rdinfo%matrix_order, matrix%local_data(1, 1), 1, 1, &
     407              :                           matrix%matrix_struct%descriptor, &
     408              :                           matrix_new%local_data(1, 1), 1, 1, matrix_new%matrix_struct%descriptor, &
     409        82200 :                           matrix%matrix_struct%context)
     410              :          ELSE
     411              :             ! these tasks must help redistribute (they own part of the data),
     412              :             ! but need fake 'new' data, and their descriptor must indicate this with -1
     413              :             ! see also scalapack comments on pdgemr2d
     414       787820 :             fake_descriptor = -1
     415              :             CALL pdgemr2d(rdinfo%matrix_order, rdinfo%matrix_order, matrix%local_data(1, 1), 1, 1, &
     416              :                           matrix%matrix_struct%descriptor, &
     417              :                           fake_local_data(1, 1), 1, 1, fake_descriptor, &
     418        78782 :                           matrix%matrix_struct%context)
     419              :          END IF
     420              :       ELSE
     421              :          ! No need to redistribute, just return pointers to the original arrays
     422        20162 :          matrix_new = matrix
     423        20162 :          eigenvectors_new = eigenvectors
     424              :       END IF
     425              : 
     426       181144 :       IF (PRESENT(redist_info)) &
     427        64091 :          redist_info = rdinfo
     428              : #else
     429              : 
     430              :       MARK_USED(matrix)
     431              :       MARK_USED(eigenvectors)
     432              :       MARK_USED(matrix_new)
     433              :       MARK_USED(eigenvectors_new)
     434              :       MARK_USED(redist_info)
     435              :       CPABORT("Routine called in non-parallel case.")
     436              : #endif
     437              : 
     438       181144 :       CALL timestop(handle)
     439              : 
     440       181144 :    END SUBROUTINE cp_fm_redistribute_start
     441              : 
     442              : ! **************************************************************************************************
     443              : !> \brief Redistributes eigenvectors and eigenvalues  back to the original communicator group
     444              : !> \param matrix           the input cp_fm_type matrix to be diagonalized
     445              : !> \param eigenvectors     the cp_fm_type matrix that will hold the eigenvectors of the input matrix
     446              : !> \param eig              global array holding the eigenvalues of the input matrixmatrix
     447              : !> \param matrix_new       the redistributed input matrix which will subsequently be diagonalized,
     448              : !>                         or a pointer to the original matrix if no redistribution is required
     449              : !> \param eigenvectors_new the redistributed eigenvectors matrix, or a pointer to the original
     450              : !>                         matrix if no redistribution is required
     451              : !> \par History
     452              : !>      - [01.2018] created by moving redistribution related code from cp_fm_syevd here
     453              : !> \author Nico Holmberg [01.2018]
     454              : ! **************************************************************************************************
     455       181144 :    SUBROUTINE cp_fm_redistribute_end(matrix, eigenvectors, eig, matrix_new, eigenvectors_new)
     456              : 
     457              :       TYPE(cp_fm_type), INTENT(IN)             :: matrix, eigenvectors
     458              :       REAL(KIND=dp), DIMENSION(:), INTENT(INOUT) :: eig
     459              :       TYPE(cp_fm_type), INTENT(INOUT)          :: matrix_new, eigenvectors_new
     460              : 
     461              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_redistribute_end'
     462              : 
     463              :       INTEGER                                  :: handle
     464              : #if defined(__parallel)
     465              :       REAL(KIND=dp)                            :: fake_local_data(1, 1)
     466              :       INTEGER                                  :: fake_descriptor(9), mepos_old, n
     467              :       TYPE(mp_para_env_type), POINTER          :: para_env
     468              : #endif
     469              : 
     470       181144 :       CALL timeset(routineN, handle)
     471              : 
     472              : #if defined(__parallel)
     473              : 
     474              :       ! Check if matrix was redistributed
     475       181144 :       IF (ASSOCIATED(work_redistribute%group_distribution)) THEN
     476       160982 :          n = matrix%matrix_struct%nrow_global
     477       160982 :          para_env => matrix%matrix_struct%para_env
     478       160982 :          mepos_old = para_env%mepos
     479              : 
     480       160982 :          IF (work_redistribute%group_distribution(mepos_old) == 0) THEN
     481              :             ! redistribute results on CPUs that hold the redistributed matrix
     482              :             CALL pdgemr2d(n, n, eigenvectors_new%local_data(1, 1), 1, 1, eigenvectors_new%matrix_struct%descriptor, &
     483              :                           eigenvectors%local_data(1, 1), 1, 1, eigenvectors%matrix_struct%descriptor, &
     484        82200 :                           eigenvectors%matrix_struct%context)
     485        82200 :             CALL cp_fm_release(matrix_new)
     486        82200 :             CALL cp_fm_release(eigenvectors_new)
     487              :          ELSE
     488              :             ! these tasks must help redistribute (they own part of the data),
     489              :             ! but need fake 'new' data, and their descriptor must indicate this with -1
     490              :             ! see also scalapack comments on pdgemr2d
     491       787820 :             fake_descriptor = -1
     492              :             CALL pdgemr2d(n, n, fake_local_data(1, 1), 1, 1, fake_descriptor, &
     493              :                           eigenvectors%local_data(1, 1), 1, 1, eigenvectors%matrix_struct%descriptor, &
     494        78782 :                           eigenvectors%matrix_struct%context)
     495              :          END IF
     496              :          ! free work
     497       160982 :          CALL cp_fm_redistribute_work_finalize(work_redistribute%group_distribution(mepos_old) == 0)
     498              : 
     499              :          ! finally, also the eigenvalues need to end up on the non-group member tasks
     500      3616704 :          CALL para_env%bcast(eig, 0)
     501              :       END IF
     502              : 
     503              : #else
     504              : 
     505              :       MARK_USED(matrix)
     506              :       MARK_USED(eigenvectors)
     507              :       MARK_USED(eig)
     508              :       MARK_USED(matrix_new)
     509              :       MARK_USED(eigenvectors_new)
     510              :       CPABORT("Routine called in non-parallel case.")
     511              : #endif
     512              : 
     513       181144 :       CALL timestop(handle)
     514              : 
     515       181144 :    END SUBROUTINE cp_fm_redistribute_end
     516              : 
     517            0 : END MODULE cp_fm_diag_utils
        

Generated by: LCOV version 2.0-1