LCOV - code coverage report
Current view: top level - src - mp2_ri_grad_util.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:42dac4a) Lines: 98.4 % 756 744
Test Date: 2025-07-25 12:55:17 Functions: 77.8 % 9 7

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Routines for calculating RI-MP2 gradients
      10              : !> \par History
      11              : !>      10.2013 created [Mauro Del Ben]
      12              : ! **************************************************************************************************
      13              : MODULE mp2_ri_grad_util
      14              : !
      15              :    USE cp_blacs_env,                    ONLY: cp_blacs_env_create,&
      16              :                                               cp_blacs_env_release,&
      17              :                                               cp_blacs_env_type
      18              :    USE cp_dbcsr_api,                    ONLY: dbcsr_p_type,&
      19              :                                               dbcsr_type,&
      20              :                                               dbcsr_type_no_symmetry
      21              :    USE cp_dbcsr_operations,             ONLY: copy_fm_to_dbcsr,&
      22              :                                               cp_dbcsr_m_by_n_from_template
      23              :    USE cp_fm_basic_linalg,              ONLY: cp_fm_geadd
      24              :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      25              :                                               cp_fm_struct_get,&
      26              :                                               cp_fm_struct_release,&
      27              :                                               cp_fm_struct_type
      28              :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      29              :                                               cp_fm_get_info,&
      30              :                                               cp_fm_release,&
      31              :                                               cp_fm_set_all,&
      32              :                                               cp_fm_to_fm,&
      33              :                                               cp_fm_type
      34              :    USE group_dist_types,                ONLY: create_group_dist,&
      35              :                                               get_group_dist,&
      36              :                                               group_dist_d1_type,&
      37              :                                               release_group_dist
      38              :    USE kinds,                           ONLY: dp
      39              :    USE libint_2c_3c,                    ONLY: compare_potential_types
      40              :    USE message_passing,                 ONLY: mp_para_env_type,&
      41              :                                               mp_request_null,&
      42              :                                               mp_request_type,&
      43              :                                               mp_waitall
      44              :    USE mp2_types,                       ONLY: integ_mat_buffer_type,&
      45              :                                               mp2_type
      46              :    USE parallel_gemm_api,               ONLY: parallel_gemm
      47              :    USE util,                            ONLY: get_limit
      48              : #include "./base/base_uses.f90"
      49              : 
      50              :    IMPLICIT NONE
      51              : 
      52              :    PRIVATE
      53              : 
      54              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_grad_util'
      55              : 
      56              :    PUBLIC :: complete_gamma, array2fm, fm2array, prepare_redistribution, create_dbcsr_gamma
      57              : 
      58              :    TYPE index_map
      59              :       INTEGER, DIMENSION(:, :), ALLOCATABLE :: map
      60              :    END TYPE
      61              : 
      62              : CONTAINS
      63              : 
      64              : ! **************************************************************************************************
      65              : !> \brief complete the calculation of the Gamma matrices
      66              : !> \param mp2_env ...
      67              : !> \param B_ia_Q ...
      68              : !> \param dimen_RI ...
      69              : !> \param homo ...
      70              : !> \param virtual ...
      71              : !> \param para_env ...
      72              : !> \param para_env_sub ...
      73              : !> \param ngroup ...
      74              : !> \param my_group_L_size ...
      75              : !> \param my_group_L_start ...
      76              : !> \param my_group_L_end ...
      77              : !> \param my_B_size ...
      78              : !> \param my_B_virtual_start ...
      79              : !> \param gd_array ...
      80              : !> \param gd_B_virtual ...
      81              : !> \param kspin ...
      82              : !> \author Mauro Del Ben
      83              : ! **************************************************************************************************
      84          306 :    SUBROUTINE complete_gamma(mp2_env, B_ia_Q, dimen_RI, homo, virtual, para_env, para_env_sub, ngroup, &
      85              :                              my_group_L_size, my_group_L_start, my_group_L_end, &
      86              :                              my_B_size, my_B_virtual_start, gd_array, gd_B_virtual, kspin)
      87              : 
      88              :       TYPE(mp2_type)                                     :: mp2_env
      89              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
      90              :          INTENT(INOUT)                                   :: B_ia_Q
      91              :       INTEGER, INTENT(IN)                                :: dimen_RI, homo, virtual
      92              :       TYPE(mp_para_env_type), INTENT(IN), POINTER        :: para_env, para_env_sub
      93              :       INTEGER, INTENT(IN)                                :: ngroup, my_group_L_size, &
      94              :                                                             my_group_L_start, my_group_L_end, &
      95              :                                                             my_B_size, my_B_virtual_start
      96              :       TYPE(group_dist_d1_type), INTENT(IN)               :: gd_array, gd_B_virtual
      97              :       INTEGER, INTENT(IN)                                :: kspin
      98              : 
      99              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'complete_gamma'
     100              : 
     101              :       INTEGER :: dimen_ia, handle, i, ispin, kkB, my_ia_end, my_ia_size, my_ia_start, my_P_end, &
     102              :          my_P_size, my_P_start, nspins, pos_group, pos_sub
     103          306 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: pos_info
     104          306 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: group_grid_2_mepos, mepos_2_grid_group
     105          306 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: BIb_C_2D, Gamma_2D, Gamma_PQ
     106              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     107              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_ia, fm_struct_RI
     108              :       TYPE(cp_fm_type) :: fm_Gamma, fm_Gamma_PQ, fm_Gamma_PQ_2, fm_Gamma_PQ_temp, &
     109              :          fm_Gamma_PQ_temp_2, fm_ia_P, fm_Y, operator_half, PQ_half
     110          306 :       TYPE(group_dist_d1_type)                           :: gd_array_new, gd_ia, gd_ia_new, gd_P, &
     111          306 :                                                             gd_P_new
     112              : 
     113          306 :       CALL timeset(routineN, handle)
     114              : 
     115              :       ! reshape the data into a 2D array
     116              :       ! reorder the local data in such a way to help the next stage of matrix creation
     117              :       ! now the data inside the group are divided into a ia x K matrix
     118          306 :       dimen_ia = homo*virtual
     119          306 :       CALL create_group_dist(gd_ia, para_env_sub%num_pe, dimen_ia)
     120          306 :       CALL get_group_dist(gd_ia, para_env_sub%mepos, my_ia_start, my_ia_end, my_ia_size)
     121              : 
     122              :       CALL mat_3d_to_2d(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
     123              :                         my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
     124              : 
     125              :       CALL mat_3d_to_2d_gamma(mp2_env%ri_grad%Gamma_P_ia(kspin)%array, Gamma_2D, homo, my_B_size, virtual, my_B_virtual_start, &
     126          306 :                               my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
     127              : 
     128              :       ! create the processor map and size arrays
     129          306 :       CALL create_group_dist(gd_ia_new, para_env%num_pe)
     130          306 :       CALL create_group_dist(gd_array_new, para_env%num_pe)
     131          306 :       CALL create_group_dist(gd_P, para_env_sub%num_pe, dimen_RI)
     132          306 :       CALL create_group_dist(gd_P_new, para_env%num_pe)
     133              : 
     134          306 :       CALL get_group_dist(gd_P, para_env_sub%mepos, my_P_start, my_P_end, my_P_size)
     135              : 
     136              :       CALL prepare_redistribution(para_env, para_env_sub, ngroup, &
     137              :                                   group_grid_2_mepos, mepos_2_grid_group, pos_info=pos_info)
     138              : 
     139          918 :       DO i = 0, para_env%num_pe - 1
     140              :          ! calculate position of the group
     141          612 :          pos_group = i/para_env_sub%num_pe
     142              :          ! calculate position in the subgroup
     143          612 :          pos_sub = pos_info(i)
     144              :          ! 1 -> rows, 2 -> cols
     145          612 :          CALL get_group_dist(gd_ia, pos_sub, gd_ia_new, i)
     146          612 :          CALL get_group_dist(gd_array, pos_group, gd_array_new, i)
     147          918 :          CALL get_group_dist(gd_P, pos_sub, gd_P_new, i)
     148              :       END DO
     149              : 
     150              :       ! create the blacs env
     151          306 :       NULLIFY (blacs_env)
     152          306 :       CALL cp_blacs_env_create(blacs_env=blacs_env, para_env=para_env)
     153              : 
     154          306 :       NULLIFY (fm_struct_ia)
     155              :       CALL cp_fm_struct_create(fm_struct_ia, context=blacs_env, nrow_global=dimen_ia, &
     156          306 :                                ncol_global=dimen_RI, para_env=para_env)
     157              : 
     158              :       ! create the fm matrix Gamma
     159              :       CALL array2fm(Gamma_2D, fm_struct_ia, my_ia_start, my_ia_end, &
     160              :                     my_group_L_start, my_group_L_end, &
     161              :                     gd_ia_new, gd_array_new, &
     162              :                     group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
     163          306 :                     fm_Y)
     164              :       ! create the fm matrix B_ia_P
     165              :       CALL array2fm(BIb_C_2D, fm_struct_ia, my_ia_start, my_ia_end, &
     166              :                     my_group_L_start, my_group_L_end, &
     167              :                     gd_ia_new, gd_array_new, &
     168              :                     group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
     169          306 :                     fm_ia_P)
     170              : 
     171          306 :       NULLIFY (fm_struct_RI)
     172              :       CALL cp_fm_struct_create(fm_struct_RI, context=blacs_env, nrow_global=dimen_RI, &
     173          306 :                                ncol_global=dimen_RI, para_env=para_env)
     174              : 
     175              :       ! since we will need (P|Q)^(-1/2) in the future, make a copy
     176              :       CALL array2fm(mp2_env%ri_grad%PQ_half, fm_struct_RI, my_P_start, my_P_end, &
     177              :                     my_group_L_start, my_group_L_end, &
     178              :                     gd_P_new, gd_array_new, &
     179              :                     group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
     180          306 :                     PQ_half, do_release_mat=.FALSE.)
     181              : 
     182          306 :       IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
     183              :          CALL array2fm(mp2_env%ri_grad%operator_half, fm_struct_RI, my_P_start, my_P_end, &
     184              :                        my_group_L_start, my_group_L_end, &
     185              :                        gd_P_new, gd_array_new, &
     186              :                        group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
     187            8 :                        operator_half, do_release_mat=.FALSE.)
     188              :       END IF
     189              : 
     190          306 :       CALL release_group_dist(gd_P_new)
     191          306 :       CALL release_group_dist(gd_ia_new)
     192          306 :       CALL release_group_dist(gd_array_new)
     193              : 
     194              :       ! complete the gamma matrix Gamma_ia^P = sum_Q (Y_ia^Q * (Q|P)^(-1/2))
     195          306 :       IF (compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
     196          298 :          CALL cp_fm_create(fm_Gamma, fm_struct_ia, name="fm_Gamma")
     197          298 :          CALL cp_fm_struct_release(fm_struct_ia)
     198              :          ! perform the matrix multiplication
     199              :          CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
     200              :                             matrix_a=fm_Y, matrix_b=PQ_half, beta=0.0_dp, &
     201          298 :                             matrix_c=fm_Gamma)
     202              :          ! release the Y matrix
     203          298 :          CALL cp_fm_release(fm_Y)
     204              : 
     205              :          ! complete gamma small (fm_Gamma_PQ)
     206              :          ! create temp matrix
     207          298 :          CALL cp_fm_create(fm_Gamma_PQ_temp, fm_struct_RI, name="fm_Gamma_PQ_temp")
     208              :          CALL parallel_gemm(transa="T", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_ia, alpha=1.0_dp, &
     209              :                             matrix_a=fm_Gamma, matrix_b=fm_ia_P, beta=0.0_dp, &
     210          298 :                             matrix_c=fm_Gamma_PQ_temp)
     211          298 :          CALL cp_fm_release(fm_ia_P)
     212              :          ! create fm_Gamma_PQ matrix
     213          298 :          CALL cp_fm_create(fm_Gamma_PQ, fm_struct_RI, name="fm_Gamma_PQ")
     214          298 :          CALL cp_fm_struct_release(fm_struct_RI)
     215              :          ! perform matrix multiplication
     216              :          CALL parallel_gemm(transa="N", transb="T", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
     217              :                             matrix_a=fm_Gamma_PQ_temp, matrix_b=PQ_half, beta=0.0_dp, &
     218          298 :                             matrix_c=fm_Gamma_PQ)
     219          298 :          CALL cp_fm_release(fm_Gamma_PQ_temp)
     220          298 :          CALL cp_fm_release(PQ_half)
     221              : 
     222              :       ELSE
     223              :          ! create temp matrix
     224            8 :          CALL cp_fm_create(fm_Gamma_PQ_temp, fm_struct_RI, name="fm_Gamma_PQ_temp")
     225              :          CALL parallel_gemm(transa="T", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_ia, alpha=1.0_dp, &
     226              :                             matrix_a=fm_Y, matrix_b=fm_ia_P, beta=0.0_dp, &
     227            8 :                             matrix_c=fm_Gamma_PQ_temp)
     228            8 :          CALL cp_fm_release(fm_ia_P)
     229              : 
     230            8 :          CALL cp_fm_create(fm_Gamma, fm_struct_ia, name="fm_Gamma")
     231            8 :          CALL cp_fm_struct_release(fm_struct_ia)
     232              :          ! perform the matrix multiplication
     233              :          CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
     234              :                             matrix_a=fm_Y, matrix_b=PQ_half, beta=0.0_dp, &
     235            8 :                             matrix_c=fm_Gamma)
     236            8 :          CALL cp_fm_release(fm_Y)
     237              : 
     238            8 :          CALL cp_fm_create(fm_Gamma_PQ_temp_2, fm_struct_RI, name="fm_Gamma_PQ_temp_2")
     239              :          CALL parallel_gemm(transa="N", transb="T", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
     240              :                             matrix_a=fm_Gamma_PQ_temp, matrix_b=operator_half, beta=0.0_dp, &
     241            8 :                             matrix_c=fm_Gamma_PQ_temp_2)
     242              : 
     243            8 :          CALL cp_fm_create(fm_Gamma_PQ_2, fm_struct_RI, name="fm_Gamma_PQ_2")
     244              :          CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
     245              :                             matrix_a=PQ_half, matrix_b=fm_Gamma_PQ_temp_2, beta=0.0_dp, &
     246            8 :                             matrix_c=fm_Gamma_PQ_temp)
     247            8 :          CALL cp_fm_to_fm(fm_Gamma_PQ_temp, fm_Gamma_PQ_2)
     248            8 :          CALL cp_fm_geadd(1.0_dp, "T", fm_Gamma_PQ_temp, 1.0_dp, fm_Gamma_PQ_2)
     249            8 :          CALL cp_fm_release(fm_Gamma_PQ_temp)
     250            8 :          CALL cp_fm_release(PQ_half)
     251              : 
     252            8 :          CALL cp_fm_create(fm_Gamma_PQ, fm_struct_RI)
     253            8 :          CALL cp_fm_struct_release(fm_struct_RI)
     254              :          CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=-1.0_dp, &
     255              :                             matrix_a=operator_half, matrix_b=fm_Gamma_PQ_temp_2, beta=0.0_dp, &
     256            8 :                             matrix_c=fm_Gamma_PQ)
     257            8 :          CALL cp_fm_release(operator_half)
     258            8 :          CALL cp_fm_release(fm_Gamma_PQ_temp_2)
     259              :       END IF
     260              : 
     261              :       ! now redistribute the data, in this case we go back
     262              :       ! to the original way the integrals were distributed
     263              :       CALL fm2array(Gamma_2D, my_ia_size, my_ia_start, my_ia_end, &
     264              :                     my_group_L_size, my_group_L_start, my_group_L_end, &
     265              :                     group_grid_2_mepos, mepos_2_grid_group, &
     266              :                     para_env_sub%num_pe, ngroup, &
     267          306 :                     fm_Gamma)
     268              : 
     269         1224 :       ALLOCATE (Gamma_PQ(my_P_size, my_group_L_size))
     270              :       CALL fm2array(Gamma_PQ, my_P_size, my_P_start, my_P_end, &
     271              :                     my_group_L_size, my_group_L_start, my_group_L_end, &
     272              :                     group_grid_2_mepos, mepos_2_grid_group, &
     273              :                     para_env_sub%num_pe, ngroup, &
     274          306 :                     fm_Gamma_PQ)
     275          306 :       IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_PQ)) THEN
     276          228 :          CALL MOVE_ALLOC(Gamma_PQ, mp2_env%ri_grad%Gamma_PQ)
     277              :       ELSE
     278       313178 :          mp2_env%ri_grad%Gamma_PQ(:, :) = mp2_env%ri_grad%Gamma_PQ + Gamma_PQ
     279           78 :          DEALLOCATE (Gamma_PQ)
     280              :       END IF
     281              : 
     282          306 :       IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
     283           24 :          ALLOCATE (Gamma_PQ(my_P_size, my_group_L_size))
     284              :          CALL fm2array(Gamma_PQ, my_P_size, my_P_start, my_P_end, &
     285              :                        my_group_L_size, my_group_L_start, my_group_L_end, &
     286              :                        group_grid_2_mepos, mepos_2_grid_group, &
     287              :                        para_env_sub%num_pe, ngroup, &
     288            8 :                        fm_Gamma_PQ_2)
     289            8 :          IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_PQ_2)) THEN
     290            8 :             CALL MOVE_ALLOC(Gamma_PQ, mp2_env%ri_grad%Gamma_PQ_2)
     291              :          ELSE
     292            0 :             mp2_env%ri_grad%Gamma_PQ_2(:, :) = mp2_env%ri_grad%Gamma_PQ_2 + Gamma_PQ
     293            0 :             DEALLOCATE (Gamma_PQ)
     294              :          END IF
     295              :       END IF
     296              : 
     297              :       ! allocate G_P_ia (DBCSR)
     298          306 :       IF (.NOT. ALLOCATED(mp2_env%ri_grad%G_P_ia)) THEN
     299          228 :          nspins = SIZE(mp2_env%ri_grad%mo_coeff_o)
     300        14456 :          ALLOCATE (mp2_env%ri_grad%G_P_ia(my_group_L_size, nspins))
     301          534 :          DO ispin = 1, nspins
     302        13772 :          DO kkB = 1, my_group_L_size
     303        13544 :             NULLIFY (mp2_env%ri_grad%G_P_ia(kkB, ispin)%matrix)
     304              :          END DO
     305              :          END DO
     306              :       END IF
     307              : 
     308              :       ! create the Gamma_ia_P in DBCSR style
     309              :       CALL create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, &
     310              :                               my_ia_start, my_ia_end, my_group_L_size, gd_ia, &
     311          306 :                               mp2_env%ri_grad%G_P_ia(:, kspin), mp2_env%ri_grad%mo_coeff_o(1)%matrix)
     312              : 
     313          306 :       DEALLOCATE (pos_info)
     314          306 :       DEALLOCATE (group_grid_2_mepos, mepos_2_grid_group)
     315          306 :       CALL release_group_dist(gd_ia)
     316          306 :       CALL release_group_dist(gd_P)
     317              : 
     318              :       ! release blacs_env
     319          306 :       CALL cp_blacs_env_release(blacs_env)
     320              : 
     321          306 :       CALL timestop(handle)
     322              : 
     323         2142 :    END SUBROUTINE complete_gamma
     324              : 
     325              : ! **************************************************************************************************
     326              : !> \brief Redistribute a 3d matrix to a 2d matrix
     327              : !> \param B_ia_Q ...
     328              : !> \param BIb_C_2D ...
     329              : !> \param homo ...
     330              : !> \param my_B_size ...
     331              : !> \param virtual ...
     332              : !> \param my_B_virtual_start ...
     333              : !> \param my_ia_start ...
     334              : !> \param my_ia_end ...
     335              : !> \param my_ia_size ...
     336              : !> \param my_group_L_size ...
     337              : !> \param para_env_sub ...
     338              : !> \param gd_B_virtual ...
     339              : ! **************************************************************************************************
     340          306 :    SUBROUTINE mat_3d_to_2d(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
     341              :                            my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
     342              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
     343              :          INTENT(INOUT)                                   :: B_ia_Q
     344              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
     345              :          INTENT(OUT)                                     :: BIb_C_2D
     346              :       INTEGER, INTENT(IN)                                :: homo, my_B_size, virtual, &
     347              :                                                             my_B_virtual_start, my_ia_start, &
     348              :                                                             my_ia_end, my_ia_size, my_group_L_size
     349              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env_sub
     350              :       TYPE(group_dist_d1_type), INTENT(IN)               :: gd_B_virtual
     351              : 
     352              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'mat_3d_to_2d'
     353              : 
     354              :       INTEGER :: handle, ia_global, iiB, jjB, proc_receive, proc_send, proc_shift, rec_B_size, &
     355              :          rec_B_virtual_end, rec_B_virtual_start
     356          306 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: BIb_C_rec
     357              : 
     358          306 :       CALL timeset(routineN, handle)
     359              : 
     360         1224 :       ALLOCATE (BIb_C_2D(my_ia_size, my_group_L_size))
     361       720844 :       BIb_C_2D = 0.0_dp
     362              : 
     363         1408 :       DO iiB = 1, homo
     364        17332 :          DO jjB = 1, my_B_size
     365        15924 :             ia_global = (iiB - 1)*virtual + my_B_virtual_start + jjB - 1
     366        17026 :             IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
     367       705851 :                BIb_C_2D(ia_global - my_ia_start + 1, :) = B_ia_Q(iiB, jjB, 1:my_group_L_size)
     368              :             END IF
     369              :          END DO
     370              :       END DO
     371              : 
     372          314 :       DO proc_shift = 1, para_env_sub%num_pe - 1
     373            8 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
     374            8 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
     375              : 
     376            8 :          CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
     377              : 
     378           40 :          ALLOCATE (BIb_C_rec(homo, rec_B_size, my_group_L_size))
     379        47130 :          BIb_C_rec = 0.0_dp
     380              : 
     381              :          CALL para_env_sub%sendrecv(B_ia_Q, proc_send, &
     382            8 :                                     BIb_C_rec, proc_receive)
     383              : 
     384           48 :          DO iiB = 1, homo
     385          438 :             DO jjB = 1, rec_B_size
     386          390 :                ia_global = (iiB - 1)*virtual + rec_B_virtual_start + jjB - 1
     387          430 :                IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
     388        17373 :                   BIb_C_2D(ia_global - my_ia_start + 1, :) = BIb_C_rec(iiB, jjB, 1:my_group_L_size)
     389              :                END IF
     390              :             END DO
     391              :          END DO
     392              : 
     393          322 :          DEALLOCATE (BIb_C_rec)
     394              :       END DO
     395          306 :       DEALLOCATE (B_ia_Q)
     396              : 
     397          306 :       CALL timestop(handle)
     398          612 :    END SUBROUTINE mat_3d_to_2d
     399              : 
     400              : ! **************************************************************************************************
     401              : !> \brief Redistribute a 3d matrix to a 2d matrix, specialized for Gamma_P_ia to account for a different order of indices
     402              : !> \param B_ia_Q ...
     403              : !> \param BIb_C_2D ...
     404              : !> \param homo ...
     405              : !> \param my_B_size ...
     406              : !> \param virtual ...
     407              : !> \param my_B_virtual_start ...
     408              : !> \param my_ia_start ...
     409              : !> \param my_ia_end ...
     410              : !> \param my_ia_size ...
     411              : !> \param my_group_L_size ...
     412              : !> \param para_env_sub ...
     413              : !> \param gd_B_virtual ...
     414              : ! **************************************************************************************************
     415          306 :    SUBROUTINE mat_3d_to_2d_gamma(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
     416              :                                  my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
     417              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
     418              :          INTENT(INOUT)                                   :: B_ia_Q
     419              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
     420              :          INTENT(OUT)                                     :: BIb_C_2D
     421              :       INTEGER, INTENT(IN)                                :: homo, my_B_size, virtual, &
     422              :                                                             my_B_virtual_start, my_ia_start, &
     423              :                                                             my_ia_end, my_ia_size, my_group_L_size
     424              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env_sub
     425              :       TYPE(group_dist_d1_type), INTENT(IN)               :: gd_B_virtual
     426              : 
     427              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'mat_3d_to_2d_gamma'
     428              : 
     429              :       INTEGER :: handle, ia_global, iiB, jjB, proc_receive, proc_send, proc_shift, rec_B_size, &
     430              :          rec_B_virtual_end, rec_B_virtual_start
     431          306 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: BIb_C_rec
     432              : 
     433          306 :       CALL timeset(routineN, handle)
     434              : 
     435         1224 :       ALLOCATE (BIb_C_2D(my_ia_size, my_group_L_size))
     436       720844 :       BIb_C_2D = 0.0_dp
     437              : 
     438         1408 :       DO iiB = 1, homo
     439        17332 :          DO jjB = 1, my_B_size
     440        15924 :             ia_global = (iiB - 1)*virtual + my_B_virtual_start + jjB - 1
     441        17026 :             IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
     442       705851 :                BIb_C_2D(ia_global - my_ia_start + 1, :) = B_ia_Q(jjB, iiB, 1:my_group_L_size)
     443              :             END IF
     444              :          END DO
     445              :       END DO
     446              : 
     447          314 :       DO proc_shift = 1, para_env_sub%num_pe - 1
     448            8 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
     449            8 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
     450              : 
     451            8 :          CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
     452              : 
     453           40 :          ALLOCATE (BIb_C_rec(rec_B_size, homo, my_group_L_size))
     454        43544 :          BIb_C_rec = 0.0_dp
     455              : 
     456              :          CALL para_env_sub%sendrecv(B_ia_Q, proc_send, &
     457            8 :                                     BIb_C_rec, proc_receive)
     458              : 
     459           48 :          DO iiB = 1, homo
     460          438 :             DO jjB = 1, rec_B_size
     461          390 :                ia_global = (iiB - 1)*virtual + rec_B_virtual_start + jjB - 1
     462          430 :                IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
     463        17373 :                   BIb_C_2D(ia_global - my_ia_start + 1, :) = BIb_C_rec(jjB, iiB, 1:my_group_L_size)
     464              :                END IF
     465              :             END DO
     466              :          END DO
     467              : 
     468          322 :          DEALLOCATE (BIb_C_rec)
     469              :       END DO
     470          306 :       DEALLOCATE (B_ia_Q)
     471              : 
     472          306 :       CALL timestop(handle)
     473          612 :    END SUBROUTINE mat_3d_to_2d_gamma
     474              : 
     475              : ! **************************************************************************************************
     476              : !> \brief redistribute local part of array to fm
     477              : !> \param mat2D ...
     478              : !> \param fm_struct ...
     479              : !> \param my_start_row ...
     480              : !> \param my_end_row ...
     481              : !> \param my_start_col ...
     482              : !> \param my_end_col ...
     483              : !> \param gd_row ...
     484              : !> \param gd_col ...
     485              : !> \param group_grid_2_mepos ...
     486              : !> \param ngroup_row ...
     487              : !> \param ngroup_col ...
     488              : !> \param fm_mat ...
     489              : !> \param integ_group_size ...
     490              : !> \param color_group ...
     491              : !> \param do_release_mat whether to release the array (default: yes)
     492              : ! **************************************************************************************************
     493         1292 :    SUBROUTINE array2fm(mat2D, fm_struct, my_start_row, my_end_row, &
     494              :                        my_start_col, my_end_col, &
     495              :                        gd_row, gd_col, &
     496              :                        group_grid_2_mepos, ngroup_row, ngroup_col, &
     497              :                        fm_mat, integ_group_size, color_group, do_release_mat)
     498              : 
     499              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
     500              :          INTENT(INOUT)                                   :: mat2D
     501              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct
     502              :       INTEGER, INTENT(IN)                                :: my_start_row, my_end_row, my_start_col, &
     503              :                                                             my_end_col
     504              :       TYPE(group_dist_d1_type), INTENT(IN)               :: gd_row, gd_col
     505              :       INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN)  :: group_grid_2_mepos
     506              :       INTEGER, INTENT(IN)                                :: ngroup_row, ngroup_col
     507              :       TYPE(cp_fm_type), INTENT(OUT)                      :: fm_mat
     508              :       INTEGER, INTENT(IN), OPTIONAL                      :: integ_group_size, color_group
     509              :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_release_mat
     510              : 
     511              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'array2fm'
     512              : 
     513              :       INTEGER :: dummy_proc, end_col_block, end_row_block, handle, handle2, i_global, i_local, &
     514              :          i_sub, iiB, iii, itmp(2), j_global, j_local, j_sub, jjB, my_num_col_blocks, &
     515              :          my_num_row_blocks, mypcol, myprow, ncol_local, npcol, nprow, nrow_local, num_cols, &
     516              :          num_rec_cols, num_rows, number_of_rec, number_of_send, proc_receive, proc_send, &
     517              :          proc_shift, rec_col_end, rec_col_size, rec_col_start, rec_counter, rec_pcol, rec_prow, &
     518              :          rec_row_end, rec_row_size, rec_row_start, ref_send_pcol, ref_send_prow, send_counter, &
     519              :          send_pcol, send_prow, size_rec_buffer, size_send_buffer, start_col_block, start_row_block
     520         1292 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: iii_vet, index_col_rec, map_rec_size, &
     521         1292 :                                                             map_send_size
     522         1292 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: blocks_ranges_col, blocks_ranges_row, &
     523         1292 :                                                             grid_2_mepos, grid_ref_2_send_pos, &
     524         1292 :                                                             mepos_2_grid
     525         1292 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     526              :       LOGICAL                                            :: convert_pos, my_do_release_mat
     527              :       REAL(KIND=dp)                                      :: part_col, part_row
     528              :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
     529         1292 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
     530              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     531         1292 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: req_send
     532              : 
     533         1292 :       CALL timeset(routineN, handle)
     534              : 
     535         1292 :       my_do_release_mat = .TRUE.
     536         1292 :       IF (PRESENT(do_release_mat)) my_do_release_mat = do_release_mat
     537              : 
     538         1292 :       CALL cp_fm_struct_get(fm_struct, para_env=para_env, nrow_global=num_rows, ncol_global=num_cols)
     539              : 
     540              :       ! create the full matrix, (num_rows x num_cols)
     541         1292 :       CALL cp_fm_create(fm_mat, fm_struct, name="fm_mat")
     542         1292 :       CALL cp_fm_set_all(matrix=fm_mat, alpha=0.0_dp)
     543              : 
     544              :       ! start filling procedure
     545              :       ! fill the matrix
     546              :       CALL cp_fm_get_info(matrix=fm_mat, &
     547              :                           nrow_local=nrow_local, &
     548              :                           ncol_local=ncol_local, &
     549              :                           row_indices=row_indices, &
     550         1292 :                           col_indices=col_indices)
     551         1292 :       myprow = fm_mat%matrix_struct%context%mepos(1)
     552         1292 :       mypcol = fm_mat%matrix_struct%context%mepos(2)
     553         1292 :       nprow = fm_mat%matrix_struct%context%num_pe(1)
     554         1292 :       npcol = fm_mat%matrix_struct%context%num_pe(2)
     555              : 
     556              :       ! start the communication part
     557              :       ! 0) create array containing the processes position
     558              :       !    and supporting infos
     559         1292 :       CALL timeset(routineN//"_info", handle2)
     560         5168 :       ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
     561         5016 :       grid_2_mepos = 0
     562         3876 :       ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))
     563         1292 :       grid_2_mepos(myprow, mypcol) = para_env%mepos
     564              :       ! sum infos
     565         1292 :       CALL para_env%sum(grid_2_mepos)
     566         3876 :       CALL para_env%allgather([myprow, mypcol], mepos_2_grid)
     567              : 
     568              :       ! 1) loop over my local data and define a map for the proc to send data
     569         3876 :       ALLOCATE (map_send_size(0:para_env%num_pe - 1))
     570         3724 :       map_send_size = 0
     571         1292 :       dummy_proc = 0
     572        82228 :       DO jjB = my_start_col, my_end_col
     573        80936 :          send_pcol = fm_mat%matrix_struct%g2p_col(jjB)
     574      4410020 :          DO iiB = my_start_row, my_end_row
     575      4327792 :             send_prow = fm_mat%matrix_struct%g2p_row(iiB)
     576      4327792 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     577      4408728 :             map_send_size(proc_send) = map_send_size(proc_send) + 1
     578              :          END DO
     579              :       END DO
     580              : 
     581              :       ! 2) loop over my local data of fm_mat and define a map for the proc from which rec data
     582         3876 :       ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
     583         3724 :       map_rec_size = 0
     584         1292 :       part_row = REAL(num_rows, KIND=dp)/REAL(ngroup_row, KIND=dp)
     585         1292 :       part_col = REAL(num_cols, KIND=dp)/REAL(ngroup_col, KIND=dp)
     586              : 
     587              :       ! In some cases we have to convert global positions to positions in a subgroup (RPA)
     588         1292 :       convert_pos = .FALSE.
     589         1292 :       IF (PRESENT(integ_group_size) .AND. PRESENT(color_group)) convert_pos = .TRUE.
     590              : 
     591        50990 :       DO jjB = 1, nrow_local
     592        49698 :          j_global = row_indices(jjB)
     593              :          ! check the group holding this element
     594              :          ! dirty way, if someone has a better idea ...
     595        49698 :          rec_prow = INT(REAL(j_global - 1, KIND=dp)/part_row)
     596        49698 :          rec_prow = MAX(0, rec_prow)
     597        49698 :          rec_prow = MIN(rec_prow, ngroup_row - 1)
     598              :          DO
     599        49698 :             itmp = get_limit(num_rows, ngroup_row, rec_prow)
     600        49698 :             IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
     601            0 :             IF (j_global < itmp(1)) rec_prow = rec_prow - 1
     602        49698 :             IF (j_global > itmp(2)) rec_prow = rec_prow + 1
     603              :          END DO
     604              : 
     605        49698 :          IF (convert_pos) THEN
     606              :             ! if the group is not in the same RPA group cycle
     607        18152 :             IF ((rec_prow/integ_group_size) .NE. color_group) CYCLE
     608              :             ! convert global position to position into the RPA group
     609        12473 :             rec_prow = MOD(rec_prow, integ_group_size)
     610              :          END IF
     611              : 
     612      4373103 :          DO iiB = 1, ncol_local
     613      4327792 :             i_global = col_indices(iiB)
     614              :             ! check the process in the group holding this element
     615      4327792 :             rec_pcol = INT(REAL(i_global - 1, KIND=dp)/part_col)
     616      4327792 :             rec_pcol = MAX(0, rec_pcol)
     617      4327792 :             rec_pcol = MIN(rec_pcol, ngroup_col - 1)
     618              :             DO
     619      4327792 :                itmp = get_limit(num_cols, ngroup_col, rec_pcol)
     620      4327792 :                IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
     621            0 :                IF (i_global < itmp(1)) rec_pcol = rec_pcol - 1
     622      4327792 :                IF (i_global > itmp(2)) rec_pcol = rec_pcol + 1
     623              :             END DO
     624              : 
     625      4327792 :             proc_receive = group_grid_2_mepos(rec_prow, rec_pcol)
     626              : 
     627      4377490 :             map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
     628              : 
     629              :          END DO ! i_global
     630              :       END DO ! j_global
     631              : 
     632              :       ! 3) check if the local data has to be stored in the new fm_mat
     633         1292 :       IF (map_rec_size(para_env%mepos) > 0) THEN
     634       122048 :          DO jjB = 1, ncol_local
     635       120762 :             j_global = col_indices(jjB)
     636       122048 :             IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
     637      3791630 :                DO iiB = 1, nrow_local
     638      3710694 :                   i_global = row_indices(iiB)
     639      3791630 :                   IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
     640      2866657 :                      fm_mat%local_data(iiB, jjB) = mat2D(i_global - my_start_row + 1, j_global - my_start_col + 1)
     641              :                   END IF
     642              :                END DO
     643              :             END IF
     644              :          END DO
     645              :       END IF
     646         1292 :       CALL timestop(handle2)
     647              : 
     648              :       ! 4) reorder data in the send_buffer
     649         1292 :       CALL timeset(routineN//"_buffer_send", handle2)
     650              :       ! count the number of messages to send
     651         1292 :       number_of_send = 0
     652         2432 :       DO proc_shift = 1, para_env%num_pe - 1
     653         1140 :          proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
     654         2432 :          IF (map_send_size(proc_send) > 0) THEN
     655         1140 :             number_of_send = number_of_send + 1
     656              :          END IF
     657              :       END DO
     658              :       ! allocate the structure that will hold the messages to be sent
     659         4864 :       ALLOCATE (buffer_send(number_of_send))
     660              :       ! grid_ref_2_send_pos is an array (map) that given a pair
     661              :       ! (ref_send_prow,ref_send_pcol) returns
     662              :       ! the position in the buffer_send associated to that process
     663         3876 :       ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
     664         5016 :       grid_ref_2_send_pos = 0
     665              :       ! finalize the allocation of buffer_send with the actual size
     666              :       ! of each message (actual size is size_send_buffer)
     667         1292 :       send_counter = 0
     668         2432 :       DO proc_shift = 1, para_env%num_pe - 1
     669         1140 :          proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
     670         1140 :          size_send_buffer = map_send_size(proc_send)
     671         2432 :          IF (map_send_size(proc_send) > 0) THEN
     672         1140 :             send_counter = send_counter + 1
     673              :             ! allocate the sending buffer (msg)
     674         3420 :             ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
     675      1462275 :             buffer_send(send_counter)%msg = 0.0_dp
     676         1140 :             buffer_send(send_counter)%proc = proc_send
     677              :             ! get the pointer to prow, pcol of the process that has
     678              :             ! to receive this message
     679         1140 :             ref_send_prow = mepos_2_grid(1, proc_send)
     680         1140 :             ref_send_pcol = mepos_2_grid(2, proc_send)
     681              :             ! save the rank of the process that has to receive this message
     682         1140 :             grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
     683              :          END IF
     684              :       END DO
     685              : 
     686              :       ! loop over the locally held data and fill the buffer_send
     687              :       ! for doing that we need an array that keep track if the
     688              :       ! sequential increase of the index for each message
     689         3724 :       ALLOCATE (iii_vet(number_of_send))
     690         2432 :       iii_vet = 0
     691        75774 :       DO iiB = my_start_row, my_end_row
     692        74482 :          send_prow = fm_mat%matrix_struct%g2p_row(iiB)
     693      4403566 :          DO jjB = my_start_col, my_end_col
     694      4327792 :             send_pcol = fm_mat%matrix_struct%g2p_col(jjB)
     695              :             ! we don't need to send to ourselves
     696      4327792 :             IF (grid_2_mepos(send_prow, send_pcol) == para_env%mepos) CYCLE
     697              : 
     698      1461135 :             send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
     699      1461135 :             iii_vet(send_counter) = iii_vet(send_counter) + 1
     700      1461135 :             iii = iii_vet(send_counter)
     701      4402274 :             buffer_send(send_counter)%msg(iii) = mat2D(iiB - my_start_row + 1, jjB - my_start_col + 1)
     702              :          END DO
     703              :       END DO
     704              : 
     705         1292 :       DEALLOCATE (iii_vet)
     706         1292 :       DEALLOCATE (grid_ref_2_send_pos)
     707         1292 :       IF (my_do_release_mat) DEALLOCATE (mat2D)
     708         1292 :       CALL timestop(handle2)
     709              : 
     710              :       ! 5) similarly to what done for the buffer_send
     711              :       !    create the buffer for receive, post the message with irecv
     712              :       !    and send the messages non-blocking
     713         1292 :       CALL timeset(routineN//"_isendrecv", handle2)
     714              :       ! count the number of messages to be received
     715         1292 :       number_of_rec = 0
     716         2432 :       DO proc_shift = 1, para_env%num_pe - 1
     717         1140 :          proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
     718         2432 :          IF (map_rec_size(proc_receive) > 0) THEN
     719         1140 :             number_of_rec = number_of_rec + 1
     720              :          END IF
     721              :       END DO
     722              : 
     723         4864 :       ALLOCATE (buffer_rec(number_of_rec))
     724              : 
     725         1292 :       rec_counter = 0
     726         2432 :       DO proc_shift = 1, para_env%num_pe - 1
     727         1140 :          proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
     728         1140 :          size_rec_buffer = map_rec_size(proc_receive)
     729         2432 :          IF (map_rec_size(proc_receive) > 0) THEN
     730         1140 :             rec_counter = rec_counter + 1
     731              :             ! prepare the buffer for receive
     732         3420 :             ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
     733      1462275 :             buffer_rec(rec_counter)%msg = 0.0_dp
     734         1140 :             buffer_rec(rec_counter)%proc = proc_receive
     735              :             ! post the message to be received
     736              :             CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
     737         1140 :                                 buffer_rec(rec_counter)%msg_req)
     738              :          END IF
     739              :       END DO
     740              : 
     741              :       ! send messages
     742         4864 :       ALLOCATE (req_send(number_of_send))
     743         1292 :       send_counter = 0
     744         2432 :       DO proc_shift = 1, para_env%num_pe - 1
     745         1140 :          proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
     746         2432 :          IF (map_send_size(proc_send) > 0) THEN
     747         1140 :             send_counter = send_counter + 1
     748              :             CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
     749         1140 :                                 buffer_send(send_counter)%msg_req)
     750         1140 :             req_send(send_counter) = buffer_send(send_counter)%msg_req
     751              :          END IF
     752              :       END DO
     753         1292 :       CALL timestop(handle2)
     754              : 
     755              :       ! 6) fill the fm_mat matrix with the messages received from the
     756              :       !    other processes
     757         1292 :       CALL timeset(routineN//"_fill", handle2)
     758              :       ! In order to perform this step efficiently first we have to know the
     759              :       ! ranges of the blocks over which a given process hold its local data.
     760              :       ! Start with the rows ...
     761         1292 :       my_num_row_blocks = 1
     762        49698 :       DO iiB = 1, nrow_local - 1
     763        48406 :          IF (ABS(row_indices(iiB + 1) - row_indices(iiB)) == 1) CYCLE
     764        49698 :          my_num_row_blocks = my_num_row_blocks + 1
     765              :       END DO
     766         3876 :       ALLOCATE (blocks_ranges_row(2, my_num_row_blocks))
     767         7046 :       blocks_ranges_row = 0
     768         1292 :       blocks_ranges_row(1, 1) = row_indices(1)
     769         1292 :       iii = 1
     770        49698 :       DO iiB = 1, nrow_local - 1
     771        48406 :          IF (ABS(row_indices(iiB + 1) - row_indices(iiB)) == 1) CYCLE
     772          626 :          iii = iii + 1
     773          626 :          blocks_ranges_row(2, iii - 1) = row_indices(iiB)
     774        49698 :          blocks_ranges_row(1, iii) = row_indices(iiB + 1)
     775              :       END DO
     776         1292 :       blocks_ranges_row(2, my_num_row_blocks) = row_indices(MAX(nrow_local, 1))
     777              :       ! ... and columns
     778         1292 :       my_num_col_blocks = 1
     779       120768 :       DO jjB = 1, ncol_local - 1
     780       119476 :          IF (ABS(col_indices(jjB + 1) - col_indices(jjB)) == 1) CYCLE
     781       120768 :          my_num_col_blocks = my_num_col_blocks + 1
     782              :       END DO
     783         3876 :       ALLOCATE (blocks_ranges_col(2, my_num_col_blocks))
     784         5168 :       blocks_ranges_col = 0
     785         1292 :       blocks_ranges_col(1, 1) = col_indices(1)
     786         1292 :       iii = 1
     787       120768 :       DO jjB = 1, ncol_local - 1
     788       119476 :          IF (ABS(col_indices(jjB + 1) - col_indices(jjB)) == 1) CYCLE
     789            0 :          iii = iii + 1
     790            0 :          blocks_ranges_col(2, iii - 1) = col_indices(jjB)
     791       120768 :          blocks_ranges_col(1, iii) = col_indices(jjB + 1)
     792              :       END DO
     793         1292 :       blocks_ranges_col(2, my_num_col_blocks) = col_indices(MAX(ncol_local, 1))
     794              : 
     795         1292 :       rec_counter = 0
     796         2432 :       DO proc_shift = 1, para_env%num_pe - 1
     797         1140 :          proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
     798         1140 :          size_rec_buffer = map_rec_size(proc_receive)
     799              : 
     800         2432 :          IF (map_rec_size(proc_receive) > 0) THEN
     801         1140 :             rec_counter = rec_counter + 1
     802              : 
     803         1140 :             CALL get_group_dist(gd_col, proc_receive, rec_col_start, rec_col_end, rec_col_size)
     804              : 
     805              :             ! precompute the number of received columns and relative index
     806         1140 :             num_rec_cols = 0
     807         2280 :             DO jjB = 1, my_num_col_blocks
     808         1140 :                start_col_block = MAX(blocks_ranges_col(1, jjB), rec_col_start)
     809         1140 :                end_col_block = MIN(blocks_ranges_col(2, jjB), rec_col_end)
     810        66396 :                DO j_sub = start_col_block, end_col_block
     811        65256 :                   num_rec_cols = num_rec_cols + 1
     812              :                END DO
     813              :             END DO
     814         3420 :             ALLOCATE (index_col_rec(num_rec_cols))
     815        65256 :             index_col_rec = 0
     816         1140 :             iii = 0
     817         2280 :             DO jjB = 1, my_num_col_blocks
     818         1140 :                start_col_block = MAX(blocks_ranges_col(1, jjB), rec_col_start)
     819         1140 :                end_col_block = MIN(blocks_ranges_col(2, jjB), rec_col_end)
     820        66396 :                DO j_sub = start_col_block, end_col_block
     821        64116 :                   iii = iii + 1
     822        64116 :                   j_local = fm_mat%matrix_struct%g2l_col(j_sub)
     823        65256 :                   index_col_rec(iii) = j_local
     824              :                END DO
     825              :             END DO
     826              : 
     827         1140 :             CALL get_group_dist(gd_row, proc_receive, rec_row_start, rec_row_end, rec_row_size)
     828              : 
     829              :             ! wait for the message
     830         1140 :             CALL buffer_rec(rec_counter)%msg_req%wait()
     831              : 
     832              :             ! fill the local data in fm_mat
     833         1140 :             iii = 0
     834         2906 :             DO iiB = 1, my_num_row_blocks
     835         1766 :                start_row_block = MAX(blocks_ranges_row(1, iiB), rec_row_start)
     836         1766 :                end_row_block = MIN(blocks_ranges_row(2, iiB), rec_row_end)
     837        35024 :                DO i_sub = start_row_block, end_row_block
     838        32118 :                   i_local = fm_mat%matrix_struct%g2l_row(i_sub)
     839      1495019 :                   DO jjB = 1, num_rec_cols
     840      1461135 :                      iii = iii + 1
     841      1461135 :                      j_local = index_col_rec(jjB)
     842      1493253 :                      fm_mat%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
     843              :                   END DO
     844              :                END DO
     845              :             END DO
     846              : 
     847         1140 :             DEALLOCATE (buffer_rec(rec_counter)%msg)
     848         3420 :             DEALLOCATE (index_col_rec)
     849              :          END IF
     850              :       END DO
     851         2432 :       DEALLOCATE (buffer_rec)
     852              : 
     853         1292 :       DEALLOCATE (blocks_ranges_row)
     854         1292 :       DEALLOCATE (blocks_ranges_col)
     855              : 
     856         1292 :       CALL timestop(handle2)
     857              : 
     858              :       ! 7) Finally wait for all messeges to be sent
     859         1292 :       CALL timeset(routineN//"_waitall", handle2)
     860         1292 :       CALL mp_waitall(req_send(:))
     861         2432 :       DO send_counter = 1, number_of_send
     862         2432 :          DEALLOCATE (buffer_send(send_counter)%msg)
     863              :       END DO
     864         2432 :       DEALLOCATE (buffer_send)
     865         1292 :       CALL timestop(handle2)
     866              : 
     867         1292 :       DEALLOCATE (map_send_size)
     868         1292 :       DEALLOCATE (map_rec_size)
     869         1292 :       DEALLOCATE (grid_2_mepos)
     870         1292 :       DEALLOCATE (mepos_2_grid)
     871              : 
     872         1292 :       CALL timestop(handle)
     873              : 
     874        10336 :    END SUBROUTINE array2fm
     875              : 
     876              : ! **************************************************************************************************
     877              : !> \brief redistribute fm to local part of array
     878              : !> \param mat2D ...
     879              : !> \param my_rows ...
     880              : !> \param my_start_row ...
     881              : !> \param my_end_row ...
     882              : !> \param my_cols ...
     883              : !> \param my_start_col ...
     884              : !> \param my_end_col ...
     885              : !> \param group_grid_2_mepos ...
     886              : !> \param mepos_2_grid_group ...
     887              : !> \param ngroup_row ...
     888              : !> \param ngroup_col ...
     889              : !> \param fm_mat ...
     890              : ! **************************************************************************************************
     891          722 :    SUBROUTINE fm2array(mat2D, my_rows, my_start_row, my_end_row, &
     892              :                        my_cols, my_start_col, my_end_col, &
     893              :                        group_grid_2_mepos, mepos_2_grid_group, &
     894              :                        ngroup_row, ngroup_col, &
     895              :                        fm_mat)
     896              : 
     897              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
     898              :          INTENT(OUT)                                     :: mat2D
     899              :       INTEGER, INTENT(IN)                                :: my_rows, my_start_row, my_end_row, &
     900              :                                                             my_cols, my_start_col, my_end_col
     901              :       INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN)  :: group_grid_2_mepos, mepos_2_grid_group
     902              :       INTEGER, INTENT(IN)                                :: ngroup_row, ngroup_col
     903              :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_mat
     904              : 
     905              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'fm2array'
     906              : 
     907              :       INTEGER :: dummy_proc, handle, handle2, i_global, iiB, iii, itmp(2), j_global, jjB, mypcol, &
     908              :          myprow, ncol_local, npcol, nprow, nrow_local, num_cols, num_rec_rows, num_rows, &
     909              :          number_of_rec, number_of_send, proc_receive, proc_send, proc_shift, rec_col_size, &
     910              :          rec_counter, rec_pcol, rec_prow, rec_row_size, ref_send_pcol, ref_send_prow, &
     911              :          send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
     912          722 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: iii_vet, index_row_rec, map_rec_size, &
     913          722 :                                                             map_send_size
     914          722 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: grid_2_mepos, grid_ref_2_send_pos, &
     915          722 :                                                             mepos_2_grid, sizes
     916          722 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     917              :       REAL(KIND=dp)                                      :: part_col, part_row
     918              :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
     919          722 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
     920              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     921          722 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: req_send
     922              : 
     923          722 :       CALL timeset(routineN, handle)
     924              : 
     925              :       ! allocate the array
     926         2888 :       ALLOCATE (mat2D(my_rows, my_cols))
     927      2235585 :       mat2D = 0.0_dp
     928              : 
     929              :       ! start procedure
     930              :       ! get information of from the full matrix
     931              :       CALL cp_fm_get_info(matrix=fm_mat, &
     932              :                           nrow_local=nrow_local, &
     933              :                           ncol_local=ncol_local, &
     934              :                           row_indices=row_indices, &
     935              :                           col_indices=col_indices, &
     936              :                           nrow_global=num_rows, &
     937              :                           ncol_global=num_cols, &
     938          722 :                           para_env=para_env)
     939          722 :       myprow = fm_mat%matrix_struct%context%mepos(1)
     940          722 :       mypcol = fm_mat%matrix_struct%context%mepos(2)
     941          722 :       nprow = fm_mat%matrix_struct%context%num_pe(1)
     942          722 :       npcol = fm_mat%matrix_struct%context%num_pe(2)
     943              : 
     944              :       ! start the communication part
     945              :       ! 0) create array containing the processes position
     946              :       !    and supporting infos
     947          722 :       CALL timeset(routineN//"_info", handle2)
     948         2888 :       ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
     949         2888 :       grid_2_mepos = 0
     950         2166 :       ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))
     951              : 
     952              :       ! fill the info array
     953          722 :       grid_2_mepos(myprow, mypcol) = para_env%mepos
     954              : 
     955              :       ! sum infos
     956          722 :       CALL para_env%sum(grid_2_mepos)
     957         2166 :       CALL para_env%allgather([myprow, mypcol], mepos_2_grid)
     958              : 
     959              :       ! 1) loop over my local data and define a map for the proc to send data
     960         2166 :       ALLOCATE (map_send_size(0:para_env%num_pe - 1))
     961         2166 :       map_send_size = 0
     962          722 :       part_row = REAL(num_rows, KIND=dp)/REAL(ngroup_row, KIND=dp)
     963          722 :       part_col = REAL(num_cols, KIND=dp)/REAL(ngroup_col, KIND=dp)
     964              : 
     965        61596 :       DO jjB = 1, ncol_local
     966        60874 :          j_global = col_indices(jjB)
     967              :          ! check the group holding this element
     968              :          ! dirty way, if someone has a better idea ...
     969        60874 :          send_pcol = INT(REAL(j_global - 1, KIND=dp)/part_col)
     970        60874 :          send_pcol = MAX(0, send_pcol)
     971        60874 :          send_pcol = MIN(send_pcol, ngroup_col - 1)
     972              :          DO
     973        60874 :             itmp = get_limit(num_cols, ngroup_col, send_pcol)
     974        60874 :             IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
     975            0 :             IF (j_global < itmp(1)) send_pcol = send_pcol - 1
     976        60874 :             IF (j_global > itmp(2)) send_pcol = send_pcol + 1
     977              :          END DO
     978              : 
     979      2264586 :          DO iiB = 1, nrow_local
     980      2202990 :             i_global = row_indices(iiB)
     981              :             ! check the process in the group holding this element
     982      2202990 :             send_prow = INT(REAL(i_global - 1, KIND=dp)/part_row)
     983      2202990 :             send_prow = MAX(0, send_prow)
     984      2202990 :             send_prow = MIN(send_prow, ngroup_row - 1)
     985              :             DO
     986      2202990 :                itmp = get_limit(num_rows, ngroup_row, send_prow)
     987      2202990 :                IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
     988            0 :                IF (i_global < itmp(1)) send_prow = send_prow - 1
     989      2202990 :                IF (i_global > itmp(2)) send_prow = send_prow + 1
     990              :             END DO
     991              : 
     992      2202990 :             proc_send = group_grid_2_mepos(send_prow, send_pcol)
     993              : 
     994      2263864 :             map_send_size(proc_send) = map_send_size(proc_send) + 1
     995              : 
     996              :          END DO ! i_global
     997              :       END DO ! j_global
     998              : 
     999              :       ! 2) loop over my local data of the array and define a map for the proc from which rec data
    1000         2166 :       ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
    1001         2166 :       map_rec_size = 0
    1002          722 :       dummy_proc = 0
    1003        32595 :       DO jjB = my_start_col, my_end_col
    1004        31873 :          rec_pcol = fm_mat%matrix_struct%g2p_col(jjB)
    1005      2235585 :          DO iiB = my_start_row, my_end_row
    1006      2202990 :             rec_prow = fm_mat%matrix_struct%g2p_row(iiB)
    1007      2202990 :             proc_receive = grid_2_mepos(rec_prow, rec_pcol)
    1008      2234863 :             map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
    1009              :          END DO
    1010              :       END DO
    1011              : 
    1012              :       ! 3) check if the local data in the fm_mat has to be stored in the new array
    1013          722 :       IF (map_rec_size(para_env%mepos) > 0) THEN
    1014        61596 :          DO jjB = 1, ncol_local
    1015        60874 :             j_global = col_indices(jjB)
    1016        61596 :             IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
    1017      1198612 :                DO iiB = 1, nrow_local
    1018      1166739 :                   i_global = row_indices(iiB)
    1019      1198612 :                   IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
    1020      1158181 :                      mat2D(i_global - my_start_row + 1, j_global - my_start_col + 1) = fm_mat%local_data(iiB, jjB)
    1021              :                   END IF
    1022              :                END DO
    1023              :             END IF
    1024              :          END DO
    1025              :       END IF
    1026          722 :       CALL timestop(handle2)
    1027              : 
    1028              :       ! 4) reorder data in the send_buffer
    1029          722 :       CALL timeset(routineN//"_buffer_send", handle2)
    1030              :       ! count the number of messages to send
    1031          722 :       number_of_send = 0
    1032         1444 :       DO proc_shift = 1, para_env%num_pe - 1
    1033          722 :          proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
    1034         1444 :          IF (map_send_size(proc_send) > 0) THEN
    1035          722 :             number_of_send = number_of_send + 1
    1036              :          END IF
    1037              :       END DO
    1038              :       ! allocate the structure that will hold the messages to be sent
    1039         2888 :       ALLOCATE (buffer_send(number_of_send))
    1040              :       ! grid_ref_2_send_pos is an array (map) that given a pair
    1041              :       ! (ref_send_prow,ref_send_pcol) returns
    1042              :       ! the position in the buffer_send associated to that process
    1043              : 
    1044         2888 :       ALLOCATE (grid_ref_2_send_pos(0:ngroup_row - 1, 0:ngroup_col - 1))
    1045         3578 :       grid_ref_2_send_pos = 0
    1046              : 
    1047              :       ! finalize the allocation of buffer_send with the actual size
    1048              :       ! of each message (actual size is size_send_buffer)
    1049          722 :       send_counter = 0
    1050         1444 :       DO proc_shift = 1, para_env%num_pe - 1
    1051          722 :          proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
    1052          722 :          size_send_buffer = map_send_size(proc_send)
    1053         1444 :          IF (map_send_size(proc_send) > 0) THEN
    1054          722 :             send_counter = send_counter + 1
    1055              :             ! allocate the sending buffer (msg)
    1056         2166 :             ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
    1057      1045531 :             buffer_send(send_counter)%msg = 0.0_dp
    1058          722 :             buffer_send(send_counter)%proc = proc_send
    1059              :             ! get the pointer to prow, pcol of the process that has
    1060              :             ! to receive this message
    1061          722 :             ref_send_prow = mepos_2_grid_group(1, proc_send)
    1062          722 :             ref_send_pcol = mepos_2_grid_group(2, proc_send)
    1063              :             ! save the rank of the process that has to receive this message
    1064          722 :             grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
    1065              :          END IF
    1066              :       END DO
    1067              : 
    1068              :       ! loop over the locally held data and fill the buffer_send
    1069              :       ! for doing that we need an array that keep track if the
    1070              :       ! sequential increase of the index for each message
    1071         2166 :       ALLOCATE (iii_vet(number_of_send))
    1072         1444 :       iii_vet = 0
    1073        61596 :       DO jjB = 1, ncol_local
    1074        60874 :          j_global = col_indices(jjB)
    1075              :          ! check the group holding this element
    1076              :          ! dirty way, if someone has a better idea ...
    1077        60874 :          send_pcol = INT(REAL(j_global - 1, KIND=dp)/part_col)
    1078        60874 :          send_pcol = MAX(0, send_pcol)
    1079        60874 :          send_pcol = MIN(send_pcol, ngroup_col - 1)
    1080              :          DO
    1081        60874 :             itmp = get_limit(num_cols, ngroup_col, send_pcol)
    1082        60874 :             IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
    1083            0 :             IF (j_global < itmp(1)) send_pcol = send_pcol - 1
    1084        60874 :             IF (j_global > itmp(2)) send_pcol = send_pcol + 1
    1085              :          END DO
    1086              : 
    1087      2264586 :          DO iiB = 1, nrow_local
    1088      2202990 :             i_global = row_indices(iiB)
    1089              :             ! check the process in the group holding this element
    1090      2202990 :             send_prow = INT(REAL(i_global - 1, KIND=dp)/part_row)
    1091      2202990 :             send_prow = MAX(0, send_prow)
    1092      2202990 :             send_prow = MIN(send_prow, ngroup_row - 1)
    1093              :             DO
    1094      2202990 :                itmp = get_limit(num_rows, ngroup_row, send_prow)
    1095      2202990 :                IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
    1096            0 :                IF (i_global < itmp(1)) send_prow = send_prow - 1
    1097      2202990 :                IF (i_global > itmp(2)) send_prow = send_prow + 1
    1098              :             END DO
    1099              :             ! we don't need to send to ourselves
    1100      2202990 :             IF (group_grid_2_mepos(send_prow, send_pcol) == para_env%mepos) CYCLE
    1101              : 
    1102      1044809 :             send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
    1103      1044809 :             iii_vet(send_counter) = iii_vet(send_counter) + 1
    1104      1044809 :             iii = iii_vet(send_counter)
    1105      2263864 :             buffer_send(send_counter)%msg(iii) = fm_mat%local_data(iiB, jjB)
    1106              :          END DO
    1107              :       END DO
    1108              : 
    1109          722 :       DEALLOCATE (iii_vet)
    1110          722 :       DEALLOCATE (grid_ref_2_send_pos)
    1111          722 :       CALL timestop(handle2)
    1112              : 
    1113              :       ! 5) similarly to what done for the buffer_send
    1114              :       !    create the buffer for receive, post the message with irecv
    1115              :       !    and send the messages with non-blocking
    1116          722 :       CALL timeset(routineN//"_isendrecv", handle2)
    1117              :       ! count the number of messages to be received
    1118          722 :       number_of_rec = 0
    1119         1444 :       DO proc_shift = 1, para_env%num_pe - 1
    1120          722 :          proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
    1121         1444 :          IF (map_rec_size(proc_receive) > 0) THEN
    1122          722 :             number_of_rec = number_of_rec + 1
    1123              :          END IF
    1124              :       END DO
    1125              : 
    1126         2888 :       ALLOCATE (buffer_rec(number_of_rec))
    1127              : 
    1128          722 :       rec_counter = 0
    1129         1444 :       DO proc_shift = 1, para_env%num_pe - 1
    1130          722 :          proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
    1131          722 :          size_rec_buffer = map_rec_size(proc_receive)
    1132         1444 :          IF (map_rec_size(proc_receive) > 0) THEN
    1133          722 :             rec_counter = rec_counter + 1
    1134              :             ! prepare the buffer for receive
    1135         2166 :             ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
    1136      1045531 :             buffer_rec(rec_counter)%msg = 0.0_dp
    1137          722 :             buffer_rec(rec_counter)%proc = proc_receive
    1138              :             ! post the message to be received
    1139              :             CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
    1140          722 :                                 buffer_rec(rec_counter)%msg_req)
    1141              :          END IF
    1142              :       END DO
    1143              : 
    1144              :       ! send messages
    1145         2888 :       ALLOCATE (req_send(number_of_send))
    1146          722 :       send_counter = 0
    1147         1444 :       DO proc_shift = 1, para_env%num_pe - 1
    1148          722 :          proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
    1149         1444 :          IF (map_send_size(proc_send) > 0) THEN
    1150          722 :             send_counter = send_counter + 1
    1151              :             CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
    1152          722 :                                 buffer_send(send_counter)%msg_req)
    1153          722 :             req_send(send_counter) = buffer_send(send_counter)%msg_req
    1154              :          END IF
    1155              :       END DO
    1156          722 :       CALL timestop(handle2)
    1157              : 
    1158              :       ! 6) fill the fm_mat matrix with the messages received from the
    1159              :       !    other processes
    1160          722 :       CALL timeset(routineN//"_fill", handle2)
    1161              :       CALL cp_fm_get_info(matrix=fm_mat, &
    1162              :                           nrow_local=nrow_local, &
    1163          722 :                           ncol_local=ncol_local)
    1164         2166 :       ALLOCATE (sizes(2, 0:para_env%num_pe - 1))
    1165         2166 :       CALL para_env%allgather([nrow_local, ncol_local], sizes)
    1166         2166 :       iiB = MAXVAL(sizes(1, :))
    1167         2166 :       ALLOCATE (index_row_rec(iiB))
    1168        27268 :       index_row_rec = 0
    1169          722 :       rec_counter = 0
    1170         1444 :       DO proc_shift = 1, para_env%num_pe - 1
    1171          722 :          proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
    1172          722 :          size_rec_buffer = map_rec_size(proc_receive)
    1173              : 
    1174         1444 :          IF (map_rec_size(proc_receive) > 0) THEN
    1175          722 :             rec_counter = rec_counter + 1
    1176              : 
    1177          722 :             rec_col_size = sizes(2, proc_receive)
    1178          722 :             rec_row_size = sizes(1, proc_receive)
    1179              : 
    1180              :             ! precompute the indeces of the rows
    1181          722 :             num_rec_rows = 0
    1182        26223 :             DO iiB = 1, rec_row_size
    1183        25501 :                i_global = fm_mat%matrix_struct%l2g_row(iiB, mepos_2_grid(1, proc_receive))
    1184        26223 :                IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
    1185        24183 :                   num_rec_rows = num_rec_rows + 1
    1186        24183 :                   index_row_rec(num_rec_rows) = i_global
    1187              :                END IF
    1188              :             END DO
    1189              : 
    1190          722 :             CALL buffer_rec(rec_counter)%msg_req%wait()
    1191              : 
    1192          722 :             iii = 0
    1193        61596 :             DO jjB = 1, rec_col_size
    1194        60874 :                j_global = fm_mat%matrix_struct%l2g_col(jjB, mepos_2_grid(2, proc_receive))
    1195        61596 :                IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
    1196      1076682 :                   DO iiB = 1, num_rec_rows
    1197      1044809 :                      i_global = index_row_rec(iiB)
    1198      1044809 :                      iii = iii + 1
    1199      1076682 :                      mat2D(i_global - my_start_row + 1, j_global - my_start_col + 1) = buffer_rec(rec_counter)%msg(iii)
    1200              :                   END DO
    1201              :                END IF
    1202              :             END DO
    1203              : 
    1204          722 :             DEALLOCATE (buffer_rec(rec_counter)%msg)
    1205              :          END IF
    1206              :       END DO
    1207         1444 :       DEALLOCATE (buffer_rec)
    1208          722 :       DEALLOCATE (index_row_rec)
    1209          722 :       CALL cp_fm_release(fm_mat)
    1210          722 :       CALL timestop(handle2)
    1211              : 
    1212              :       ! 7) Finally wait for all messeges to be sent
    1213          722 :       CALL timeset(routineN//"_waitall", handle2)
    1214          722 :       CALL mp_waitall(req_send(:))
    1215         1444 :       DO send_counter = 1, number_of_send
    1216         1444 :          DEALLOCATE (buffer_send(send_counter)%msg)
    1217              :       END DO
    1218         1444 :       DEALLOCATE (buffer_send)
    1219          722 :       CALL timestop(handle2)
    1220              : 
    1221          722 :       CALL timestop(handle)
    1222              : 
    1223         5054 :    END SUBROUTINE fm2array
    1224              : 
    1225              : ! **************************************************************************************************
    1226              : !> \brief redistribute 2D representation of 3d tensor to a set of dbcsr matrices
    1227              : !> \param Gamma_2D ...
    1228              : !> \param homo ...
    1229              : !> \param virtual ...
    1230              : !> \param dimen_ia ...
    1231              : !> \param para_env_sub ...
    1232              : !> \param my_ia_start ...
    1233              : !> \param my_ia_end ...
    1234              : !> \param my_group_L_size ...
    1235              : !> \param gd_ia ...
    1236              : !> \param dbcsr_Gamma ...
    1237              : !> \param mo_coeff_o ...
    1238              : ! **************************************************************************************************
    1239          358 :    SUBROUTINE create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, &
    1240              :                                  my_ia_start, my_ia_end, my_group_L_size, &
    1241          358 :                                  gd_ia, dbcsr_Gamma, mo_coeff_o)
    1242              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: Gamma_2D
    1243              :       INTEGER, INTENT(IN)                                :: homo, virtual, dimen_ia
    1244              :       TYPE(mp_para_env_type), INTENT(IN), POINTER        :: para_env_sub
    1245              :       INTEGER, INTENT(IN)                                :: my_ia_start, my_ia_end, my_group_L_size
    1246              :       TYPE(group_dist_d1_type), INTENT(IN)               :: gd_ia
    1247              :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT)    :: dbcsr_Gamma
    1248              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: mo_coeff_o
    1249              : 
    1250              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'create_dbcsr_gamma'
    1251              : 
    1252              :       INTEGER :: dummy_proc, handle, i_global, i_local, iaia, iiB, iii, itmp(2), j_global, &
    1253              :          j_local, jjB, jjj, kkB, mypcol, myprow, ncol_local, npcol, nprow, nrow_local, &
    1254              :          number_of_rec, number_of_send, proc_receive, proc_send, proc_shift, rec_counter, &
    1255              :          rec_iaia_end, rec_iaia_size, rec_iaia_start, rec_pcol, rec_prow, ref_send_pcol, &
    1256              :          ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
    1257          358 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: iii_vet, map_rec_size, map_send_size
    1258          358 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: grid_2_mepos, grid_ref_2_send_pos, &
    1259          358 :                                                             indeces_map_my, mepos_2_grid
    1260          358 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1261              :       REAL(KIND=dp)                                      :: part_ia
    1262              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1263              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct
    1264              :       TYPE(cp_fm_type)                                   :: fm_ia
    1265          358 :       TYPE(index_map), ALLOCATABLE, DIMENSION(:)         :: indeces_rec
    1266              :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
    1267          358 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
    1268          358 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: req_send
    1269              : 
    1270          358 :       CALL timeset(routineN, handle)
    1271              : 
    1272              :       ! create sub blacs env
    1273          358 :       NULLIFY (blacs_env)
    1274          358 :       CALL cp_blacs_env_create(blacs_env=blacs_env, para_env=para_env_sub)
    1275              : 
    1276              :       ! create the fm_ia buffer matrix
    1277          358 :       NULLIFY (fm_struct)
    1278              :       CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=homo, &
    1279          358 :                                ncol_global=virtual, para_env=para_env_sub)
    1280          358 :       CALL cp_fm_create(fm_ia, fm_struct, name="fm_ia")
    1281              :       ! release structure
    1282          358 :       CALL cp_fm_struct_release(fm_struct)
    1283              :       ! release blacs_env
    1284          358 :       CALL cp_blacs_env_release(blacs_env)
    1285              : 
    1286              :       ! get array information
    1287              :       CALL cp_fm_get_info(matrix=fm_ia, &
    1288              :                           nrow_local=nrow_local, &
    1289              :                           ncol_local=ncol_local, &
    1290              :                           row_indices=row_indices, &
    1291          358 :                           col_indices=col_indices)
    1292          358 :       myprow = fm_ia%matrix_struct%context%mepos(1)
    1293          358 :       mypcol = fm_ia%matrix_struct%context%mepos(2)
    1294          358 :       nprow = fm_ia%matrix_struct%context%num_pe(1)
    1295          358 :       npcol = fm_ia%matrix_struct%context%num_pe(2)
    1296              : 
    1297              :       ! 0) create array containing the processes position and supporting infos
    1298         1432 :       ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
    1299         1090 :       grid_2_mepos = 0
    1300         1074 :       ALLOCATE (mepos_2_grid(2, 0:para_env_sub%num_pe - 1))
    1301              :       ! fill the info array
    1302          358 :       grid_2_mepos(myprow, mypcol) = para_env_sub%mepos
    1303              :       ! sum infos
    1304          358 :       CALL para_env_sub%sum(grid_2_mepos)
    1305         1074 :       CALL para_env_sub%allgather([myprow, mypcol], mepos_2_grid)
    1306              : 
    1307              :       ! loop over local index range and define the sending map
    1308         1074 :       ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1))
    1309          732 :       map_send_size = 0
    1310          358 :       dummy_proc = 0
    1311        20050 :       DO iaia = my_ia_start, my_ia_end
    1312        19692 :          i_global = (iaia - 1)/virtual + 1
    1313        19692 :          j_global = MOD(iaia - 1, virtual) + 1
    1314        19692 :          send_prow = fm_ia%matrix_struct%g2p_row(i_global)
    1315        19692 :          send_pcol = fm_ia%matrix_struct%g2p_col(j_global)
    1316        19692 :          proc_send = grid_2_mepos(send_prow, send_pcol)
    1317        20050 :          map_send_size(proc_send) = map_send_size(proc_send) + 1
    1318              :       END DO
    1319              : 
    1320              :       ! loop over local data of fm_ia and define the receiving map
    1321         1074 :       ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1))
    1322          732 :       map_rec_size = 0
    1323          358 :       part_ia = REAL(dimen_ia, KIND=dp)/REAL(para_env_sub%num_pe, KIND=dp)
    1324              : 
    1325         1624 :       DO iiB = 1, nrow_local
    1326         1266 :          i_global = row_indices(iiB)
    1327        21316 :          DO jjB = 1, ncol_local
    1328        19692 :             j_global = col_indices(jjB)
    1329        19692 :             iaia = (i_global - 1)*virtual + j_global
    1330        19692 :             proc_receive = INT(REAL(iaia - 1, KIND=dp)/part_ia)
    1331        19692 :             proc_receive = MAX(0, proc_receive)
    1332        19692 :             proc_receive = MIN(proc_receive, para_env_sub%num_pe - 1)
    1333              :             DO
    1334        19692 :                itmp = get_limit(dimen_ia, para_env_sub%num_pe, proc_receive)
    1335        19692 :                IF (iaia >= itmp(1) .AND. iaia <= itmp(2)) EXIT
    1336            0 :                IF (iaia < itmp(1)) proc_receive = proc_receive - 1
    1337        19692 :                IF (iaia > itmp(2)) proc_receive = proc_receive + 1
    1338              :             END DO
    1339        20958 :             map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
    1340              :          END DO
    1341              :       END DO
    1342              : 
    1343              :       ! allocate the buffer for sending data
    1344          358 :       number_of_send = 0
    1345          374 :       DO proc_shift = 1, para_env_sub%num_pe - 1
    1346           16 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
    1347          374 :          IF (map_send_size(proc_send) > 0) THEN
    1348           15 :             number_of_send = number_of_send + 1
    1349              :          END IF
    1350              :       END DO
    1351              :       ! allocate the structure that will hold the messages to be sent
    1352          746 :       ALLOCATE (buffer_send(number_of_send))
    1353              :       ! and the map from the grid of processess to the message position
    1354         1074 :       ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
    1355         1090 :       grid_ref_2_send_pos = 0
    1356              :       ! finally allocate each message
    1357          358 :       send_counter = 0
    1358          374 :       DO proc_shift = 1, para_env_sub%num_pe - 1
    1359           16 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
    1360           16 :          size_send_buffer = map_send_size(proc_send)
    1361          374 :          IF (map_send_size(proc_send) > 0) THEN
    1362           15 :             send_counter = send_counter + 1
    1363              :             ! allocate the sending buffer (msg)
    1364           45 :             ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
    1365           15 :             buffer_send(send_counter)%proc = proc_send
    1366              :             ! get the pointer to prow, pcol of the process that has
    1367              :             ! to receive this message
    1368           15 :             ref_send_prow = mepos_2_grid(1, proc_send)
    1369           15 :             ref_send_pcol = mepos_2_grid(2, proc_send)
    1370              :             ! save the rank of the process that has to receive this message
    1371           15 :             grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
    1372              :          END IF
    1373              :       END DO
    1374              : 
    1375              :       ! allocate the buffer for receiving data
    1376              :       number_of_rec = 0
    1377          374 :       DO proc_shift = 1, para_env_sub%num_pe - 1
    1378           16 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
    1379          374 :          IF (map_rec_size(proc_receive) > 0) THEN
    1380           15 :             number_of_rec = number_of_rec + 1
    1381              :          END IF
    1382              :       END DO
    1383              :       ! allocate the structure that will hold the messages to be received
    1384              :       ! and relative indeces
    1385          746 :       ALLOCATE (buffer_rec(number_of_rec))
    1386          746 :       ALLOCATE (indeces_rec(number_of_rec))
    1387              :       ! finally allocate each message and fill the array of indeces
    1388          358 :       rec_counter = 0
    1389          374 :       DO proc_shift = 1, para_env_sub%num_pe - 1
    1390           16 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
    1391           16 :          size_rec_buffer = map_rec_size(proc_receive)
    1392          374 :          IF (map_rec_size(proc_receive) > 0) THEN
    1393           15 :             rec_counter = rec_counter + 1
    1394              :             ! prepare the buffer for receive
    1395           45 :             ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
    1396           15 :             buffer_rec(rec_counter)%proc = proc_receive
    1397              :             ! create the indeces array
    1398           45 :             ALLOCATE (indeces_rec(rec_counter)%map(2, size_rec_buffer))
    1399          882 :             indeces_rec(rec_counter)%map = 0
    1400           15 :             CALL get_group_dist(gd_ia, proc_receive, rec_iaia_start, rec_iaia_end, rec_iaia_size)
    1401           15 :             iii = 0
    1402          642 :             DO iaia = rec_iaia_start, rec_iaia_end
    1403          627 :                i_global = (iaia - 1)/virtual + 1
    1404          627 :                j_global = MOD(iaia - 1, virtual) + 1
    1405          627 :                rec_prow = fm_ia%matrix_struct%g2p_row(i_global)
    1406          627 :                rec_pcol = fm_ia%matrix_struct%g2p_col(j_global)
    1407          627 :                IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) CYCLE
    1408          289 :                iii = iii + 1
    1409          289 :                i_local = fm_ia%matrix_struct%g2l_row(i_global)
    1410          289 :                j_local = fm_ia%matrix_struct%g2l_col(j_global)
    1411          289 :                indeces_rec(rec_counter)%map(1, iii) = i_local
    1412          642 :                indeces_rec(rec_counter)%map(2, iii) = j_local
    1413              :             END DO
    1414              :          END IF
    1415              :       END DO
    1416              :       ! and create the index map for my local data
    1417          358 :       IF (map_rec_size(para_env_sub%mepos) > 0) THEN
    1418          358 :          size_rec_buffer = map_rec_size(para_env_sub%mepos)
    1419         1074 :          ALLOCATE (indeces_map_my(2, size_rec_buffer))
    1420        58567 :          indeces_map_my = 0
    1421              :          iii = 0
    1422        20050 :          DO iaia = my_ia_start, my_ia_end
    1423        19692 :             i_global = (iaia - 1)/virtual + 1
    1424        19692 :             j_global = MOD(iaia - 1, virtual) + 1
    1425        19692 :             rec_prow = fm_ia%matrix_struct%g2p_row(i_global)
    1426        19692 :             rec_pcol = fm_ia%matrix_struct%g2p_col(j_global)
    1427        19692 :             IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) CYCLE
    1428        19403 :             iii = iii + 1
    1429        19403 :             i_local = fm_ia%matrix_struct%g2l_row(i_global)
    1430        19403 :             j_local = fm_ia%matrix_struct%g2l_col(j_global)
    1431        19403 :             indeces_map_my(1, iii) = i_local
    1432        20050 :             indeces_map_my(2, iii) = j_local
    1433              :          END DO
    1434              :       END IF
    1435              : 
    1436              :       ! auxiliary vector of indeces for the send buffer
    1437          731 :       ALLOCATE (iii_vet(number_of_send))
    1438              :       ! vector for the send requests
    1439          746 :       ALLOCATE (req_send(number_of_send))
    1440              :       ! loop over auxiliary basis function and redistribute into a fm
    1441              :       ! and then compy the fm into a dbcsr matrix
    1442        16198 :       DO kkB = 1, my_group_L_size
    1443              :          ! zero the matries of the buffers and post the messages to be received
    1444        15840 :          CALL cp_fm_set_all(matrix=fm_ia, alpha=0.0_dp)
    1445        15840 :          rec_counter = 0
    1446        17276 :          DO proc_shift = 1, para_env_sub%num_pe - 1
    1447         1436 :             proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
    1448        17276 :             IF (map_rec_size(proc_receive) > 0) THEN
    1449         1326 :                rec_counter = rec_counter + 1
    1450        26960 :                buffer_rec(rec_counter)%msg = 0.0_dp
    1451              :                CALL para_env_sub%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
    1452         1326 :                                        buffer_rec(rec_counter)%msg_req)
    1453              :             END IF
    1454              :          END DO
    1455              :          ! fill the sending buffer and send the messages
    1456        17166 :          DO send_counter = 1, number_of_send
    1457        42800 :             buffer_send(send_counter)%msg = 0.0_dp
    1458              :          END DO
    1459        17166 :          iii_vet = 0
    1460              :          jjj = 0
    1461       901704 :          DO iaia = my_ia_start, my_ia_end
    1462       885864 :             i_global = (iaia - 1)/virtual + 1
    1463       885864 :             j_global = MOD(iaia - 1, virtual) + 1
    1464       885864 :             send_prow = fm_ia%matrix_struct%g2p_row(i_global)
    1465       885864 :             send_pcol = fm_ia%matrix_struct%g2p_col(j_global)
    1466       885864 :             proc_send = grid_2_mepos(send_prow, send_pcol)
    1467              :             ! we don't need to send to ourselves
    1468       901704 :             IF (grid_2_mepos(send_prow, send_pcol) == para_env_sub%mepos) THEN
    1469              :                ! filling fm_ia with local data
    1470       860230 :                jjj = jjj + 1
    1471       860230 :                i_local = indeces_map_my(1, jjj)
    1472       860230 :                j_local = indeces_map_my(2, jjj)
    1473       860230 :                fm_ia%local_data(i_local, j_local) = Gamma_2D(iaia - my_ia_start + 1, kkB)
    1474              :             ELSE
    1475        25634 :                send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
    1476        25634 :                iii_vet(send_counter) = iii_vet(send_counter) + 1
    1477        25634 :                iii = iii_vet(send_counter)
    1478        25634 :                buffer_send(send_counter)%msg(iii) = Gamma_2D(iaia - my_ia_start + 1, kkB)
    1479              :             END IF
    1480              :          END DO
    1481        17166 :          req_send = mp_request_null
    1482        15840 :          send_counter = 0
    1483        17276 :          DO proc_shift = 1, para_env_sub%num_pe - 1
    1484         1436 :             proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
    1485        17276 :             IF (map_send_size(proc_send) > 0) THEN
    1486         1326 :                send_counter = send_counter + 1
    1487              :                CALL para_env_sub%isend(buffer_send(send_counter)%msg, proc_send, &
    1488         1326 :                                        buffer_send(send_counter)%msg_req)
    1489         1326 :                req_send(send_counter) = buffer_send(send_counter)%msg_req
    1490              :             END IF
    1491              :          END DO
    1492              : 
    1493              :          ! receive the massages and fill the fm_ia
    1494        15840 :          rec_counter = 0
    1495        17276 :          DO proc_shift = 1, para_env_sub%num_pe - 1
    1496         1436 :             proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
    1497         1436 :             size_rec_buffer = map_rec_size(proc_receive)
    1498        17276 :             IF (map_rec_size(proc_receive) > 0) THEN
    1499         1326 :                rec_counter = rec_counter + 1
    1500              :                ! wait for the message
    1501         1326 :                CALL buffer_rec(rec_counter)%msg_req%wait()
    1502        26960 :                DO iii = 1, size_rec_buffer
    1503        25634 :                   i_local = indeces_rec(rec_counter)%map(1, iii)
    1504        25634 :                   j_local = indeces_rec(rec_counter)%map(2, iii)
    1505        26960 :                   fm_ia%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
    1506              :                END DO
    1507              :             END IF
    1508              :          END DO
    1509              : 
    1510              :          ! wait all
    1511        15840 :          CALL mp_waitall(req_send(:))
    1512              : 
    1513              :          ! now create the DBCSR matrix and copy fm_ia into it
    1514        15840 :          ALLOCATE (dbcsr_Gamma(kkB)%matrix)
    1515              :          CALL cp_dbcsr_m_by_n_from_template(dbcsr_Gamma(kkB)%matrix, &
    1516              :                                             template=mo_coeff_o, &
    1517        15840 :                                             m=homo, n=virtual, sym=dbcsr_type_no_symmetry)
    1518        16198 :          CALL copy_fm_to_dbcsr(fm_ia, dbcsr_Gamma(kkB)%matrix, keep_sparsity=.FALSE.)
    1519              : 
    1520              :       END DO
    1521              : 
    1522              :       ! deallocate stuff
    1523          358 :       DEALLOCATE (Gamma_2D)
    1524          358 :       DEALLOCATE (iii_vet)
    1525          358 :       DEALLOCATE (req_send)
    1526          358 :       IF (map_rec_size(para_env_sub%mepos) > 0) THEN
    1527          358 :          DEALLOCATE (indeces_map_my)
    1528              :       END IF
    1529          373 :       DO rec_counter = 1, number_of_rec
    1530           15 :          DEALLOCATE (indeces_rec(rec_counter)%map)
    1531          373 :          DEALLOCATE (buffer_rec(rec_counter)%msg)
    1532              :       END DO
    1533          373 :       DEALLOCATE (indeces_rec)
    1534          373 :       DEALLOCATE (buffer_rec)
    1535          373 :       DO send_counter = 1, number_of_send
    1536          373 :          DEALLOCATE (buffer_send(send_counter)%msg)
    1537              :       END DO
    1538          373 :       DEALLOCATE (buffer_send)
    1539          358 :       DEALLOCATE (map_send_size)
    1540          358 :       DEALLOCATE (map_rec_size)
    1541          358 :       DEALLOCATE (grid_2_mepos)
    1542          358 :       DEALLOCATE (mepos_2_grid)
    1543              : 
    1544              :       ! release buffer matrix
    1545          358 :       CALL cp_fm_release(fm_ia)
    1546              : 
    1547          358 :       CALL timestop(handle)
    1548              : 
    1549         1432 :    END SUBROUTINE create_dbcsr_gamma
    1550              : 
    1551              : ! **************************************************************************************************
    1552              : !> \brief prepare array for redistribution
    1553              : !> \param para_env ...
    1554              : !> \param para_env_sub ...
    1555              : !> \param ngroup ...
    1556              : !> \param group_grid_2_mepos ...
    1557              : !> \param mepos_2_grid_group ...
    1558              : !> \param pos_info ...
    1559              : ! **************************************************************************************************
    1560          352 :    SUBROUTINE prepare_redistribution(para_env, para_env_sub, ngroup, &
    1561              :                                      group_grid_2_mepos, mepos_2_grid_group, &
    1562              :                                      pos_info)
    1563              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env, para_env_sub
    1564              :       INTEGER, INTENT(IN)                                :: ngroup
    1565              :       INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(OUT) :: group_grid_2_mepos, mepos_2_grid_group
    1566              :       INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT), &
    1567              :          OPTIONAL                                        :: pos_info
    1568              : 
    1569              :       INTEGER                                            :: i, pos_group, pos_sub
    1570          352 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: my_pos_info
    1571              : 
    1572         1056 :       ALLOCATE (my_pos_info(0:para_env%num_pe - 1))
    1573          352 :       CALL para_env%allgather(para_env_sub%mepos, my_pos_info)
    1574              : 
    1575         1408 :       ALLOCATE (group_grid_2_mepos(0:para_env_sub%num_pe - 1, 0:ngroup - 1))
    1576         1744 :       group_grid_2_mepos = 0
    1577         1056 :       ALLOCATE (mepos_2_grid_group(2, 0:para_env%num_pe - 1))
    1578         2464 :       mepos_2_grid_group = 0
    1579              : 
    1580         1056 :       DO i = 0, para_env%num_pe - 1
    1581              :          ! calculate position of the group
    1582          704 :          pos_group = i/para_env_sub%num_pe
    1583              :          ! calculate position in the subgroup
    1584          704 :          pos_sub = my_pos_info(i)
    1585              :          ! fill the map from the grid of groups to process
    1586          704 :          group_grid_2_mepos(pos_sub, pos_group) = i
    1587              :          ! and the opposite, from the global pos to the grid pos
    1588          704 :          mepos_2_grid_group(1, i) = pos_sub
    1589         1056 :          mepos_2_grid_group(2, i) = pos_group
    1590              :       END DO
    1591              : 
    1592          352 :       IF (PRESENT(pos_info)) CALL move_alloc(my_pos_info, pos_info)
    1593              : 
    1594          352 :    END SUBROUTINE prepare_redistribution
    1595              : 
    1596            0 : END MODULE mp2_ri_grad_util
        

Generated by: LCOV version 2.0-1