LCOV - code coverage report
Current view: top level - src - mp2_ri_grad_util.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:b195825) Lines: 737 756 97.5 %
Date: 2024-04-20 06:29:22 Functions: 7 9 77.8 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Routines for calculating RI-MP2 gradients
      10             : !> \par History
      11             : !>      10.2013 created [Mauro Del Ben]
      12             : ! **************************************************************************************************
      13             : MODULE mp2_ri_grad_util
      14             : !
      15             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_create,&
      16             :                                               cp_blacs_env_release,&
      17             :                                               cp_blacs_env_type
      18             :    USE cp_dbcsr_operations,             ONLY: copy_fm_to_dbcsr,&
      19             :                                               cp_dbcsr_m_by_n_from_template
      20             :    USE cp_fm_basic_linalg,              ONLY: cp_fm_geadd
      21             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      22             :                                               cp_fm_struct_get,&
      23             :                                               cp_fm_struct_release,&
      24             :                                               cp_fm_struct_type
      25             :    USE cp_fm_types,                     ONLY: &
      26             :         cp_fm_create, cp_fm_get_info, cp_fm_indxg2l, cp_fm_indxg2p, cp_fm_indxl2g, cp_fm_release, &
      27             :         cp_fm_set_all, cp_fm_to_fm, cp_fm_type
      28             :    USE dbcsr_api,                       ONLY: dbcsr_p_type,&
      29             :                                               dbcsr_type,&
      30             :                                               dbcsr_type_no_symmetry
      31             :    USE group_dist_types,                ONLY: create_group_dist,&
      32             :                                               get_group_dist,&
      33             :                                               group_dist_d1_type,&
      34             :                                               release_group_dist
      35             :    USE kinds,                           ONLY: dp
      36             :    USE libint_2c_3c,                    ONLY: compare_potential_types
      37             :    USE message_passing,                 ONLY: mp_para_env_type,&
      38             :                                               mp_request_null,&
      39             :                                               mp_request_type,&
      40             :                                               mp_waitall
      41             :    USE mp2_types,                       ONLY: integ_mat_buffer_type,&
      42             :                                               mp2_type
      43             :    USE parallel_gemm_api,               ONLY: parallel_gemm
      44             :    USE util,                            ONLY: get_limit
      45             : #include "./base/base_uses.f90"
      46             : 
      47             :    IMPLICIT NONE
      48             : 
      49             :    PRIVATE
      50             : 
      51             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_grad_util'
      52             : 
      53             :    PUBLIC :: complete_gamma, array2fm, fm2array, prepare_redistribution, create_dbcsr_gamma
      54             : 
      55             :    TYPE index_map
      56             :       INTEGER, DIMENSION(:, :), ALLOCATABLE :: map
      57             :    END TYPE
      58             : 
      59             : CONTAINS
      60             : 
      61             : ! **************************************************************************************************
      62             : !> \brief complete the calculation of the Gamma matrices
      63             : !> \param mp2_env ...
      64             : !> \param B_ia_Q ...
      65             : !> \param dimen_RI ...
      66             : !> \param homo ...
      67             : !> \param virtual ...
      68             : !> \param para_env ...
      69             : !> \param para_env_sub ...
      70             : !> \param ngroup ...
      71             : !> \param my_group_L_size ...
      72             : !> \param my_group_L_start ...
      73             : !> \param my_group_L_end ...
      74             : !> \param my_B_size ...
      75             : !> \param my_B_virtual_start ...
      76             : !> \param gd_array ...
      77             : !> \param gd_B_virtual ...
      78             : !> \param kspin ...
      79             : !> \author Mauro Del Ben
      80             : ! **************************************************************************************************
      81         296 :    SUBROUTINE complete_gamma(mp2_env, B_ia_Q, dimen_RI, homo, virtual, para_env, para_env_sub, ngroup, &
      82             :                              my_group_L_size, my_group_L_start, my_group_L_end, &
      83             :                              my_B_size, my_B_virtual_start, gd_array, gd_B_virtual, kspin)
      84             : 
      85             :       TYPE(mp2_type)                                     :: mp2_env
      86             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
      87             :          INTENT(INOUT)                                   :: B_ia_Q
      88             :       INTEGER, INTENT(IN)                                :: dimen_RI, homo, virtual
      89             :       TYPE(mp_para_env_type), INTENT(IN), POINTER        :: para_env, para_env_sub
      90             :       INTEGER, INTENT(IN)                                :: ngroup, my_group_L_size, &
      91             :                                                             my_group_L_start, my_group_L_end, &
      92             :                                                             my_B_size, my_B_virtual_start
      93             :       TYPE(group_dist_d1_type), INTENT(IN)               :: gd_array, gd_B_virtual
      94             :       INTEGER, INTENT(IN)                                :: kspin
      95             : 
      96             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'complete_gamma'
      97             : 
      98             :       INTEGER :: dimen_ia, handle, i, ispin, kkB, my_ia_end, my_ia_size, my_ia_start, my_P_end, &
      99             :          my_P_size, my_P_start, nspins, pos_group, pos_sub
     100         296 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: pos_info
     101         296 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: group_grid_2_mepos, mepos_2_grid_group
     102         296 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: BIb_C_2D, Gamma_2D, Gamma_PQ
     103             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     104             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_ia, fm_struct_RI
     105             :       TYPE(cp_fm_type) :: fm_Gamma, fm_Gamma_PQ, fm_Gamma_PQ_2, fm_Gamma_PQ_temp, &
     106             :          fm_Gamma_PQ_temp_2, fm_ia_P, fm_Y, operator_half, PQ_half
     107         296 :       TYPE(group_dist_d1_type)                           :: gd_array_new, gd_ia, gd_ia_new, gd_P, &
     108         296 :                                                             gd_P_new
     109             : 
     110         296 :       CALL timeset(routineN, handle)
     111             : 
     112             :       ! reshape the data into a 2D array
     113             :       ! reorder the local data in such a way to help the next stage of matrix creation
     114             :       ! now the data inside the group are divided into a ia x K matrix
     115         296 :       dimen_ia = homo*virtual
     116         296 :       CALL create_group_dist(gd_ia, para_env_sub%num_pe, dimen_ia)
     117         296 :       CALL get_group_dist(gd_ia, para_env_sub%mepos, my_ia_start, my_ia_end, my_ia_size)
     118             : 
     119             :       CALL mat_3d_to_2d(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
     120             :                         my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
     121             : 
     122             :       CALL mat_3d_to_2d_gamma(mp2_env%ri_grad%Gamma_P_ia(kspin)%array, Gamma_2D, homo, my_B_size, virtual, my_B_virtual_start, &
     123         296 :                               my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
     124             : 
     125             :       ! create the processor map and size arrays
     126         296 :       CALL create_group_dist(gd_ia_new, para_env%num_pe)
     127         296 :       CALL create_group_dist(gd_array_new, para_env%num_pe)
     128         296 :       CALL create_group_dist(gd_P, para_env_sub%num_pe, dimen_RI)
     129         296 :       CALL create_group_dist(gd_P_new, para_env%num_pe)
     130             : 
     131         296 :       CALL get_group_dist(gd_P, para_env_sub%mepos, my_P_start, my_P_end, my_P_size)
     132             : 
     133             :       CALL prepare_redistribution(para_env, para_env_sub, ngroup, &
     134             :                                   group_grid_2_mepos, mepos_2_grid_group, pos_info=pos_info)
     135             : 
     136         888 :       DO i = 0, para_env%num_pe - 1
     137             :          ! calculate position of the group
     138         592 :          pos_group = i/para_env_sub%num_pe
     139             :          ! calculate position in the subgroup
     140         592 :          pos_sub = pos_info(i)
     141             :          ! 1 -> rows, 2 -> cols
     142         592 :          CALL get_group_dist(gd_ia, pos_sub, gd_ia_new, i)
     143         592 :          CALL get_group_dist(gd_array, pos_group, gd_array_new, i)
     144         888 :          CALL get_group_dist(gd_P, pos_sub, gd_P_new, i)
     145             :       END DO
     146             : 
     147             :       ! create the blacs env
     148         296 :       NULLIFY (blacs_env)
     149         296 :       CALL cp_blacs_env_create(blacs_env=blacs_env, para_env=para_env)
     150             : 
     151         296 :       NULLIFY (fm_struct_ia)
     152             :       CALL cp_fm_struct_create(fm_struct_ia, context=blacs_env, nrow_global=dimen_ia, &
     153         296 :                                ncol_global=dimen_RI, para_env=para_env)
     154             : 
     155             :       ! create the fm matrix Gamma
     156             :       CALL array2fm(Gamma_2D, fm_struct_ia, my_ia_start, my_ia_end, &
     157             :                     my_group_L_start, my_group_L_end, &
     158             :                     gd_ia_new, gd_array_new, &
     159             :                     group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
     160         296 :                     fm_Y)
     161             :       ! create the fm matrix B_ia_P
     162             :       CALL array2fm(BIb_C_2D, fm_struct_ia, my_ia_start, my_ia_end, &
     163             :                     my_group_L_start, my_group_L_end, &
     164             :                     gd_ia_new, gd_array_new, &
     165             :                     group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
     166         296 :                     fm_ia_P)
     167             : 
     168         296 :       NULLIFY (fm_struct_RI)
     169             :       CALL cp_fm_struct_create(fm_struct_RI, context=blacs_env, nrow_global=dimen_RI, &
     170         296 :                                ncol_global=dimen_RI, para_env=para_env)
     171             : 
     172             :       ! since we will need (P|Q)^(-1/2) in the future, make a copy
     173             :       CALL array2fm(mp2_env%ri_grad%PQ_half, fm_struct_RI, my_P_start, my_P_end, &
     174             :                     my_group_L_start, my_group_L_end, &
     175             :                     gd_P_new, gd_array_new, &
     176             :                     group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
     177         296 :                     PQ_half, do_release_mat=.FALSE.)
     178             : 
     179         296 :       IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
     180             :          CALL array2fm(mp2_env%ri_grad%operator_half, fm_struct_RI, my_P_start, my_P_end, &
     181             :                        my_group_L_start, my_group_L_end, &
     182             :                        gd_P_new, gd_array_new, &
     183             :                        group_grid_2_mepos, para_env_sub%num_pe, ngroup, &
     184           8 :                        operator_half, do_release_mat=.FALSE.)
     185             :       END IF
     186             : 
     187         296 :       CALL release_group_dist(gd_P_new)
     188         296 :       CALL release_group_dist(gd_ia_new)
     189         296 :       CALL release_group_dist(gd_array_new)
     190             : 
     191             :       ! complete the gamma matrix Gamma_ia^P = sum_Q (Y_ia^Q * (Q|P)^(-1/2))
     192         296 :       IF (compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
     193         288 :          CALL cp_fm_create(fm_Gamma, fm_struct_ia, name="fm_Gamma")
     194         288 :          CALL cp_fm_struct_release(fm_struct_ia)
     195             :          ! perform the matrix multiplication
     196             :          CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
     197             :                             matrix_a=fm_Y, matrix_b=PQ_half, beta=0.0_dp, &
     198         288 :                             matrix_c=fm_Gamma)
     199             :          ! release the Y matrix
     200         288 :          CALL cp_fm_release(fm_Y)
     201             : 
     202             :          ! complete gamma small (fm_Gamma_PQ)
     203             :          ! create temp matrix
     204         288 :          CALL cp_fm_create(fm_Gamma_PQ_temp, fm_struct_RI, name="fm_Gamma_PQ_temp")
     205             :          CALL parallel_gemm(transa="T", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_ia, alpha=1.0_dp, &
     206             :                             matrix_a=fm_Gamma, matrix_b=fm_ia_P, beta=0.0_dp, &
     207         288 :                             matrix_c=fm_Gamma_PQ_temp)
     208         288 :          CALL cp_fm_release(fm_ia_P)
     209             :          ! create fm_Gamma_PQ matrix
     210         288 :          CALL cp_fm_create(fm_Gamma_PQ, fm_struct_RI, name="fm_Gamma_PQ")
     211         288 :          CALL cp_fm_struct_release(fm_struct_RI)
     212             :          ! perform matrix multiplication
     213             :          CALL parallel_gemm(transa="N", transb="T", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
     214             :                             matrix_a=fm_Gamma_PQ_temp, matrix_b=PQ_half, beta=0.0_dp, &
     215         288 :                             matrix_c=fm_Gamma_PQ)
     216         288 :          CALL cp_fm_release(fm_Gamma_PQ_temp)
     217         288 :          CALL cp_fm_release(PQ_half)
     218             : 
     219             :       ELSE
     220             :          ! create temp matrix
     221           8 :          CALL cp_fm_create(fm_Gamma_PQ_temp, fm_struct_RI, name="fm_Gamma_PQ_temp")
     222             :          CALL parallel_gemm(transa="T", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_ia, alpha=1.0_dp, &
     223             :                             matrix_a=fm_Y, matrix_b=fm_ia_P, beta=0.0_dp, &
     224           8 :                             matrix_c=fm_Gamma_PQ_temp)
     225           8 :          CALL cp_fm_release(fm_ia_P)
     226             : 
     227           8 :          CALL cp_fm_create(fm_Gamma, fm_struct_ia, name="fm_Gamma")
     228           8 :          CALL cp_fm_struct_release(fm_struct_ia)
     229             :          ! perform the matrix multiplication
     230             :          CALL parallel_gemm(transa="N", transb="T", m=dimen_ia, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
     231             :                             matrix_a=fm_Y, matrix_b=PQ_half, beta=0.0_dp, &
     232           8 :                             matrix_c=fm_Gamma)
     233           8 :          CALL cp_fm_release(fm_Y)
     234             : 
     235           8 :          CALL cp_fm_create(fm_Gamma_PQ_temp_2, fm_struct_RI, name="fm_Gamma_PQ_temp_2")
     236             :          CALL parallel_gemm(transa="N", transb="T", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
     237             :                             matrix_a=fm_Gamma_PQ_temp, matrix_b=operator_half, beta=0.0_dp, &
     238           8 :                             matrix_c=fm_Gamma_PQ_temp_2)
     239             : 
     240           8 :          CALL cp_fm_create(fm_Gamma_PQ_2, fm_struct_RI, name="fm_Gamma_PQ_2")
     241             :          CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=1.0_dp, &
     242             :                             matrix_a=PQ_half, matrix_b=fm_Gamma_PQ_temp_2, beta=0.0_dp, &
     243           8 :                             matrix_c=fm_Gamma_PQ_temp)
     244           8 :          CALL cp_fm_to_fm(fm_Gamma_PQ_temp, fm_Gamma_PQ_2)
     245           8 :          CALL cp_fm_geadd(1.0_dp, "T", fm_Gamma_PQ_temp, 1.0_dp, fm_Gamma_PQ_2)
     246           8 :          CALL cp_fm_release(fm_Gamma_PQ_temp)
     247           8 :          CALL cp_fm_release(PQ_half)
     248             : 
     249           8 :          CALL cp_fm_create(fm_Gamma_PQ, fm_struct_RI)
     250           8 :          CALL cp_fm_struct_release(fm_struct_RI)
     251             :          CALL parallel_gemm(transa="N", transb="N", m=dimen_RI, n=dimen_RI, k=dimen_RI, alpha=-1.0_dp, &
     252             :                             matrix_a=operator_half, matrix_b=fm_Gamma_PQ_temp_2, beta=0.0_dp, &
     253           8 :                             matrix_c=fm_Gamma_PQ)
     254           8 :          CALL cp_fm_release(operator_half)
     255           8 :          CALL cp_fm_release(fm_Gamma_PQ_temp_2)
     256             :       END IF
     257             : 
     258             :       ! now redistribute the data, in this case we go back
     259             :       ! to the original way the integrals were distributed
     260             :       CALL fm2array(Gamma_2D, my_ia_size, my_ia_start, my_ia_end, &
     261             :                     my_group_L_size, my_group_L_start, my_group_L_end, &
     262             :                     group_grid_2_mepos, mepos_2_grid_group, &
     263             :                     para_env_sub%num_pe, ngroup, &
     264         296 :                     fm_Gamma)
     265             : 
     266        1184 :       ALLOCATE (Gamma_PQ(my_P_size, my_group_L_size))
     267             :       CALL fm2array(Gamma_PQ, my_P_size, my_P_start, my_P_end, &
     268             :                     my_group_L_size, my_group_L_start, my_group_L_end, &
     269             :                     group_grid_2_mepos, mepos_2_grid_group, &
     270             :                     para_env_sub%num_pe, ngroup, &
     271         296 :                     fm_Gamma_PQ)
     272         296 :       IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_PQ)) THEN
     273         220 :          CALL MOVE_ALLOC(Gamma_PQ, mp2_env%ri_grad%Gamma_PQ)
     274             :       ELSE
     275      303670 :          mp2_env%ri_grad%Gamma_PQ(:, :) = mp2_env%ri_grad%Gamma_PQ + Gamma_PQ
     276          76 :          DEALLOCATE (Gamma_PQ)
     277             :       END IF
     278             : 
     279         296 :       IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
     280          32 :          ALLOCATE (Gamma_PQ(my_P_size, my_group_L_size))
     281             :          CALL fm2array(Gamma_PQ, my_P_size, my_P_start, my_P_end, &
     282             :                        my_group_L_size, my_group_L_start, my_group_L_end, &
     283             :                        group_grid_2_mepos, mepos_2_grid_group, &
     284             :                        para_env_sub%num_pe, ngroup, &
     285           8 :                        fm_Gamma_PQ_2)
     286           8 :          IF (.NOT. ALLOCATED(mp2_env%ri_grad%Gamma_PQ_2)) THEN
     287           8 :             CALL MOVE_ALLOC(Gamma_PQ, mp2_env%ri_grad%Gamma_PQ_2)
     288             :          ELSE
     289           0 :             mp2_env%ri_grad%Gamma_PQ_2(:, :) = mp2_env%ri_grad%Gamma_PQ_2 + Gamma_PQ
     290           0 :             DEALLOCATE (Gamma_PQ)
     291             :          END IF
     292             :       END IF
     293             : 
     294             :       ! allocate G_P_ia (DBCSR)
     295         296 :       IF (.NOT. ALLOCATED(mp2_env%ri_grad%G_P_ia)) THEN
     296         220 :          nspins = SIZE(mp2_env%ri_grad%mo_coeff_o)
     297       13971 :          ALLOCATE (mp2_env%ri_grad%G_P_ia(my_group_L_size, nspins))
     298         516 :          DO ispin = 1, nspins
     299       13311 :          DO kkB = 1, my_group_L_size
     300       13091 :             NULLIFY (mp2_env%ri_grad%G_P_ia(kkB, ispin)%matrix)
     301             :          END DO
     302             :          END DO
     303             :       END IF
     304             : 
     305             :       ! create the Gamma_ia_P in DBCSR style
     306             :       CALL create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, &
     307             :                               my_ia_start, my_ia_end, my_group_L_size, gd_ia, &
     308         296 :                               mp2_env%ri_grad%G_P_ia(:, kspin), mp2_env%ri_grad%mo_coeff_o(1)%matrix)
     309             : 
     310         296 :       DEALLOCATE (pos_info)
     311         296 :       DEALLOCATE (group_grid_2_mepos, mepos_2_grid_group)
     312         296 :       CALL release_group_dist(gd_ia)
     313         296 :       CALL release_group_dist(gd_P)
     314             : 
     315             :       ! release blacs_env
     316         296 :       CALL cp_blacs_env_release(blacs_env)
     317             : 
     318         296 :       CALL timestop(handle)
     319             : 
     320        1776 :    END SUBROUTINE complete_gamma
     321             : 
     322             : ! **************************************************************************************************
     323             : !> \brief Redistribute a 3d matrix to a 2d matrix
     324             : !> \param B_ia_Q ...
     325             : !> \param BIb_C_2D ...
     326             : !> \param homo ...
     327             : !> \param my_B_size ...
     328             : !> \param virtual ...
     329             : !> \param my_B_virtual_start ...
     330             : !> \param my_ia_start ...
     331             : !> \param my_ia_end ...
     332             : !> \param my_ia_size ...
     333             : !> \param my_group_L_size ...
     334             : !> \param para_env_sub ...
     335             : !> \param gd_B_virtual ...
     336             : ! **************************************************************************************************
     337         296 :    SUBROUTINE mat_3d_to_2d(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
     338             :                            my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
     339             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
     340             :          INTENT(INOUT)                                   :: B_ia_Q
     341             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
     342             :          INTENT(OUT)                                     :: BIb_C_2D
     343             :       INTEGER, INTENT(IN)                                :: homo, my_B_size, virtual, &
     344             :                                                             my_B_virtual_start, my_ia_start, &
     345             :                                                             my_ia_end, my_ia_size, my_group_L_size
     346             :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env_sub
     347             :       TYPE(group_dist_d1_type), INTENT(IN)               :: gd_B_virtual
     348             : 
     349             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'mat_3d_to_2d'
     350             : 
     351             :       INTEGER :: handle, ia_global, iiB, jjB, proc_receive, proc_send, proc_shift, rec_B_size, &
     352             :          rec_B_virtual_end, rec_B_virtual_start
     353         296 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: BIb_C_rec
     354             : 
     355         296 :       CALL timeset(routineN, handle)
     356             : 
     357        1184 :       ALLOCATE (BIb_C_2D(my_ia_size, my_group_L_size))
     358      705779 :       BIb_C_2D = 0.0_dp
     359             : 
     360        1360 :       DO iiB = 1, homo
     361       16940 :          DO jjB = 1, my_B_size
     362       15580 :             ia_global = (iiB - 1)*virtual + my_B_virtual_start + jjB - 1
     363       16644 :             IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
     364      690895 :                BIb_C_2D(ia_global - my_ia_start + 1, :) = B_ia_Q(iiB, jjB, 1:my_group_L_size)
     365             :             END IF
     366             :          END DO
     367             :       END DO
     368             : 
     369         304 :       DO proc_shift = 1, para_env_sub%num_pe - 1
     370           8 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
     371           8 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
     372             : 
     373           8 :          CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
     374             : 
     375          40 :          ALLOCATE (BIb_C_rec(homo, rec_B_size, my_group_L_size))
     376       47130 :          BIb_C_rec = 0.0_dp
     377             : 
     378             :          CALL para_env_sub%sendrecv(B_ia_Q, proc_send, &
     379           8 :                                     BIb_C_rec, proc_receive)
     380             : 
     381          48 :          DO iiB = 1, homo
     382         438 :             DO jjB = 1, rec_B_size
     383         390 :                ia_global = (iiB - 1)*virtual + rec_B_virtual_start + jjB - 1
     384         430 :                IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
     385       17373 :                   BIb_C_2D(ia_global - my_ia_start + 1, :) = BIb_C_rec(iiB, jjB, 1:my_group_L_size)
     386             :                END IF
     387             :             END DO
     388             :          END DO
     389             : 
     390         312 :          DEALLOCATE (BIb_C_rec)
     391             :       END DO
     392         296 :       DEALLOCATE (B_ia_Q)
     393             : 
     394         296 :       CALL timestop(handle)
     395         592 :    END SUBROUTINE mat_3d_to_2d
     396             : 
     397             : ! **************************************************************************************************
     398             : !> \brief Redistribute a 3d matrix to a 2d matrix, specialized for Gamma_P_ia to account for a different order of indices
     399             : !> \param B_ia_Q ...
     400             : !> \param BIb_C_2D ...
     401             : !> \param homo ...
     402             : !> \param my_B_size ...
     403             : !> \param virtual ...
     404             : !> \param my_B_virtual_start ...
     405             : !> \param my_ia_start ...
     406             : !> \param my_ia_end ...
     407             : !> \param my_ia_size ...
     408             : !> \param my_group_L_size ...
     409             : !> \param para_env_sub ...
     410             : !> \param gd_B_virtual ...
     411             : ! **************************************************************************************************
     412         296 :    SUBROUTINE mat_3d_to_2d_gamma(B_ia_Q, BIb_C_2D, homo, my_B_size, virtual, my_B_virtual_start, &
     413             :                                  my_ia_start, my_ia_end, my_ia_size, my_group_L_size, para_env_sub, gd_B_virtual)
     414             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
     415             :          INTENT(INOUT)                                   :: B_ia_Q
     416             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
     417             :          INTENT(OUT)                                     :: BIb_C_2D
     418             :       INTEGER, INTENT(IN)                                :: homo, my_B_size, virtual, &
     419             :                                                             my_B_virtual_start, my_ia_start, &
     420             :                                                             my_ia_end, my_ia_size, my_group_L_size
     421             :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env_sub
     422             :       TYPE(group_dist_d1_type), INTENT(IN)               :: gd_B_virtual
     423             : 
     424             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'mat_3d_to_2d_gamma'
     425             : 
     426             :       INTEGER :: handle, ia_global, iiB, jjB, proc_receive, proc_send, proc_shift, rec_B_size, &
     427             :          rec_B_virtual_end, rec_B_virtual_start
     428         296 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: BIb_C_rec
     429             : 
     430         296 :       CALL timeset(routineN, handle)
     431             : 
     432        1184 :       ALLOCATE (BIb_C_2D(my_ia_size, my_group_L_size))
     433      705779 :       BIb_C_2D = 0.0_dp
     434             : 
     435        1360 :       DO iiB = 1, homo
     436       16940 :          DO jjB = 1, my_B_size
     437       15580 :             ia_global = (iiB - 1)*virtual + my_B_virtual_start + jjB - 1
     438       16644 :             IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
     439      690895 :                BIb_C_2D(ia_global - my_ia_start + 1, :) = B_ia_Q(jjB, iiB, 1:my_group_L_size)
     440             :             END IF
     441             :          END DO
     442             :       END DO
     443             : 
     444         304 :       DO proc_shift = 1, para_env_sub%num_pe - 1
     445           8 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
     446           8 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
     447             : 
     448           8 :          CALL get_group_dist(gd_B_virtual, proc_receive, rec_B_virtual_start, rec_B_virtual_end, rec_B_size)
     449             : 
     450          40 :          ALLOCATE (BIb_C_rec(rec_B_size, homo, my_group_L_size))
     451       43544 :          BIb_C_rec = 0.0_dp
     452             : 
     453             :          CALL para_env_sub%sendrecv(B_ia_Q, proc_send, &
     454           8 :                                     BIb_C_rec, proc_receive)
     455             : 
     456          48 :          DO iiB = 1, homo
     457         438 :             DO jjB = 1, rec_B_size
     458         390 :                ia_global = (iiB - 1)*virtual + rec_B_virtual_start + jjB - 1
     459         430 :                IF (ia_global >= my_ia_start .AND. ia_global <= my_ia_end) THEN
     460       17373 :                   BIb_C_2D(ia_global - my_ia_start + 1, :) = BIb_C_rec(jjB, iiB, 1:my_group_L_size)
     461             :                END IF
     462             :             END DO
     463             :          END DO
     464             : 
     465         312 :          DEALLOCATE (BIb_C_rec)
     466             :       END DO
     467         296 :       DEALLOCATE (B_ia_Q)
     468             : 
     469         296 :       CALL timestop(handle)
     470         592 :    END SUBROUTINE mat_3d_to_2d_gamma
     471             : 
     472             : ! **************************************************************************************************
     473             : !> \brief redistribute local part of array to fm
     474             : !> \param mat2D ...
     475             : !> \param fm_struct ...
     476             : !> \param my_start_row ...
     477             : !> \param my_end_row ...
     478             : !> \param my_start_col ...
     479             : !> \param my_end_col ...
     480             : !> \param gd_row ...
     481             : !> \param gd_col ...
     482             : !> \param group_grid_2_mepos ...
     483             : !> \param ngroup_row ...
     484             : !> \param ngroup_col ...
     485             : !> \param fm_mat ...
     486             : !> \param integ_group_size ...
     487             : !> \param color_group ...
     488             : !> \param do_release_mat whether to release the array (default: yes)
     489             : ! **************************************************************************************************
     490        1112 :    SUBROUTINE array2fm(mat2D, fm_struct, my_start_row, my_end_row, &
     491             :                        my_start_col, my_end_col, &
     492             :                        gd_row, gd_col, &
     493             :                        group_grid_2_mepos, ngroup_row, ngroup_col, &
     494             :                        fm_mat, integ_group_size, color_group, do_release_mat)
     495             : 
     496             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
     497             :          INTENT(INOUT)                                   :: mat2D
     498             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct
     499             :       INTEGER, INTENT(IN)                                :: my_start_row, my_end_row, my_start_col, &
     500             :                                                             my_end_col
     501             :       TYPE(group_dist_d1_type), INTENT(IN)               :: gd_row, gd_col
     502             :       INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN)  :: group_grid_2_mepos
     503             :       INTEGER, INTENT(IN)                                :: ngroup_row, ngroup_col
     504             :       TYPE(cp_fm_type), INTENT(OUT)                      :: fm_mat
     505             :       INTEGER, INTENT(IN), OPTIONAL                      :: integ_group_size, color_group
     506             :       LOGICAL, INTENT(IN), OPTIONAL                      :: do_release_mat
     507             : 
     508             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'array2fm'
     509             : 
     510             :       INTEGER :: dummy_proc, end_col_block, end_row_block, handle, handle2, i_global, i_local, &
     511             :          i_sub, iiB, iii, itmp(2), j_global, j_local, j_sub, jjB, my_num_col_blocks, &
     512             :          my_num_row_blocks, mypcol, myprow, ncol_block, ncol_local, npcol, nprow, nrow_block, &
     513             :          nrow_local, num_cols, num_rec_cols, num_rows, number_of_rec, number_of_send, &
     514             :          proc_receive, proc_send, proc_shift, rec_col_end, rec_col_size, rec_col_start, &
     515             :          rec_counter, rec_pcol, rec_prow, rec_row_end, rec_row_size, rec_row_start, ref_send_pcol, &
     516             :          ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
     517             :       INTEGER :: start_col_block, start_row_block
     518        1112 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: iii_vet, index_col_rec, map_rec_size, &
     519        1112 :                                                             map_send_size
     520        1112 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: blocks_ranges_col, blocks_ranges_row, &
     521        1112 :                                                             grid_2_mepos, grid_ref_2_send_pos, &
     522        1112 :                                                             mepos_2_grid
     523        1112 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     524             :       LOGICAL                                            :: convert_pos, my_do_release_mat
     525             :       REAL(KIND=dp)                                      :: part_col, part_row
     526             :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
     527        1112 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
     528             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     529        1112 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: req_send
     530             : 
     531        1112 :       CALL timeset(routineN, handle)
     532             : 
     533        1112 :       my_do_release_mat = .TRUE.
     534        1112 :       IF (PRESENT(do_release_mat)) my_do_release_mat = do_release_mat
     535             : 
     536        1112 :       CALL cp_fm_struct_get(fm_struct, para_env=para_env, nrow_global=num_rows, ncol_global=num_cols)
     537             : 
     538             :       ! create the full matrix, (num_rows x num_cols)
     539        1112 :       CALL cp_fm_create(fm_mat, fm_struct, name="fm_mat")
     540        1112 :       CALL cp_fm_set_all(matrix=fm_mat, alpha=0.0_dp)
     541             : 
     542             :       ! start filling procedure
     543             :       ! fill the matrix
     544             :       CALL cp_fm_get_info(matrix=fm_mat, &
     545             :                           nrow_local=nrow_local, &
     546             :                           ncol_local=ncol_local, &
     547             :                           row_indices=row_indices, &
     548             :                           col_indices=col_indices, &
     549             :                           nrow_block=nrow_block, &
     550        1112 :                           ncol_block=ncol_block)
     551        1112 :       myprow = fm_mat%matrix_struct%context%mepos(1)
     552        1112 :       mypcol = fm_mat%matrix_struct%context%mepos(2)
     553        1112 :       nprow = fm_mat%matrix_struct%context%num_pe(1)
     554        1112 :       npcol = fm_mat%matrix_struct%context%num_pe(2)
     555             : 
     556             :       ! start the communication part
     557             :       ! 0) create array containing the processes position
     558             :       !    and supporting infos
     559        1112 :       CALL timeset(routineN//"_info", handle2)
     560        4448 :       ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
     561        4330 :       grid_2_mepos = 0
     562        3336 :       ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))
     563        1112 :       grid_2_mepos(myprow, mypcol) = para_env%mepos
     564             :       ! sum infos
     565        1112 :       CALL para_env%sum(grid_2_mepos)
     566        3336 :       CALL para_env%allgather([myprow, mypcol], mepos_2_grid)
     567             : 
     568             :       ! 1) loop over my local data and define a map for the proc to send data
     569        3336 :       ALLOCATE (map_send_size(0:para_env%num_pe - 1))
     570        3218 :       map_send_size = 0
     571        1112 :       dummy_proc = 0
     572       63345 :       DO jjB = my_start_col, my_end_col
     573             :          send_pcol = cp_fm_indxg2p(jjB, ncol_block, dummy_proc, &
     574       62233 :                                    fm_mat%matrix_struct%first_p_pos(2), npcol)
     575     3601117 :          DO iiB = my_start_row, my_end_row
     576             :             send_prow = cp_fm_indxg2p(iiB, nrow_block, dummy_proc, &
     577     3537772 :                                       fm_mat%matrix_struct%first_p_pos(1), nprow)
     578     3537772 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     579     3600005 :             map_send_size(proc_send) = map_send_size(proc_send) + 1
     580             :          END DO
     581             :       END DO
     582             : 
     583             :       ! 2) loop over my local data of fm_mat and define a map for the proc from which rec data
     584        3336 :       ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
     585        3218 :       map_rec_size = 0
     586        1112 :       part_row = REAL(num_rows, KIND=dp)/REAL(ngroup_row, KIND=dp)
     587        1112 :       part_col = REAL(num_cols, KIND=dp)/REAL(ngroup_col, KIND=dp)
     588             : 
     589             :       ! In some cases we have to convert global positions to positions in a subgroup (RPA)
     590        1112 :       convert_pos = .FALSE.
     591        1112 :       IF (PRESENT(integ_group_size) .AND. PRESENT(color_group)) convert_pos = .TRUE.
     592             : 
     593       43277 :       DO jjB = 1, nrow_local
     594       42165 :          j_global = row_indices(jjB)
     595             :          ! check the group holding this element
     596             :          ! dirty way, if someone has a better idea ...
     597       42165 :          rec_prow = INT(REAL(j_global - 1, KIND=dp)/part_row)
     598       42165 :          rec_prow = MAX(0, rec_prow)
     599       42165 :          rec_prow = MIN(rec_prow, ngroup_row - 1)
     600             :          DO
     601       42165 :             itmp = get_limit(num_rows, ngroup_row, rec_prow)
     602       42165 :             IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
     603           0 :             IF (j_global < itmp(1)) rec_prow = rec_prow - 1
     604           0 :             IF (j_global > itmp(2)) rec_prow = rec_prow + 1
     605             :          END DO
     606             : 
     607       42165 :          IF (convert_pos) THEN
     608             :             ! if the group is not in the same RPA group cycle
     609       11572 :             IF ((rec_prow/integ_group_size) .NE. color_group) CYCLE
     610             :             ! convert global position to position into the RPA group
     611        6918 :             rec_prow = MOD(rec_prow, integ_group_size)
     612             :          END IF
     613             : 
     614     3576395 :          DO iiB = 1, ncol_local
     615     3537772 :             i_global = col_indices(iiB)
     616             :             ! check the process in the group holding this element
     617     3537772 :             rec_pcol = INT(REAL(i_global - 1, KIND=dp)/part_col)
     618     3537772 :             rec_pcol = MAX(0, rec_pcol)
     619     3537772 :             rec_pcol = MIN(rec_pcol, ngroup_col - 1)
     620             :             DO
     621     3537772 :                itmp = get_limit(num_cols, ngroup_col, rec_pcol)
     622     3537772 :                IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
     623           0 :                IF (i_global < itmp(1)) rec_pcol = rec_pcol - 1
     624           0 :                IF (i_global > itmp(2)) rec_pcol = rec_pcol + 1
     625             :             END DO
     626             : 
     627     3537772 :             proc_receive = group_grid_2_mepos(rec_prow, rec_pcol)
     628             : 
     629     3579937 :             map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
     630             : 
     631             :          END DO ! i_global
     632             :       END DO ! j_global
     633             : 
     634             :       ! 3) check if the local data has to be stored in the new fm_mat
     635        1112 :       IF (map_rec_size(para_env%mepos) > 0) THEN
     636      101674 :          DO jjB = 1, ncol_local
     637      100564 :             j_global = col_indices(jjB)
     638      101674 :             IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
     639     2893662 :                DO iiB = 1, nrow_local
     640     2831429 :                   i_global = row_indices(iiB)
     641     2893662 :                   IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
     642     2218008 :                      fm_mat%local_data(iiB, jjB) = mat2D(i_global - my_start_row + 1, j_global - my_start_col + 1)
     643             :                   END IF
     644             :                END DO
     645             :             END IF
     646             :          END DO
     647             :       END IF
     648        1112 :       CALL timestop(handle2)
     649             : 
     650             :       ! 4) reorder data in the send_buffer
     651        1112 :       CALL timeset(routineN//"_buffer_send", handle2)
     652             :       ! count the number of messages to send
     653        1112 :       number_of_send = 0
     654        2106 :       DO proc_shift = 1, para_env%num_pe - 1
     655         994 :          proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
     656        2106 :          IF (map_send_size(proc_send) > 0) THEN
     657         952 :             number_of_send = number_of_send + 1
     658             :          END IF
     659             :       END DO
     660             :       ! allocate the structure that will hold the messages to be sent
     661        4128 :       ALLOCATE (buffer_send(number_of_send))
     662             :       ! grid_ref_2_send_pos is an array (map) that given a pair
     663             :       ! (ref_send_prow,ref_send_pcol) returns
     664             :       ! the position in the buffer_send associated to that process
     665        4448 :       ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
     666        4330 :       grid_ref_2_send_pos = 0
     667             :       ! finalize the allocation of buffer_send with the actual size
     668             :       ! of each message (actual size is size_send_buffer)
     669        1112 :       send_counter = 0
     670        2106 :       DO proc_shift = 1, para_env%num_pe - 1
     671         994 :          proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
     672         994 :          size_send_buffer = map_send_size(proc_send)
     673        2106 :          IF (map_send_size(proc_send) > 0) THEN
     674         952 :             send_counter = send_counter + 1
     675             :             ! allocate the sending buffer (msg)
     676        2856 :             ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
     677     1320716 :             buffer_send(send_counter)%msg = 0.0_dp
     678         952 :             buffer_send(send_counter)%proc = proc_send
     679             :             ! get the pointer to prow, pcol of the process that has
     680             :             ! to receive this message
     681         952 :             ref_send_prow = mepos_2_grid(1, proc_send)
     682         952 :             ref_send_pcol = mepos_2_grid(2, proc_send)
     683             :             ! save the rank of the process that has to receive this message
     684         952 :             grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
     685             :          END IF
     686             :       END DO
     687             : 
     688             :       ! loop over the locally held data and fill the buffer_send
     689             :       ! for doing that we need an array that keep track if the
     690             :       ! sequential increase of the index for each message
     691        3176 :       ALLOCATE (iii_vet(number_of_send))
     692        2064 :       iii_vet = 0
     693       68133 :       DO iiB = my_start_row, my_end_row
     694             :          send_prow = cp_fm_indxg2p(iiB, nrow_block, dummy_proc, &
     695       67021 :                                    fm_mat%matrix_struct%first_p_pos(1), nprow)
     696     3605905 :          DO jjB = my_start_col, my_end_col
     697             :             send_pcol = cp_fm_indxg2p(jjB, ncol_block, dummy_proc, &
     698     3537772 :                                       fm_mat%matrix_struct%first_p_pos(2), npcol)
     699             :             ! we don't need to send to ourselves
     700     3537772 :             IF (grid_2_mepos(send_prow, send_pcol) == para_env%mepos) CYCLE
     701             : 
     702     1319764 :             send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
     703     1319764 :             iii_vet(send_counter) = iii_vet(send_counter) + 1
     704     1319764 :             iii = iii_vet(send_counter)
     705     3604793 :             buffer_send(send_counter)%msg(iii) = mat2D(iiB - my_start_row + 1, jjB - my_start_col + 1)
     706             :          END DO
     707             :       END DO
     708             : 
     709        1112 :       DEALLOCATE (iii_vet)
     710        1112 :       DEALLOCATE (grid_ref_2_send_pos)
     711        1112 :       IF (my_do_release_mat) DEALLOCATE (mat2D)
     712        1112 :       CALL timestop(handle2)
     713             : 
     714             :       ! 5) similarly to what done for the buffer_send
     715             :       !    create the buffer for receive, post the message with irecv
     716             :       !    and send the messages non-blocking
     717        1112 :       CALL timeset(routineN//"_isendrecv", handle2)
     718             :       ! count the number of messages to be received
     719        1112 :       number_of_rec = 0
     720        2106 :       DO proc_shift = 1, para_env%num_pe - 1
     721         994 :          proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
     722        2106 :          IF (map_rec_size(proc_receive) > 0) THEN
     723         952 :             number_of_rec = number_of_rec + 1
     724             :          END IF
     725             :       END DO
     726             : 
     727        4128 :       ALLOCATE (buffer_rec(number_of_rec))
     728             : 
     729        1112 :       rec_counter = 0
     730        2106 :       DO proc_shift = 1, para_env%num_pe - 1
     731         994 :          proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
     732         994 :          size_rec_buffer = map_rec_size(proc_receive)
     733        2106 :          IF (map_rec_size(proc_receive) > 0) THEN
     734         952 :             rec_counter = rec_counter + 1
     735             :             ! prepare the buffer for receive
     736        2856 :             ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
     737     1320716 :             buffer_rec(rec_counter)%msg = 0.0_dp
     738         952 :             buffer_rec(rec_counter)%proc = proc_receive
     739             :             ! post the message to be received
     740             :             CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
     741         952 :                                 buffer_rec(rec_counter)%msg_req)
     742             :          END IF
     743             :       END DO
     744             : 
     745             :       ! send messages
     746        5240 :       ALLOCATE (req_send(number_of_send))
     747        1112 :       send_counter = 0
     748        2106 :       DO proc_shift = 1, para_env%num_pe - 1
     749         994 :          proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
     750        2106 :          IF (map_send_size(proc_send) > 0) THEN
     751         952 :             send_counter = send_counter + 1
     752             :             CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
     753         952 :                                 buffer_send(send_counter)%msg_req)
     754         952 :             req_send(send_counter) = buffer_send(send_counter)%msg_req
     755             :          END IF
     756             :       END DO
     757        1112 :       CALL timestop(handle2)
     758             : 
     759             :       ! 6) fill the fm_mat matrix with the messages received from the
     760             :       !    other processes
     761        1112 :       CALL timeset(routineN//"_fill", handle2)
     762             :       ! In order to perform this step efficiently first we have to know the
     763             :       ! ranges of the blocks over which a given process hold its local data.
     764             :       ! Start with the rows ...
     765        1112 :       my_num_row_blocks = 1
     766       42165 :       DO iiB = 1, nrow_local - 1
     767       41053 :          IF (ABS(row_indices(iiB + 1) - row_indices(iiB)) == 1) CYCLE
     768       42165 :          my_num_row_blocks = my_num_row_blocks + 1
     769             :       END DO
     770        3336 :       ALLOCATE (blocks_ranges_row(2, my_num_row_blocks))
     771        4646 :       blocks_ranges_row = 0
     772        1112 :       blocks_ranges_row(1, 1) = row_indices(1)
     773        1112 :       iii = 1
     774       42165 :       DO iiB = 1, nrow_local - 1
     775       41053 :          IF (ABS(row_indices(iiB + 1) - row_indices(iiB)) == 1) CYCLE
     776          66 :          iii = iii + 1
     777          66 :          blocks_ranges_row(2, iii - 1) = row_indices(iiB)
     778       42165 :          blocks_ranges_row(1, iii) = row_indices(iiB + 1)
     779             :       END DO
     780        1112 :       blocks_ranges_row(2, my_num_row_blocks) = row_indices(MAX(nrow_local, 1))
     781             :       ! ... and columns
     782        1112 :       my_num_col_blocks = 1
     783      100566 :       DO jjB = 1, ncol_local - 1
     784       99454 :          IF (ABS(col_indices(jjB + 1) - col_indices(jjB)) == 1) CYCLE
     785      100566 :          my_num_col_blocks = my_num_col_blocks + 1
     786             :       END DO
     787        3336 :       ALLOCATE (blocks_ranges_col(2, my_num_col_blocks))
     788        4448 :       blocks_ranges_col = 0
     789        1112 :       blocks_ranges_col(1, 1) = col_indices(1)
     790        1112 :       iii = 1
     791      100566 :       DO jjB = 1, ncol_local - 1
     792       99454 :          IF (ABS(col_indices(jjB + 1) - col_indices(jjB)) == 1) CYCLE
     793           0 :          iii = iii + 1
     794           0 :          blocks_ranges_col(2, iii - 1) = col_indices(jjB)
     795      100566 :          blocks_ranges_col(1, iii) = col_indices(jjB + 1)
     796             :       END DO
     797        1112 :       blocks_ranges_col(2, my_num_col_blocks) = col_indices(MAX(ncol_local, 1))
     798             : 
     799        1112 :       rec_counter = 0
     800        2106 :       DO proc_shift = 1, para_env%num_pe - 1
     801         994 :          proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
     802         994 :          size_rec_buffer = map_rec_size(proc_receive)
     803             : 
     804        2106 :          IF (map_rec_size(proc_receive) > 0) THEN
     805         952 :             rec_counter = rec_counter + 1
     806             : 
     807         952 :             CALL get_group_dist(gd_col, proc_receive, rec_col_start, rec_col_end, rec_col_size)
     808             : 
     809             :             ! precompute the number of received columns and relative index
     810         952 :             num_rec_cols = 0
     811        1904 :             DO jjB = 1, my_num_col_blocks
     812         952 :                start_col_block = MAX(blocks_ranges_col(1, jjB), rec_col_start)
     813         952 :                end_col_block = MIN(blocks_ranges_col(2, jjB), rec_col_end)
     814       43175 :                DO j_sub = start_col_block, end_col_block
     815       42223 :                   num_rec_cols = num_rec_cols + 1
     816             :                END DO
     817             :             END DO
     818        2856 :             ALLOCATE (index_col_rec(num_rec_cols))
     819       42223 :             index_col_rec = 0
     820         952 :             iii = 0
     821        1904 :             DO jjB = 1, my_num_col_blocks
     822         952 :                start_col_block = MAX(blocks_ranges_col(1, jjB), rec_col_start)
     823         952 :                end_col_block = MIN(blocks_ranges_col(2, jjB), rec_col_end)
     824       43175 :                DO j_sub = start_col_block, end_col_block
     825       41271 :                   iii = iii + 1
     826             :                   j_local = cp_fm_indxg2l(j_sub, ncol_block, dummy_proc, &
     827       41271 :                                           fm_mat%matrix_struct%first_p_pos(2), npcol)
     828       42223 :                   index_col_rec(iii) = j_local
     829             :                END DO
     830             :             END DO
     831             : 
     832         952 :             CALL get_group_dist(gd_row, proc_receive, rec_row_start, rec_row_end, rec_row_size)
     833             : 
     834             :             ! wait for the message
     835         952 :             CALL buffer_rec(rec_counter)%msg_req%wait()
     836             : 
     837             :             ! fill the local data in fm_mat
     838         952 :             iii = 0
     839        1970 :             DO iiB = 1, my_num_row_blocks
     840        1018 :                start_row_block = MAX(blocks_ranges_row(1, iiB), rec_row_start)
     841        1018 :                end_row_block = MIN(blocks_ranges_row(2, iiB), rec_row_end)
     842       32123 :                DO i_sub = start_row_block, end_row_block
     843             :                   i_local = cp_fm_indxg2l(i_sub, nrow_block, dummy_proc, &
     844       30153 :                                           fm_mat%matrix_struct%first_p_pos(1), nprow)
     845     1350935 :                   DO jjB = 1, num_rec_cols
     846     1319764 :                      iii = iii + 1
     847     1319764 :                      j_local = index_col_rec(jjB)
     848     1349917 :                      fm_mat%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
     849             :                   END DO
     850             :                END DO
     851             :             END DO
     852             : 
     853         952 :             DEALLOCATE (buffer_rec(rec_counter)%msg)
     854        2856 :             DEALLOCATE (index_col_rec)
     855             :          END IF
     856             :       END DO
     857        2064 :       DEALLOCATE (buffer_rec)
     858             : 
     859        1112 :       DEALLOCATE (blocks_ranges_row)
     860        1112 :       DEALLOCATE (blocks_ranges_col)
     861             : 
     862        1112 :       CALL timestop(handle2)
     863             : 
     864             :       ! 7) Finally wait for all messeges to be sent
     865        1112 :       CALL timeset(routineN//"_waitall", handle2)
     866        1112 :       CALL mp_waitall(req_send(:))
     867        2064 :       DO send_counter = 1, number_of_send
     868        2064 :          DEALLOCATE (buffer_send(send_counter)%msg)
     869             :       END DO
     870        2064 :       DEALLOCATE (buffer_send)
     871        1112 :       CALL timestop(handle2)
     872             : 
     873        1112 :       DEALLOCATE (map_send_size)
     874        1112 :       DEALLOCATE (map_rec_size)
     875        1112 :       DEALLOCATE (grid_2_mepos)
     876        1112 :       DEALLOCATE (mepos_2_grid)
     877             : 
     878        1112 :       CALL timestop(handle)
     879             : 
     880        8896 :    END SUBROUTINE array2fm
     881             : 
     882             : ! **************************************************************************************************
     883             : !> \brief redistribute fm to local part of array
     884             : !> \param mat2D ...
     885             : !> \param my_rows ...
     886             : !> \param my_start_row ...
     887             : !> \param my_end_row ...
     888             : !> \param my_cols ...
     889             : !> \param my_start_col ...
     890             : !> \param my_end_col ...
     891             : !> \param group_grid_2_mepos ...
     892             : !> \param mepos_2_grid_group ...
     893             : !> \param ngroup_row ...
     894             : !> \param ngroup_col ...
     895             : !> \param fm_mat ...
     896             : ! **************************************************************************************************
     897         706 :    SUBROUTINE fm2array(mat2D, my_rows, my_start_row, my_end_row, &
     898             :                        my_cols, my_start_col, my_end_col, &
     899             :                        group_grid_2_mepos, mepos_2_grid_group, &
     900             :                        ngroup_row, ngroup_col, &
     901             :                        fm_mat)
     902             : 
     903             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :), &
     904             :          INTENT(OUT)                                     :: mat2D
     905             :       INTEGER, INTENT(IN)                                :: my_rows, my_start_row, my_end_row, &
     906             :                                                             my_cols, my_start_col, my_end_col
     907             :       INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(IN)  :: group_grid_2_mepos, mepos_2_grid_group
     908             :       INTEGER, INTENT(IN)                                :: ngroup_row, ngroup_col
     909             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_mat
     910             : 
     911             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'fm2array'
     912             : 
     913             :       INTEGER :: dummy_proc, handle, handle2, i_global, iiB, iii, itmp(2), j_global, jjB, mypcol, &
     914             :          myprow, ncol_block, ncol_local, npcol, nprow, nrow_block, nrow_local, num_cols, &
     915             :          num_rec_rows, num_rows, number_of_rec, number_of_send, proc_receive, proc_send, &
     916             :          proc_shift, rec_col_size, rec_counter, rec_pcol, rec_prow, rec_row_size, ref_send_pcol, &
     917             :          ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, size_send_buffer
     918         706 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: iii_vet, index_row_rec, map_rec_size, &
     919         706 :                                                             map_send_size
     920         706 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: grid_2_mepos, grid_ref_2_send_pos, &
     921         706 :                                                             mepos_2_grid, sizes
     922         706 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
     923             :       REAL(KIND=dp)                                      :: part_col, part_row
     924             :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
     925         706 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
     926             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     927         706 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: req_send
     928             : 
     929         706 :       CALL timeset(routineN, handle)
     930             : 
     931             :       ! allocate the array
     932        2824 :       ALLOCATE (mat2D(my_rows, my_cols))
     933     2183909 :       mat2D = 0.0_dp
     934             : 
     935             :       ! start procedure
     936             :       ! get information of from the full matrix
     937             :       CALL cp_fm_get_info(matrix=fm_mat, &
     938             :                           nrow_local=nrow_local, &
     939             :                           ncol_local=ncol_local, &
     940             :                           row_indices=row_indices, &
     941             :                           col_indices=col_indices, &
     942             :                           nrow_block=nrow_block, &
     943             :                           ncol_block=ncol_block, &
     944             :                           nrow_global=num_rows, &
     945             :                           ncol_global=num_cols, &
     946         706 :                           para_env=para_env)
     947         706 :       myprow = fm_mat%matrix_struct%context%mepos(1)
     948         706 :       mypcol = fm_mat%matrix_struct%context%mepos(2)
     949         706 :       nprow = fm_mat%matrix_struct%context%num_pe(1)
     950         706 :       npcol = fm_mat%matrix_struct%context%num_pe(2)
     951             : 
     952             :       ! start the communication part
     953             :       ! 0) create array containing the processes position
     954             :       !    and supporting infos
     955         706 :       CALL timeset(routineN//"_info", handle2)
     956        2824 :       ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
     957        2824 :       grid_2_mepos = 0
     958        2118 :       ALLOCATE (mepos_2_grid(2, 0:para_env%num_pe - 1))
     959             : 
     960             :       ! fill the info array
     961         706 :       grid_2_mepos(myprow, mypcol) = para_env%mepos
     962             : 
     963             :       ! sum infos
     964         706 :       CALL para_env%sum(grid_2_mepos)
     965        2118 :       CALL para_env%allgather([myprow, mypcol], mepos_2_grid)
     966             : 
     967             :       ! 1) loop over my local data and define a map for the proc to send data
     968        2118 :       ALLOCATE (map_send_size(0:para_env%num_pe - 1))
     969        2118 :       map_send_size = 0
     970         706 :       part_row = REAL(num_rows, KIND=dp)/REAL(ngroup_row, KIND=dp)
     971         706 :       part_col = REAL(num_cols, KIND=dp)/REAL(ngroup_col, KIND=dp)
     972             : 
     973       59890 :       DO jjB = 1, ncol_local
     974       59184 :          j_global = col_indices(jjB)
     975             :          ! check the group holding this element
     976             :          ! dirty way, if someone has a better idea ...
     977       59184 :          send_pcol = INT(REAL(j_global - 1, KIND=dp)/part_col)
     978       59184 :          send_pcol = MAX(0, send_pcol)
     979       59184 :          send_pcol = MIN(send_pcol, ngroup_col - 1)
     980             :          DO
     981       59184 :             itmp = get_limit(num_cols, ngroup_col, send_pcol)
     982       59184 :             IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
     983           0 :             IF (j_global < itmp(1)) send_pcol = send_pcol - 1
     984           0 :             IF (j_global > itmp(2)) send_pcol = send_pcol + 1
     985             :          END DO
     986             : 
     987     2212065 :          DO iiB = 1, nrow_local
     988     2152175 :             i_global = row_indices(iiB)
     989             :             ! check the process in the group holding this element
     990     2152175 :             send_prow = INT(REAL(i_global - 1, KIND=dp)/part_row)
     991     2152175 :             send_prow = MAX(0, send_prow)
     992     2152175 :             send_prow = MIN(send_prow, ngroup_row - 1)
     993             :             DO
     994     2152175 :                itmp = get_limit(num_rows, ngroup_row, send_prow)
     995     2152175 :                IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
     996           0 :                IF (i_global < itmp(1)) send_prow = send_prow - 1
     997           0 :                IF (i_global > itmp(2)) send_prow = send_prow + 1
     998             :             END DO
     999             : 
    1000     2152175 :             proc_send = group_grid_2_mepos(send_prow, send_pcol)
    1001             : 
    1002     2211359 :             map_send_size(proc_send) = map_send_size(proc_send) + 1
    1003             : 
    1004             :          END DO ! i_global
    1005             :       END DO ! j_global
    1006             : 
    1007             :       ! 2) loop over my local data of the array and define a map for the proc from which rec data
    1008        2118 :       ALLOCATE (map_rec_size(0:para_env%num_pe - 1))
    1009        2118 :       map_rec_size = 0
    1010         706 :       dummy_proc = 0
    1011       31734 :       DO jjB = my_start_col, my_end_col
    1012             :          rec_pcol = cp_fm_indxg2p(jjB, ncol_block, dummy_proc, &
    1013       31028 :                                   fm_mat%matrix_struct%first_p_pos(2), npcol)
    1014     2183909 :          DO iiB = my_start_row, my_end_row
    1015             :             rec_prow = cp_fm_indxg2p(iiB, nrow_block, dummy_proc, &
    1016     2152175 :                                      fm_mat%matrix_struct%first_p_pos(1), nprow)
    1017     2152175 :             proc_receive = grid_2_mepos(rec_prow, rec_pcol)
    1018     2183203 :             map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
    1019             :          END DO
    1020             :       END DO
    1021             : 
    1022             :       ! 3) check if the local data in the fm_mat has to be stored in the new array
    1023         706 :       IF (map_rec_size(para_env%mepos) > 0) THEN
    1024       59890 :          DO jjB = 1, ncol_local
    1025       59184 :             j_global = col_indices(jjB)
    1026       59890 :             IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
    1027     1171990 :                DO iiB = 1, nrow_local
    1028     1140962 :                   i_global = row_indices(iiB)
    1029     1171990 :                   IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
    1030     1140082 :                      mat2D(i_global - my_start_row + 1, j_global - my_start_col + 1) = fm_mat%local_data(iiB, jjB)
    1031             :                   END IF
    1032             :                END DO
    1033             :             END IF
    1034             :          END DO
    1035             :       END IF
    1036         706 :       CALL timestop(handle2)
    1037             : 
    1038             :       ! 4) reorder data in the send_buffer
    1039         706 :       CALL timeset(routineN//"_buffer_send", handle2)
    1040             :       ! count the number of messages to send
    1041         706 :       number_of_send = 0
    1042        1412 :       DO proc_shift = 1, para_env%num_pe - 1
    1043         706 :          proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
    1044        1412 :          IF (map_send_size(proc_send) > 0) THEN
    1045         676 :             number_of_send = number_of_send + 1
    1046             :          END IF
    1047             :       END DO
    1048             :       ! allocate the structure that will hold the messages to be sent
    1049        2764 :       ALLOCATE (buffer_send(number_of_send))
    1050             :       ! grid_ref_2_send_pos is an array (map) that given a pair
    1051             :       ! (ref_send_prow,ref_send_pcol) returns
    1052             :       ! the position in the buffer_send associated to that process
    1053             : 
    1054        2824 :       ALLOCATE (grid_ref_2_send_pos(0:ngroup_row - 1, 0:ngroup_col - 1))
    1055        3498 :       grid_ref_2_send_pos = 0
    1056             : 
    1057             :       ! finalize the allocation of buffer_send with the actual size
    1058             :       ! of each message (actual size is size_send_buffer)
    1059         706 :       send_counter = 0
    1060        1412 :       DO proc_shift = 1, para_env%num_pe - 1
    1061         706 :          proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
    1062         706 :          size_send_buffer = map_send_size(proc_send)
    1063        1412 :          IF (map_send_size(proc_send) > 0) THEN
    1064         676 :             send_counter = send_counter + 1
    1065             :             ! allocate the sending buffer (msg)
    1066        2028 :             ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
    1067     1012769 :             buffer_send(send_counter)%msg = 0.0_dp
    1068         676 :             buffer_send(send_counter)%proc = proc_send
    1069             :             ! get the pointer to prow, pcol of the process that has
    1070             :             ! to receive this message
    1071         676 :             ref_send_prow = mepos_2_grid_group(1, proc_send)
    1072         676 :             ref_send_pcol = mepos_2_grid_group(2, proc_send)
    1073             :             ! save the rank of the process that has to receive this message
    1074         676 :             grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
    1075             :          END IF
    1076             :       END DO
    1077             : 
    1078             :       ! loop over the locally held data and fill the buffer_send
    1079             :       ! for doing that we need an array that keep track if the
    1080             :       ! sequential increase of the index for each message
    1081        2088 :       ALLOCATE (iii_vet(number_of_send))
    1082        1382 :       iii_vet = 0
    1083       59890 :       DO jjB = 1, ncol_local
    1084       59184 :          j_global = col_indices(jjB)
    1085             :          ! check the group holding this element
    1086             :          ! dirty way, if someone has a better idea ...
    1087       59184 :          send_pcol = INT(REAL(j_global - 1, KIND=dp)/part_col)
    1088       59184 :          send_pcol = MAX(0, send_pcol)
    1089       59184 :          send_pcol = MIN(send_pcol, ngroup_col - 1)
    1090             :          DO
    1091       59184 :             itmp = get_limit(num_cols, ngroup_col, send_pcol)
    1092       59184 :             IF (j_global >= itmp(1) .AND. j_global <= itmp(2)) EXIT
    1093           0 :             IF (j_global < itmp(1)) send_pcol = send_pcol - 1
    1094           0 :             IF (j_global > itmp(2)) send_pcol = send_pcol + 1
    1095             :          END DO
    1096             : 
    1097     2212065 :          DO iiB = 1, nrow_local
    1098     2152175 :             i_global = row_indices(iiB)
    1099             :             ! check the process in the group holding this element
    1100     2152175 :             send_prow = INT(REAL(i_global - 1, KIND=dp)/part_row)
    1101     2152175 :             send_prow = MAX(0, send_prow)
    1102     2152175 :             send_prow = MIN(send_prow, ngroup_row - 1)
    1103             :             DO
    1104     2152175 :                itmp = get_limit(num_rows, ngroup_row, send_prow)
    1105     2152175 :                IF (i_global >= itmp(1) .AND. i_global <= itmp(2)) EXIT
    1106           0 :                IF (i_global < itmp(1)) send_prow = send_prow - 1
    1107           0 :                IF (i_global > itmp(2)) send_prow = send_prow + 1
    1108             :             END DO
    1109             :             ! we don't need to send to ourselves
    1110     2152175 :             IF (group_grid_2_mepos(send_prow, send_pcol) == para_env%mepos) CYCLE
    1111             : 
    1112     1012093 :             send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
    1113     1012093 :             iii_vet(send_counter) = iii_vet(send_counter) + 1
    1114     1012093 :             iii = iii_vet(send_counter)
    1115     2211359 :             buffer_send(send_counter)%msg(iii) = fm_mat%local_data(iiB, jjB)
    1116             :          END DO
    1117             :       END DO
    1118             : 
    1119         706 :       DEALLOCATE (iii_vet)
    1120         706 :       DEALLOCATE (grid_ref_2_send_pos)
    1121         706 :       CALL timestop(handle2)
    1122             : 
    1123             :       ! 5) similarly to what done for the buffer_send
    1124             :       !    create the buffer for receive, post the message with irecv
    1125             :       !    and send the messages with non-blocking
    1126         706 :       CALL timeset(routineN//"_isendrecv", handle2)
    1127             :       ! count the number of messages to be received
    1128         706 :       number_of_rec = 0
    1129        1412 :       DO proc_shift = 1, para_env%num_pe - 1
    1130         706 :          proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
    1131        1412 :          IF (map_rec_size(proc_receive) > 0) THEN
    1132         676 :             number_of_rec = number_of_rec + 1
    1133             :          END IF
    1134             :       END DO
    1135             : 
    1136        2764 :       ALLOCATE (buffer_rec(number_of_rec))
    1137             : 
    1138         706 :       rec_counter = 0
    1139        1412 :       DO proc_shift = 1, para_env%num_pe - 1
    1140         706 :          proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
    1141         706 :          size_rec_buffer = map_rec_size(proc_receive)
    1142        1412 :          IF (map_rec_size(proc_receive) > 0) THEN
    1143         676 :             rec_counter = rec_counter + 1
    1144             :             ! prepare the buffer for receive
    1145        2028 :             ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
    1146     1012769 :             buffer_rec(rec_counter)%msg = 0.0_dp
    1147         676 :             buffer_rec(rec_counter)%proc = proc_receive
    1148             :             ! post the message to be received
    1149             :             CALL para_env%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
    1150         676 :                                 buffer_rec(rec_counter)%msg_req)
    1151             :          END IF
    1152             :       END DO
    1153             : 
    1154             :       ! send messages
    1155        3470 :       ALLOCATE (req_send(number_of_send))
    1156         706 :       send_counter = 0
    1157        1412 :       DO proc_shift = 1, para_env%num_pe - 1
    1158         706 :          proc_send = MODULO(para_env%mepos + proc_shift, para_env%num_pe)
    1159        1412 :          IF (map_send_size(proc_send) > 0) THEN
    1160         676 :             send_counter = send_counter + 1
    1161             :             CALL para_env%isend(buffer_send(send_counter)%msg, proc_send, &
    1162         676 :                                 buffer_send(send_counter)%msg_req)
    1163         676 :             req_send(send_counter) = buffer_send(send_counter)%msg_req
    1164             :          END IF
    1165             :       END DO
    1166         706 :       CALL timestop(handle2)
    1167             : 
    1168             :       ! 6) fill the fm_mat matrix with the messages received from the
    1169             :       !    other processes
    1170         706 :       CALL timeset(routineN//"_fill", handle2)
    1171             :       CALL cp_fm_get_info(matrix=fm_mat, &
    1172             :                           nrow_local=nrow_local, &
    1173         706 :                           ncol_local=ncol_local)
    1174        2118 :       ALLOCATE (sizes(2, 0:para_env%num_pe - 1))
    1175        2118 :       CALL para_env%allgather([nrow_local, ncol_local], sizes)
    1176        2118 :       iiB = MAXVAL(sizes(1, :))
    1177        2118 :       ALLOCATE (index_row_rec(iiB))
    1178       26000 :       index_row_rec = 0
    1179         706 :       rec_counter = 0
    1180        1412 :       DO proc_shift = 1, para_env%num_pe - 1
    1181         706 :          proc_receive = MODULO(para_env%mepos - proc_shift, para_env%num_pe)
    1182         706 :          size_rec_buffer = map_rec_size(proc_receive)
    1183             : 
    1184        1412 :          IF (map_rec_size(proc_receive) > 0) THEN
    1185         676 :             rec_counter = rec_counter + 1
    1186             : 
    1187         676 :             rec_col_size = sizes(2, proc_receive)
    1188         676 :             rec_row_size = sizes(1, proc_receive)
    1189             : 
    1190             :             ! precompute the indeces of the rows
    1191         676 :             num_rec_rows = 0
    1192       24437 :             DO iiB = 1, rec_row_size
    1193             :                i_global = cp_fm_indxl2g(iiB, nrow_block, mepos_2_grid(1, proc_receive), &
    1194       23761 :                                         fm_mat%matrix_struct%first_p_pos(1), nprow)
    1195       24437 :                IF (i_global >= my_start_row .AND. i_global <= my_end_row) THEN
    1196       23636 :                   num_rec_rows = num_rec_rows + 1
    1197       23636 :                   index_row_rec(num_rec_rows) = i_global
    1198             :                END IF
    1199             :             END DO
    1200             : 
    1201         676 :             CALL buffer_rec(rec_counter)%msg_req%wait()
    1202             : 
    1203         676 :             iii = 0
    1204       57208 :             DO jjB = 1, rec_col_size
    1205             :                j_global = cp_fm_indxl2g(jjB, ncol_block, mepos_2_grid(2, proc_receive), &
    1206       56532 :                                         fm_mat%matrix_struct%first_p_pos(2), npcol)
    1207       57208 :                IF (j_global >= my_start_col .AND. j_global <= my_end_col) THEN
    1208     1040469 :                   DO iiB = 1, num_rec_rows
    1209     1012093 :                      i_global = index_row_rec(iiB)
    1210     1012093 :                      iii = iii + 1
    1211     1040469 :                      mat2D(i_global - my_start_row + 1, j_global - my_start_col + 1) = buffer_rec(rec_counter)%msg(iii)
    1212             :                   END DO
    1213             :                END IF
    1214             :             END DO
    1215             : 
    1216         676 :             DEALLOCATE (buffer_rec(rec_counter)%msg)
    1217             :          END IF
    1218             :       END DO
    1219        1382 :       DEALLOCATE (buffer_rec)
    1220         706 :       DEALLOCATE (index_row_rec)
    1221         706 :       CALL cp_fm_release(fm_mat)
    1222         706 :       CALL timestop(handle2)
    1223             : 
    1224             :       ! 7) Finally wait for all messeges to be sent
    1225         706 :       CALL timeset(routineN//"_waitall", handle2)
    1226         706 :       CALL mp_waitall(req_send(:))
    1227        1382 :       DO send_counter = 1, number_of_send
    1228        1382 :          DEALLOCATE (buffer_send(send_counter)%msg)
    1229             :       END DO
    1230        1382 :       DEALLOCATE (buffer_send)
    1231         706 :       CALL timestop(handle2)
    1232             : 
    1233         706 :       CALL timestop(handle)
    1234             : 
    1235        4942 :    END SUBROUTINE fm2array
    1236             : 
    1237             : ! **************************************************************************************************
    1238             : !> \brief redistribute 2D representation of 3d tensor to a set of dbcsr matrices
    1239             : !> \param Gamma_2D ...
    1240             : !> \param homo ...
    1241             : !> \param virtual ...
    1242             : !> \param dimen_ia ...
    1243             : !> \param para_env_sub ...
    1244             : !> \param my_ia_start ...
    1245             : !> \param my_ia_end ...
    1246             : !> \param my_group_L_size ...
    1247             : !> \param gd_ia ...
    1248             : !> \param dbcsr_Gamma ...
    1249             : !> \param mo_coeff_o ...
    1250             : ! **************************************************************************************************
    1251         344 :    SUBROUTINE create_dbcsr_gamma(Gamma_2D, homo, virtual, dimen_ia, para_env_sub, &
    1252             :                                  my_ia_start, my_ia_end, my_group_L_size, &
    1253         344 :                                  gd_ia, dbcsr_Gamma, mo_coeff_o)
    1254             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: Gamma_2D
    1255             :       INTEGER, INTENT(IN)                                :: homo, virtual, dimen_ia
    1256             :       TYPE(mp_para_env_type), INTENT(IN), POINTER        :: para_env_sub
    1257             :       INTEGER, INTENT(IN)                                :: my_ia_start, my_ia_end, my_group_L_size
    1258             :       TYPE(group_dist_d1_type), INTENT(IN)               :: gd_ia
    1259             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT)    :: dbcsr_Gamma
    1260             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: mo_coeff_o
    1261             : 
    1262             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'create_dbcsr_gamma'
    1263             : 
    1264             :       INTEGER :: dummy_proc, handle, i_global, i_local, iaia, iiB, iii, itmp(2), j_global, &
    1265             :          j_local, jjB, jjj, kkB, mypcol, myprow, ncol_block, ncol_local, npcol, nprow, nrow_block, &
    1266             :          nrow_local, number_of_rec, number_of_send, proc_receive, proc_send, proc_shift, &
    1267             :          rec_counter, rec_iaia_end, rec_iaia_size, rec_iaia_start, rec_pcol, rec_prow, &
    1268             :          ref_send_pcol, ref_send_prow, send_counter, send_pcol, send_prow, size_rec_buffer, &
    1269             :          size_send_buffer
    1270         344 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: iii_vet, map_rec_size, map_send_size
    1271         344 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: grid_2_mepos, grid_ref_2_send_pos, &
    1272         344 :                                                             indeces_map_my, mepos_2_grid
    1273         344 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1274             :       REAL(KIND=dp)                                      :: part_ia
    1275             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1276             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct
    1277             :       TYPE(cp_fm_type)                                   :: fm_ia
    1278         344 :       TYPE(index_map), ALLOCATABLE, DIMENSION(:)         :: indeces_rec
    1279             :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
    1280         344 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
    1281         344 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: req_send
    1282             : 
    1283         344 :       CALL timeset(routineN, handle)
    1284             : 
    1285             :       ! create sub blacs env
    1286         344 :       NULLIFY (blacs_env)
    1287         344 :       CALL cp_blacs_env_create(blacs_env=blacs_env, para_env=para_env_sub)
    1288             : 
    1289             :       ! create the fm_ia buffer matrix
    1290         344 :       NULLIFY (fm_struct)
    1291             :       CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=homo, &
    1292         344 :                                ncol_global=virtual, para_env=para_env_sub)
    1293         344 :       CALL cp_fm_create(fm_ia, fm_struct, name="fm_ia")
    1294             :       ! release structure
    1295         344 :       CALL cp_fm_struct_release(fm_struct)
    1296             :       ! release blacs_env
    1297         344 :       CALL cp_blacs_env_release(blacs_env)
    1298             : 
    1299             :       ! get array information
    1300             :       CALL cp_fm_get_info(matrix=fm_ia, &
    1301             :                           nrow_local=nrow_local, &
    1302             :                           ncol_local=ncol_local, &
    1303             :                           row_indices=row_indices, &
    1304             :                           col_indices=col_indices, &
    1305             :                           nrow_block=nrow_block, &
    1306         344 :                           ncol_block=ncol_block)
    1307         344 :       myprow = fm_ia%matrix_struct%context%mepos(1)
    1308         344 :       mypcol = fm_ia%matrix_struct%context%mepos(2)
    1309         344 :       nprow = fm_ia%matrix_struct%context%num_pe(1)
    1310         344 :       npcol = fm_ia%matrix_struct%context%num_pe(2)
    1311             : 
    1312             :       ! 0) create array containing the processes position and supporting infos
    1313        1376 :       ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
    1314        1048 :       grid_2_mepos = 0
    1315        1032 :       ALLOCATE (mepos_2_grid(2, 0:para_env_sub%num_pe - 1))
    1316             :       ! fill the info array
    1317         344 :       grid_2_mepos(myprow, mypcol) = para_env_sub%mepos
    1318             :       ! sum infos
    1319         344 :       CALL para_env_sub%sum(grid_2_mepos)
    1320        1032 :       CALL para_env_sub%allgather([myprow, mypcol], mepos_2_grid)
    1321             : 
    1322             :       ! loop over local index range and define the sending map
    1323        1032 :       ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1))
    1324         704 :       map_send_size = 0
    1325         344 :       dummy_proc = 0
    1326       19420 :       DO iaia = my_ia_start, my_ia_end
    1327       19076 :          i_global = (iaia - 1)/virtual + 1
    1328       19076 :          j_global = MOD(iaia - 1, virtual) + 1
    1329             :          send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
    1330       19076 :                                    fm_ia%matrix_struct%first_p_pos(1), nprow)
    1331             :          send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
    1332       19076 :                                    fm_ia%matrix_struct%first_p_pos(2), npcol)
    1333       19076 :          proc_send = grid_2_mepos(send_prow, send_pcol)
    1334       19420 :          map_send_size(proc_send) = map_send_size(proc_send) + 1
    1335             :       END DO
    1336             : 
    1337             :       ! loop over local data of fm_ia and define the receiving map
    1338        1032 :       ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1))
    1339         704 :       map_rec_size = 0
    1340         344 :       part_ia = REAL(dimen_ia, KIND=dp)/REAL(para_env_sub%num_pe, KIND=dp)
    1341             : 
    1342        1556 :       DO iiB = 1, nrow_local
    1343        1212 :          i_global = row_indices(iiB)
    1344       20632 :          DO jjB = 1, ncol_local
    1345       19076 :             j_global = col_indices(jjB)
    1346       19076 :             iaia = (i_global - 1)*virtual + j_global
    1347       19076 :             proc_receive = INT(REAL(iaia - 1, KIND=dp)/part_ia)
    1348       19076 :             proc_receive = MAX(0, proc_receive)
    1349       19076 :             proc_receive = MIN(proc_receive, para_env_sub%num_pe - 1)
    1350             :             DO
    1351       19076 :                itmp = get_limit(dimen_ia, para_env_sub%num_pe, proc_receive)
    1352       19076 :                IF (iaia >= itmp(1) .AND. iaia <= itmp(2)) EXIT
    1353           0 :                IF (iaia < itmp(1)) proc_receive = proc_receive - 1
    1354           0 :                IF (iaia > itmp(2)) proc_receive = proc_receive + 1
    1355             :             END DO
    1356       20288 :             map_rec_size(proc_receive) = map_rec_size(proc_receive) + 1
    1357             :          END DO
    1358             :       END DO
    1359             : 
    1360             :       ! allocate the buffer for sending data
    1361         344 :       number_of_send = 0
    1362         360 :       DO proc_shift = 1, para_env_sub%num_pe - 1
    1363          16 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
    1364         360 :          IF (map_send_size(proc_send) > 0) THEN
    1365           2 :             number_of_send = number_of_send + 1
    1366             :          END IF
    1367             :       END DO
    1368             :       ! allocate the structure that will hold the messages to be sent
    1369         692 :       ALLOCATE (buffer_send(number_of_send))
    1370             :       ! and the map from the grid of processess to the message position
    1371        1376 :       ALLOCATE (grid_ref_2_send_pos(0:nprow - 1, 0:npcol - 1))
    1372        1048 :       grid_ref_2_send_pos = 0
    1373             :       ! finally allocate each message
    1374         344 :       send_counter = 0
    1375         360 :       DO proc_shift = 1, para_env_sub%num_pe - 1
    1376          16 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
    1377          16 :          size_send_buffer = map_send_size(proc_send)
    1378         360 :          IF (map_send_size(proc_send) > 0) THEN
    1379           2 :             send_counter = send_counter + 1
    1380             :             ! allocate the sending buffer (msg)
    1381           6 :             ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
    1382           2 :             buffer_send(send_counter)%proc = proc_send
    1383             :             ! get the pointer to prow, pcol of the process that has
    1384             :             ! to receive this message
    1385           2 :             ref_send_prow = mepos_2_grid(1, proc_send)
    1386           2 :             ref_send_pcol = mepos_2_grid(2, proc_send)
    1387             :             ! save the rank of the process that has to receive this message
    1388           2 :             grid_ref_2_send_pos(ref_send_prow, ref_send_pcol) = send_counter
    1389             :          END IF
    1390             :       END DO
    1391             : 
    1392             :       ! allocate the buffer for receiving data
    1393             :       number_of_rec = 0
    1394         360 :       DO proc_shift = 1, para_env_sub%num_pe - 1
    1395          16 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
    1396         360 :          IF (map_rec_size(proc_receive) > 0) THEN
    1397           2 :             number_of_rec = number_of_rec + 1
    1398             :          END IF
    1399             :       END DO
    1400             :       ! allocate the structure that will hold the messages to be received
    1401             :       ! and relative indeces
    1402         692 :       ALLOCATE (buffer_rec(number_of_rec))
    1403         692 :       ALLOCATE (indeces_rec(number_of_rec))
    1404             :       ! finally allocate each message and fill the array of indeces
    1405         344 :       rec_counter = 0
    1406         360 :       DO proc_shift = 1, para_env_sub%num_pe - 1
    1407          16 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
    1408          16 :          size_rec_buffer = map_rec_size(proc_receive)
    1409         360 :          IF (map_rec_size(proc_receive) > 0) THEN
    1410           2 :             rec_counter = rec_counter + 1
    1411             :             ! prepare the buffer for receive
    1412           6 :             ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
    1413           2 :             buffer_rec(rec_counter)%proc = proc_receive
    1414             :             ! create the indeces array
    1415           6 :             ALLOCATE (indeces_rec(rec_counter)%map(2, size_rec_buffer))
    1416          59 :             indeces_rec(rec_counter)%map = 0
    1417           2 :             CALL get_group_dist(gd_ia, proc_receive, rec_iaia_start, rec_iaia_end, rec_iaia_size)
    1418           2 :             iii = 0
    1419         120 :             DO iaia = rec_iaia_start, rec_iaia_end
    1420         118 :                i_global = (iaia - 1)/virtual + 1
    1421         118 :                j_global = MOD(iaia - 1, virtual) + 1
    1422             :                rec_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
    1423         118 :                                         fm_ia%matrix_struct%first_p_pos(1), nprow)
    1424             :                rec_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
    1425         118 :                                         fm_ia%matrix_struct%first_p_pos(2), npcol)
    1426         118 :                IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) CYCLE
    1427          19 :                iii = iii + 1
    1428             :                i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, &
    1429          19 :                                        fm_ia%matrix_struct%first_p_pos(1), nprow)
    1430             :                j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, &
    1431          19 :                                        fm_ia%matrix_struct%first_p_pos(2), npcol)
    1432          19 :                indeces_rec(rec_counter)%map(1, iii) = i_local
    1433         120 :                indeces_rec(rec_counter)%map(2, iii) = j_local
    1434             :             END DO
    1435             :          END IF
    1436             :       END DO
    1437             :       ! and create the index map for my local data
    1438         344 :       IF (map_rec_size(para_env_sub%mepos) > 0) THEN
    1439         344 :          size_rec_buffer = map_rec_size(para_env_sub%mepos)
    1440        1032 :          ALLOCATE (indeces_map_my(2, size_rec_buffer))
    1441       57515 :          indeces_map_my = 0
    1442             :          iii = 0
    1443       19420 :          DO iaia = my_ia_start, my_ia_end
    1444       19076 :             i_global = (iaia - 1)/virtual + 1
    1445       19076 :             j_global = MOD(iaia - 1, virtual) + 1
    1446             :             rec_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
    1447       19076 :                                      fm_ia%matrix_struct%first_p_pos(1), nprow)
    1448             :             rec_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
    1449       19076 :                                      fm_ia%matrix_struct%first_p_pos(2), npcol)
    1450       19076 :             IF (grid_2_mepos(rec_prow, rec_pcol) /= para_env_sub%mepos) CYCLE
    1451       19057 :             iii = iii + 1
    1452             :             i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, &
    1453       19057 :                                     fm_ia%matrix_struct%first_p_pos(1), nprow)
    1454             :             j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, &
    1455       19057 :                                     fm_ia%matrix_struct%first_p_pos(2), npcol)
    1456       19057 :             indeces_map_my(1, iii) = i_local
    1457       19420 :             indeces_map_my(2, iii) = j_local
    1458             :          END DO
    1459             :       END IF
    1460             : 
    1461             :       ! auxiliary vector of indeces for the send buffer
    1462         690 :       ALLOCATE (iii_vet(number_of_send))
    1463             :       ! vector for the send requests
    1464        1036 :       ALLOCATE (req_send(number_of_send))
    1465             :       ! loop over auxiliary basis function and redistribute into a fm
    1466             :       ! and then compy the fm into a dbcsr matrix
    1467       15575 :       DO kkB = 1, my_group_L_size
    1468             :          ! zero the matries of the buffers and post the messages to be received
    1469       15231 :          CALL cp_fm_set_all(matrix=fm_ia, alpha=0.0_dp)
    1470       15231 :          rec_counter = 0
    1471       16667 :          DO proc_shift = 1, para_env_sub%num_pe - 1
    1472        1436 :             proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
    1473       16667 :             IF (map_rec_size(proc_receive) > 0) THEN
    1474         220 :                rec_counter = rec_counter + 1
    1475        2310 :                buffer_rec(rec_counter)%msg = 0.0_dp
    1476             :                CALL para_env_sub%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
    1477         220 :                                        buffer_rec(rec_counter)%msg_req)
    1478             :             END IF
    1479             :          END DO
    1480             :          ! fill the sending buffer and send the messages
    1481       15451 :          DO send_counter = 1, number_of_send
    1482       17541 :             buffer_send(send_counter)%msg = 0.0_dp
    1483             :          END DO
    1484       15451 :          iii_vet = 0
    1485             :          jjj = 0
    1486      875195 :          DO iaia = my_ia_start, my_ia_end
    1487      859964 :             i_global = (iaia - 1)/virtual + 1
    1488      859964 :             j_global = MOD(iaia - 1, virtual) + 1
    1489             :             send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
    1490      859964 :                                       fm_ia%matrix_struct%first_p_pos(1), nprow)
    1491             :             send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
    1492      859964 :                                       fm_ia%matrix_struct%first_p_pos(2), npcol)
    1493      859964 :             proc_send = grid_2_mepos(send_prow, send_pcol)
    1494             :             ! we don't need to send to ourselves
    1495      875195 :             IF (grid_2_mepos(send_prow, send_pcol) == para_env_sub%mepos) THEN
    1496             :                ! filling fm_ia with local data
    1497      857874 :                jjj = jjj + 1
    1498      857874 :                i_local = indeces_map_my(1, jjj)
    1499      857874 :                j_local = indeces_map_my(2, jjj)
    1500      857874 :                fm_ia%local_data(i_local, j_local) = Gamma_2D(iaia - my_ia_start + 1, kkB)
    1501             :             ELSE
    1502        2090 :                send_counter = grid_ref_2_send_pos(send_prow, send_pcol)
    1503        2090 :                iii_vet(send_counter) = iii_vet(send_counter) + 1
    1504        2090 :                iii = iii_vet(send_counter)
    1505        2090 :                buffer_send(send_counter)%msg(iii) = Gamma_2D(iaia - my_ia_start + 1, kkB)
    1506             :             END IF
    1507             :          END DO
    1508       15451 :          req_send = mp_request_null
    1509       15231 :          send_counter = 0
    1510       16667 :          DO proc_shift = 1, para_env_sub%num_pe - 1
    1511        1436 :             proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
    1512       16667 :             IF (map_send_size(proc_send) > 0) THEN
    1513         220 :                send_counter = send_counter + 1
    1514             :                CALL para_env_sub%isend(buffer_send(send_counter)%msg, proc_send, &
    1515         220 :                                        buffer_send(send_counter)%msg_req)
    1516         220 :                req_send(send_counter) = buffer_send(send_counter)%msg_req
    1517             :             END IF
    1518             :          END DO
    1519             : 
    1520             :          ! receive the massages and fill the fm_ia
    1521       15231 :          rec_counter = 0
    1522       16667 :          DO proc_shift = 1, para_env_sub%num_pe - 1
    1523        1436 :             proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
    1524        1436 :             size_rec_buffer = map_rec_size(proc_receive)
    1525       16667 :             IF (map_rec_size(proc_receive) > 0) THEN
    1526         220 :                rec_counter = rec_counter + 1
    1527             :                ! wait for the message
    1528         220 :                CALL buffer_rec(rec_counter)%msg_req%wait()
    1529        2310 :                DO iii = 1, size_rec_buffer
    1530        2090 :                   i_local = indeces_rec(rec_counter)%map(1, iii)
    1531        2090 :                   j_local = indeces_rec(rec_counter)%map(2, iii)
    1532        2310 :                   fm_ia%local_data(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
    1533             :                END DO
    1534             :             END IF
    1535             :          END DO
    1536             : 
    1537             :          ! wait all
    1538       15231 :          CALL mp_waitall(req_send(:))
    1539             : 
    1540             :          ! now create the DBCSR matrix and copy fm_ia into it
    1541       15231 :          ALLOCATE (dbcsr_Gamma(kkB)%matrix)
    1542             :          CALL cp_dbcsr_m_by_n_from_template(dbcsr_Gamma(kkB)%matrix, &
    1543             :                                             template=mo_coeff_o, &
    1544       15231 :                                             m=homo, n=virtual, sym=dbcsr_type_no_symmetry)
    1545       15575 :          CALL copy_fm_to_dbcsr(fm_ia, dbcsr_Gamma(kkB)%matrix, keep_sparsity=.FALSE.)
    1546             : 
    1547             :       END DO
    1548             : 
    1549             :       ! deallocate stuff
    1550         344 :       DEALLOCATE (Gamma_2D)
    1551         344 :       DEALLOCATE (iii_vet)
    1552         344 :       DEALLOCATE (req_send)
    1553         344 :       IF (map_rec_size(para_env_sub%mepos) > 0) THEN
    1554         344 :          DEALLOCATE (indeces_map_my)
    1555             :       END IF
    1556         346 :       DO rec_counter = 1, number_of_rec
    1557           2 :          DEALLOCATE (indeces_rec(rec_counter)%map)
    1558         346 :          DEALLOCATE (buffer_rec(rec_counter)%msg)
    1559             :       END DO
    1560         346 :       DEALLOCATE (indeces_rec)
    1561         346 :       DEALLOCATE (buffer_rec)
    1562         346 :       DO send_counter = 1, number_of_send
    1563         346 :          DEALLOCATE (buffer_send(send_counter)%msg)
    1564             :       END DO
    1565         346 :       DEALLOCATE (buffer_send)
    1566         344 :       DEALLOCATE (map_send_size)
    1567         344 :       DEALLOCATE (map_rec_size)
    1568         344 :       DEALLOCATE (grid_2_mepos)
    1569         344 :       DEALLOCATE (mepos_2_grid)
    1570             : 
    1571             :       ! release buffer matrix
    1572         344 :       CALL cp_fm_release(fm_ia)
    1573             : 
    1574         344 :       CALL timestop(handle)
    1575             : 
    1576        1032 :    END SUBROUTINE create_dbcsr_gamma
    1577             : 
    1578             : ! **************************************************************************************************
    1579             : !> \brief prepare array for redistribution
    1580             : !> \param para_env ...
    1581             : !> \param para_env_sub ...
    1582             : !> \param ngroup ...
    1583             : !> \param group_grid_2_mepos ...
    1584             : !> \param mepos_2_grid_group ...
    1585             : !> \param pos_info ...
    1586             : ! **************************************************************************************************
    1587         350 :    SUBROUTINE prepare_redistribution(para_env, para_env_sub, ngroup, &
    1588             :                                      group_grid_2_mepos, mepos_2_grid_group, &
    1589             :                                      pos_info)
    1590             :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env, para_env_sub
    1591             :       INTEGER, INTENT(IN)                                :: ngroup
    1592             :       INTEGER, ALLOCATABLE, DIMENSION(:, :), INTENT(OUT) :: group_grid_2_mepos, mepos_2_grid_group
    1593             :       INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT), &
    1594             :          OPTIONAL                                        :: pos_info
    1595             : 
    1596             :       INTEGER                                            :: i, pos_group, pos_sub
    1597         350 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: my_pos_info
    1598             : 
    1599        1050 :       ALLOCATE (my_pos_info(0:para_env%num_pe - 1))
    1600         350 :       CALL para_env%allgather(para_env_sub%mepos, my_pos_info)
    1601             : 
    1602        1400 :       ALLOCATE (group_grid_2_mepos(0:para_env_sub%num_pe - 1, 0:ngroup - 1))
    1603        1734 :       group_grid_2_mepos = 0
    1604        1050 :       ALLOCATE (mepos_2_grid_group(2, 0:para_env%num_pe - 1))
    1605        2450 :       mepos_2_grid_group = 0
    1606             : 
    1607        1050 :       DO i = 0, para_env%num_pe - 1
    1608             :          ! calculate position of the group
    1609         700 :          pos_group = i/para_env_sub%num_pe
    1610             :          ! calculate position in the subgroup
    1611         700 :          pos_sub = my_pos_info(i)
    1612             :          ! fill the map from the grid of groups to process
    1613         700 :          group_grid_2_mepos(pos_sub, pos_group) = i
    1614             :          ! and the opposite, from the global pos to the grid pos
    1615         700 :          mepos_2_grid_group(1, i) = pos_sub
    1616        1050 :          mepos_2_grid_group(2, i) = pos_group
    1617             :       END DO
    1618             : 
    1619         350 :       IF (PRESENT(pos_info)) CALL move_alloc(my_pos_info, pos_info)
    1620             : 
    1621         350 :    END SUBROUTINE prepare_redistribution
    1622             : 
    1623           0 : END MODULE mp2_ri_grad_util

Generated by: LCOV version 1.15