LCOV - CP2K Regtests (git:ca6acae)

LCOV - code coverage report

Current view:	top level - src - mp2_ri_grad.F (source / functions)		Coverage	Total	Hit
Test:	CP2K Regtests (git:ca6acae)	Lines:	98.7 %	616	608
Test Date:	2026-01-02 06:29:53	Functions:	100.0 %	5	5

            Line data    Source code

       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2026 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Routines to calculate gradients of RI-GPW-MP2 energy using pw
      10              : !> \par History
      11              : !>      10.2013 created [Mauro Del Ben]
      12              : ! **************************************************************************************************
      13              : MODULE mp2_ri_grad
      14              :    USE atomic_kind_types,               ONLY: atomic_kind_type,&
      15              :                                               get_atomic_kind_set
      16              :    USE cell_types,                      ONLY: cell_type
      17              :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      18              :    USE cp_control_types,                ONLY: dft_control_type
      19              :    USE cp_dbcsr_api,                    ONLY: &
      20              :         dbcsr_add, dbcsr_copy, dbcsr_create, dbcsr_get_info, dbcsr_multiply, dbcsr_p_type, &
      21              :         dbcsr_release, dbcsr_set, dbcsr_transposed, dbcsr_type, dbcsr_type_no_symmetry, &
      22              :         dbcsr_type_symmetric
      23              :    USE cp_dbcsr_operations,             ONLY: copy_dbcsr_to_fm,&
      24              :                                               dbcsr_deallocate_matrix_set
      25              :    USE cp_eri_mme_interface,            ONLY: cp_eri_mme_param
      26              :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      27              :                                               cp_fm_struct_release,&
      28              :                                               cp_fm_struct_type
      29              :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      30              :                                               cp_fm_get_info,&
      31              :                                               cp_fm_release,&
      32              :                                               cp_fm_set_all,&
      33              :                                               cp_fm_to_fm_submat,&
      34              :                                               cp_fm_type
      35              :    USE input_constants,                 ONLY: do_eri_gpw,&
      36              :                                               do_eri_mme,&
      37              :                                               ri_mp2_laplace,&
      38              :                                               ri_mp2_method_gpw,&
      39              :                                               ri_rpa_method_gpw
      40              :    USE kinds,                           ONLY: dp
      41              :    USE libint_2c_3c,                    ONLY: compare_potential_types
      42              :    USE message_passing,                 ONLY: mp_para_env_release,&
      43              :                                               mp_para_env_type,&
      44              :                                               mp_request_null,&
      45              :                                               mp_request_type,&
      46              :                                               mp_waitall
      47              :    USE mp2_eri,                         ONLY: mp2_eri_2c_integrate,&
      48              :                                               mp2_eri_3c_integrate,&
      49              :                                               mp2_eri_deallocate_forces,&
      50              :                                               mp2_eri_force
      51              :    USE mp2_eri_gpw,                     ONLY: cleanup_gpw,&
      52              :                                               integrate_potential_forces_2c,&
      53              :                                               integrate_potential_forces_3c_1c,&
      54              :                                               integrate_potential_forces_3c_2c,&
      55              :                                               prepare_gpw
      56              :    USE mp2_types,                       ONLY: integ_mat_buffer_type,&
      57              :                                               integ_mat_buffer_type_2D,&
      58              :                                               mp2_type
      59              :    USE parallel_gemm_api,               ONLY: parallel_gemm
      60              :    USE particle_types,                  ONLY: particle_type
      61              :    USE pw_env_types,                    ONLY: pw_env_type
      62              :    USE pw_poisson_types,                ONLY: pw_poisson_type
      63              :    USE pw_pool_types,                   ONLY: pw_pool_type
      64              :    USE pw_types,                        ONLY: pw_c1d_gs_type,&
      65              :                                               pw_r3d_rs_type
      66              :    USE qs_environment_types,            ONLY: get_qs_env,&
      67              :                                               qs_environment_type
      68              :    USE qs_force_types,                  ONLY: allocate_qs_force,&
      69              :                                               qs_force_type,&
      70              :                                               zero_qs_force
      71              :    USE qs_kind_types,                   ONLY: qs_kind_type
      72              :    USE qs_ks_types,                     ONLY: qs_ks_env_type
      73              :    USE qs_neighbor_list_types,          ONLY: neighbor_list_set_p_type
      74              :    USE task_list_types,                 ONLY: task_list_type
      75              :    USE util,                            ONLY: get_limit
      76              :    USE virial_types,                    ONLY: virial_type
      77              : #include "./base/base_uses.f90"
      78              : 
      79              :    IMPLICIT NONE
      80              : 
      81              :    PRIVATE
      82              : 
      83              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_grad'
      84              : 
      85              :    PUBLIC :: calc_ri_mp2_nonsep
      86              : 
      87              : CONTAINS
      88              : 
      89              : ! **************************************************************************************************
      90              : !> \brief Calculate the non-separable part of the gradients and update the
      91              : !>        Lagrangian
      92              : !> \param qs_env ...
      93              : !> \param mp2_env ...
      94              : !> \param para_env ...
      95              : !> \param para_env_sub ...
      96              : !> \param cell ...
      97              : !> \param particle_set ...
      98              : !> \param atomic_kind_set ...
      99              : !> \param qs_kind_set ...
     100              : !> \param mo_coeff ...
     101              : !> \param dimen_RI ...
     102              : !> \param Eigenval ...
     103              : !> \param my_group_L_start ...
     104              : !> \param my_group_L_end ...
     105              : !> \param my_group_L_size ...
     106              : !> \param sab_orb_sub ...
     107              : !> \param mat_munu ...
     108              : !> \param blacs_env_sub ...
     109              : !> \author Mauro Del Ben
     110              : ! **************************************************************************************************
     111          272 :    SUBROUTINE calc_ri_mp2_nonsep(qs_env, mp2_env, para_env, para_env_sub, cell, particle_set, &
     112          272 :                                  atomic_kind_set, qs_kind_set, mo_coeff, dimen_RI, Eigenval, &
     113              :                                  my_group_L_start, my_group_L_end, my_group_L_size, sab_orb_sub, mat_munu, &
     114              :                                  blacs_env_sub)
     115              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     116              :       TYPE(mp2_type)                                     :: mp2_env
     117              :       TYPE(mp_para_env_type), POINTER                    :: para_env, para_env_sub
     118              :       TYPE(cell_type), POINTER                           :: cell
     119              :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     120              :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     121              :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     122              :       TYPE(cp_fm_type), DIMENSION(:), INTENT(IN)         :: mo_coeff
     123              :       INTEGER, INTENT(IN)                                :: dimen_RI
     124              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: Eigenval
     125              :       INTEGER, INTENT(IN)                                :: my_group_L_start, my_group_L_end, &
     126              :                                                             my_group_L_size
     127              :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     128              :          POINTER                                         :: sab_orb_sub
     129              :       TYPE(dbcsr_p_type), INTENT(INOUT)                  :: mat_munu
     130              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env_sub
     131              : 
     132              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'calc_ri_mp2_nonsep'
     133              : 
     134              :       INTEGER                                            :: eri_method, handle, handle2, i, ikind, &
     135              :                                                             ispin, itmp(2), L_counter, LLL, &
     136              :                                                             my_P_end, my_P_size, my_P_start, nao, &
     137              :                                                             nspins
     138          272 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: atom_of_kind, homo, kind_of, &
     139          272 :                                                             natom_of_kind, virtual
     140              :       LOGICAL                                            :: alpha_beta, use_virial
     141              :       REAL(KIND=dp)                                      :: cutoff_old, eps_filter, factor, &
     142              :                                                             factor_2c, relative_cutoff_old
     143          272 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: e_cutoff_old
     144          272 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: G_PQ_local, G_PQ_local_2
     145              :       REAL(KIND=dp), DIMENSION(3, 3)                     :: h_stress, pv_virial
     146          272 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: I_tmp2
     147              :       TYPE(cp_eri_mme_param), POINTER                    :: eri_param
     148              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
     149          272 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: L1_mu_i, L2_nu_a
     150              :       TYPE(dbcsr_p_type)                                 :: matrix_P_munu
     151              :       TYPE(dbcsr_p_type), ALLOCATABLE, DIMENSION(:)      :: mo_coeff_o, mo_coeff_v
     152          272 :       TYPE(dbcsr_p_type), ALLOCATABLE, DIMENSION(:, :)   :: G_P_ia
     153          272 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: mat_munu_local, matrix_P_munu_local
     154              :       TYPE(dbcsr_type)                                   :: matrix_P_munu_nosym
     155          272 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: Lag_mu_i_1, Lag_nu_a_2, matrix_P_inu
     156              :       TYPE(dft_control_type), POINTER                    :: dft_control
     157          272 :       TYPE(mp2_eri_force), ALLOCATABLE, DIMENSION(:)     :: force_2c, force_2c_RI, force_3c_aux, &
     158          272 :                                                             force_3c_orb_mu, force_3c_orb_nu
     159         1088 :       TYPE(pw_c1d_gs_type)                               :: dvg(3), pot_g, rho_g, rho_g_copy
     160              :       TYPE(pw_env_type), POINTER                         :: pw_env_sub
     161              :       TYPE(pw_poisson_type), POINTER                     :: poisson_env
     162              :       TYPE(pw_pool_type), POINTER                        :: auxbas_pw_pool
     163              :       TYPE(pw_r3d_rs_type)                               :: psi_L, rho_r
     164          272 :       TYPE(qs_force_type), DIMENSION(:), POINTER         :: force, mp2_force
     165              :       TYPE(qs_ks_env_type), POINTER                      :: ks_env
     166              :       TYPE(task_list_type), POINTER                      :: task_list_sub
     167              :       TYPE(virial_type), POINTER                         :: virial
     168              : 
     169          272 :       CALL timeset(routineN, handle)
     170              : 
     171          272 :       eri_method = mp2_env%eri_method
     172          272 :       eri_param => mp2_env%eri_mme_param
     173              : 
     174              :       ! Find out whether we have a closed or open shell
     175          272 :       nspins = SIZE(Eigenval, 2)
     176          272 :       alpha_beta = (nspins == 2)
     177              : 
     178         1088 :       ALLOCATE (virtual(nspins), homo(nspins))
     179          272 :       eps_filter = mp2_env%mp2_gpw%eps_filter
     180        30372 :       ALLOCATE (mo_coeff_o(nspins), mo_coeff_v(nspins), G_P_ia(nspins, my_group_L_size))
     181          630 :       DO ispin = 1, nspins
     182          358 :          mo_coeff_o(ispin)%matrix => mp2_env%ri_grad%mo_coeff_o(ispin)%matrix
     183          358 :          mo_coeff_v(ispin)%matrix => mp2_env%ri_grad%mo_coeff_v(ispin)%matrix
     184              :          CALL dbcsr_get_info(mo_coeff_o(ispin)%matrix, &
     185              :                              nfullrows_total=nao, &
     186          358 :                              nfullcols_total=homo(ispin))
     187              :          CALL dbcsr_get_info(mo_coeff_v(ispin)%matrix, &
     188              :                              nfullrows_total=nao, &
     189          358 :                              nfullcols_total=virtual(ispin))
     190        16470 :          DO LLL = 1, my_group_L_size
     191        16198 :             G_P_ia(ispin, LLL)%matrix => mp2_env%ri_grad%G_P_ia(LLL, ispin)%matrix
     192              :          END DO
     193              :       END DO
     194          272 :       DEALLOCATE (mp2_env%ri_grad%G_P_ia)
     195              : 
     196          272 :       itmp = get_limit(dimen_RI, para_env_sub%num_pe, para_env_sub%mepos)
     197          272 :       my_P_start = itmp(1)
     198          272 :       my_P_end = itmp(2)
     199          272 :       my_P_size = itmp(2) - itmp(1) + 1
     200              : 
     201         1088 :       ALLOCATE (G_PQ_local(dimen_RI, my_group_L_size))
     202      1025542 :       G_PQ_local = 0.0_dp
     203       972108 :       G_PQ_local(my_P_start:my_P_end, :) = mp2_env%ri_grad%Gamma_PQ
     204          272 :       DEALLOCATE (mp2_env%ri_grad%Gamma_PQ)
     205       972108 :       G_PQ_local(my_P_start:my_P_end, :) = G_PQ_local(my_P_start:my_P_end, :)/REAL(nspins, dp)
     206          272 :       CALL para_env_sub%sum(G_PQ_local)
     207          272 :       IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
     208           36 :          ALLOCATE (G_PQ_local_2(dimen_RI, my_group_L_size))
     209        41844 :          G_PQ_local_2 = 0.0_dp
     210        41844 :          G_PQ_local_2(my_P_start:my_P_end, :) = mp2_env%ri_grad%Gamma_PQ_2
     211           12 :          DEALLOCATE (mp2_env%ri_grad%Gamma_PQ_2)
     212        41844 :          G_PQ_local_2(my_P_start:my_P_end, :) = G_PQ_local_2(my_P_start:my_P_end, :)/REAL(nspins, dp)
     213           12 :          CALL para_env_sub%sum(G_PQ_local_2)
     214              :       END IF
     215              : 
     216              :       ! create matrix holding the back transformation (G_P_inu)
     217         1174 :       ALLOCATE (matrix_P_inu(nspins))
     218          630 :       DO ispin = 1, nspins
     219          630 :          CALL dbcsr_create(matrix_P_inu(ispin), template=mo_coeff_o(ispin)%matrix)
     220              :       END DO
     221              : 
     222              :       ! non symmetric matrix
     223              :       CALL dbcsr_create(matrix_P_munu_nosym, template=mat_munu%matrix, &
     224          272 :                         matrix_type=dbcsr_type_no_symmetry)
     225              : 
     226              :       ! create Lagrangian matrices in mixed AO/MO formalism
     227          902 :       ALLOCATE (Lag_mu_i_1(nspins))
     228          630 :       DO ispin = 1, nspins
     229          358 :          CALL dbcsr_create(Lag_mu_i_1(ispin), template=mo_coeff_o(ispin)%matrix)
     230          630 :          CALL dbcsr_set(Lag_mu_i_1(ispin), 0.0_dp)
     231              :       END DO
     232              : 
     233          902 :       ALLOCATE (Lag_nu_a_2(nspins))
     234          630 :       DO ispin = 1, nspins
     235          358 :          CALL dbcsr_create(Lag_nu_a_2(ispin), template=mo_coeff_v(ispin)%matrix)
     236          630 :          CALL dbcsr_set(Lag_nu_a_2(ispin), 0.0_dp)
     237              :       END DO
     238              : 
     239              :       ! get forces
     240          272 :       NULLIFY (force, virial)
     241          272 :       CALL get_qs_env(qs_env=qs_env, force=force, virial=virial)
     242              : 
     243              :       ! check if we want to calculate the virial
     244          272 :       use_virial = virial%pv_availability .AND. (.NOT. virial%pv_numer)
     245              : 
     246          272 :       CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, natom_of_kind=natom_of_kind)
     247          272 :       NULLIFY (mp2_force)
     248          272 :       CALL allocate_qs_force(mp2_force, natom_of_kind)
     249          272 :       DEALLOCATE (natom_of_kind)
     250          272 :       CALL zero_qs_force(mp2_force)
     251          272 :       mp2_env%ri_grad%mp2_force => mp2_force
     252              : 
     253          272 :       factor_2c = -4.0_dp
     254          272 :       IF (mp2_env%method == ri_rpa_method_gpw) THEN
     255           24 :          factor_2c = -1.0_dp
     256           24 :          IF (alpha_beta) factor_2c = -2.0_dp
     257              :       END IF
     258              : 
     259              :       ! prepare integral derivatives with mme method
     260          272 :       IF (eri_method == do_eri_mme) THEN
     261         1184 :          ALLOCATE (matrix_P_munu_local(my_group_L_size))
     262         1158 :          ALLOCATE (mat_munu_local(my_group_L_size))
     263           26 :          L_counter = 0
     264         1132 :          DO LLL = my_group_L_start, my_group_L_end
     265         1106 :             L_counter = L_counter + 1
     266         1106 :             ALLOCATE (mat_munu_local(L_counter)%matrix)
     267              :             CALL dbcsr_create(mat_munu_local(L_counter)%matrix, template=mat_munu%matrix, &
     268         1106 :                               matrix_type=dbcsr_type_symmetric)
     269         1106 :             CALL dbcsr_copy(mat_munu_local(L_counter)%matrix, mat_munu%matrix)
     270         1106 :             CALL dbcsr_set(mat_munu_local(L_counter)%matrix, 0.0_dp)
     271              : 
     272              :             CALL G_P_transform_MO_to_AO(matrix_P_munu_local(L_counter)%matrix, matrix_P_munu_nosym, mat_munu%matrix, &
     273              :                                         G_P_ia(:, L_counter), matrix_P_inu, &
     274         1132 :                                         mo_coeff_v, mo_coeff_o, eps_filter)
     275              :          END DO
     276              : 
     277           78 :          ALLOCATE (I_tmp2(dimen_RI, my_group_L_size))
     278        95790 :          I_tmp2(:, :) = 0.0_dp
     279              :          CALL mp2_eri_2c_integrate(eri_param, mp2_env%potential_parameter, para_env_sub, qs_env, &
     280              :                                    basis_type_a="RI_AUX", basis_type_b="RI_AUX", &
     281              :                                    hab=I_tmp2, first_b=my_group_L_start, last_b=my_group_L_end, &
     282           26 :                                    eri_method=eri_method, pab=G_PQ_local, force_a=force_2c)
     283           26 :          IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
     284            0 :             I_tmp2(:, :) = 0.0_dp
     285              :             CALL mp2_eri_2c_integrate(eri_param, mp2_env%ri_metric, para_env_sub, qs_env, &
     286              :                                       basis_type_a="RI_AUX", basis_type_b="RI_AUX", &
     287              :                                       hab=I_tmp2, first_b=my_group_L_start, last_b=my_group_L_end, &
     288            0 :                                       eri_method=eri_method, pab=G_PQ_local_2, force_a=force_2c_RI)
     289              :          END IF
     290           26 :          DEALLOCATE (I_tmp2)
     291              : 
     292              :          CALL mp2_eri_3c_integrate(eri_param, mp2_env%ri_metric, para_env_sub, qs_env, &
     293              :                                    first_c=my_group_L_start, last_c=my_group_L_end, mat_ab=mat_munu_local, &
     294              :                                    basis_type_a="ORB", basis_type_b="ORB", basis_type_c="RI_AUX", &
     295              :                                    sab_nl=sab_orb_sub, eri_method=eri_method, &
     296              :                                    pabc=matrix_P_munu_local, &
     297           26 :                                    force_a=force_3c_orb_mu, force_b=force_3c_orb_nu, force_c=force_3c_aux)
     298              : 
     299           26 :          L_counter = 0
     300         1132 :          DO LLL = my_group_L_start, my_group_L_end
     301         1106 :             L_counter = L_counter + 1
     302         2432 :             DO ispin = 1, nspins
     303              :                CALL dbcsr_multiply("N", "T", 1.0_dp, mo_coeff_v(ispin)%matrix, G_P_ia(ispin, L_counter)%matrix, &
     304         2432 :                                    0.0_dp, matrix_P_inu(ispin), filter_eps=eps_filter)
     305              :             END DO
     306              : 
     307              :             ! The matrices of G_P_ia are deallocated here
     308              :             CALL update_lagrangian(mat_munu_local(L_counter)%matrix, matrix_P_inu, Lag_mu_i_1, &
     309              :                                    G_P_ia(:, L_counter), mo_coeff_o, Lag_nu_a_2, &
     310         1132 :                                    eps_filter)
     311              :          END DO
     312              : 
     313           72 :          DO ikind = 1, SIZE(force)
     314              :             mp2_force(ikind)%mp2_non_sep(:, :) = factor_2c*force_2c(ikind)%forces(:, :) + &
     315              :                                                  force_3c_orb_mu(ikind)%forces(:, :) + &
     316              :                                                  force_3c_orb_nu(ikind)%forces(:, :) + &
     317          334 :                                                  force_3c_aux(ikind)%forces(:, :)
     318              : 
     319           72 :             IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
     320            0 :                mp2_force(ikind)%mp2_non_sep(:, :) = mp2_force(ikind)%mp2_non_sep(:, :) + factor_2c*force_2c_RI(ikind)%forces
     321              :             END IF
     322              :          END DO
     323              : 
     324           26 :          CALL mp2_eri_deallocate_forces(force_2c)
     325           26 :          IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
     326            0 :             CALL mp2_eri_deallocate_forces(force_2c_RI)
     327              :          END IF
     328           26 :          CALL mp2_eri_deallocate_forces(force_3c_aux)
     329           26 :          CALL mp2_eri_deallocate_forces(force_3c_orb_mu)
     330           26 :          CALL mp2_eri_deallocate_forces(force_3c_orb_nu)
     331           26 :          CALL dbcsr_deallocate_matrix_set(matrix_P_munu_local)
     332           26 :          CALL dbcsr_deallocate_matrix_set(mat_munu_local)
     333              : 
     334          246 :       ELSEIF (eri_method == do_eri_gpw) THEN
     335          246 :          CALL get_qs_env(qs_env, ks_env=ks_env)
     336              : 
     337          246 :          CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
     338              : 
     339              :          ! Supporting stuff for GPW
     340              :          CALL prepare_gpw(qs_env, dft_control, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
     341          246 :                           auxbas_pw_pool, poisson_env, task_list_sub, rho_r, rho_g, pot_g, psi_L, sab_orb_sub)
     342              : 
     343              :          ! in case virial is required we need auxiliary pw
     344              :          ! for calculate the MP2-volume contribution to the virial
     345              :          ! (hartree potential derivatives)
     346          246 :          IF (use_virial) THEN
     347           50 :             CALL auxbas_pw_pool%create_pw(rho_g_copy)
     348          200 :             DO i = 1, 3
     349          200 :                CALL auxbas_pw_pool%create_pw(dvg(i))
     350              :             END DO
     351              :          END IF
     352              : 
     353              :          ! start main loop over auxiliary basis functions
     354          246 :          CALL timeset(routineN//"_loop", handle2)
     355              : 
     356          246 :          IF (use_virial) h_stress = 0.0_dp
     357              : 
     358          246 :          L_counter = 0
     359        11052 :          DO LLL = my_group_L_start, my_group_L_end
     360        10806 :             L_counter = L_counter + 1
     361              : 
     362              :             CALL G_P_transform_MO_to_AO(matrix_P_munu%matrix, matrix_P_munu_nosym, mat_munu%matrix, &
     363              :                                         G_P_ia(:, L_counter), matrix_P_inu, &
     364        10806 :                                         mo_coeff_v, mo_coeff_o, eps_filter)
     365              : 
     366              :             CALL integrate_potential_forces_2c(rho_r, LLL, rho_g, atomic_kind_set, &
     367              :                                                qs_kind_set, particle_set, cell, pw_env_sub, poisson_env, &
     368              :                                                pot_g, mp2_env%potential_parameter, use_virial, &
     369              :                                                rho_g_copy, dvg, kind_of, atom_of_kind, G_PQ_local(:, L_counter), &
     370        10806 :                                                mp2_force, h_stress, para_env_sub, dft_control, psi_L, factor_2c)
     371              : 
     372        10806 :             IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
     373              : 
     374              :                CALL integrate_potential_forces_2c(rho_r, LLL, rho_g, atomic_kind_set, &
     375              :                                                   qs_kind_set, particle_set, cell, pw_env_sub, poisson_env, &
     376              :                                                   pot_g, mp2_env%ri_metric, use_virial, &
     377              :                                                   rho_g_copy, dvg, kind_of, atom_of_kind, G_PQ_local_2(:, L_counter), &
     378          498 :                                                   mp2_force, h_stress, para_env_sub, dft_control, psi_L, factor_2c)
     379              :             END IF
     380              : 
     381        36714 :             IF (use_virial) pv_virial = virial%pv_virial
     382              :             CALL integrate_potential_forces_3c_1c(mat_munu, rho_r, matrix_P_munu, qs_env, pw_env_sub, &
     383        10806 :                                                   task_list_sub)
     384        10806 :             IF (use_virial) THEN
     385        28067 :                h_stress = h_stress + (virial%pv_virial - pv_virial)
     386        28067 :                virial%pv_virial = pv_virial
     387              :             END IF
     388              : 
     389              :             ! The matrices of G_P_ia are deallocated here
     390              :             CALL update_lagrangian(mat_munu%matrix, matrix_P_inu, Lag_mu_i_1, &
     391              :                                    G_P_ia(:, L_counter), mo_coeff_o, Lag_nu_a_2, &
     392        10806 :                                    eps_filter)
     393              : 
     394              :             CALL integrate_potential_forces_3c_2c(matrix_P_munu, rho_r, rho_g, task_list_sub, pw_env_sub, &
     395              :                                                   mp2_env%ri_metric, &
     396              :                                                   ks_env, poisson_env, pot_g, use_virial, rho_g_copy, dvg, &
     397              :                                                   h_stress, para_env_sub, kind_of, atom_of_kind, &
     398        11052 :                                                   qs_kind_set, particle_set, cell, LLL, mp2_force, dft_control)
     399              :          END DO
     400              : 
     401          246 :          CALL timestop(handle2)
     402              : 
     403          246 :          DEALLOCATE (kind_of)
     404          246 :          DEALLOCATE (atom_of_kind)
     405              : 
     406          246 :          IF (use_virial) THEN
     407           50 :             CALL auxbas_pw_pool%give_back_pw(rho_g_copy)
     408          200 :             DO i = 1, 3
     409          200 :                CALL auxbas_pw_pool%give_back_pw(dvg(i))
     410              :             END DO
     411              :          END IF
     412              : 
     413              :          CALL cleanup_gpw(qs_env, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
     414          246 :                           task_list_sub, auxbas_pw_pool, rho_r, rho_g, pot_g, psi_L)
     415              : 
     416          246 :          CALL dbcsr_release(matrix_P_munu%matrix)
     417          246 :          DEALLOCATE (matrix_P_munu%matrix)
     418              : 
     419              :       END IF
     420              : 
     421          872 :       IF (use_virial) mp2_env%ri_grad%mp2_virial = h_stress
     422              : 
     423          272 :       DEALLOCATE (G_PQ_local)
     424          272 :       IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) DEALLOCATE (G_PQ_local_2)
     425              : 
     426          272 :       CALL dbcsr_release(matrix_P_munu_nosym)
     427              : 
     428          630 :       DO ispin = 1, nspins
     429          630 :          CALL dbcsr_release(matrix_P_inu(ispin))
     430              :       END DO
     431          272 :       DEALLOCATE (matrix_P_inu, G_P_ia)
     432              : 
     433              :       ! move the forces in the correct place
     434          272 :       IF (eri_method == do_eri_gpw) THEN
     435          728 :          DO ikind = 1, SIZE(mp2_force)
     436         6546 :             mp2_force(ikind)%mp2_non_sep(:, :) = force(ikind)%rho_elec(:, :)
     437         3760 :             force(ikind)%rho_elec(:, :) = 0.0_dp
     438              :          END DO
     439              :       END IF
     440              : 
     441              :       ! Now we move from the local matrices to the global ones
     442              :       ! defined over all MPI tasks
     443              :       ! Start with moving from the DBCSR to FM for the lagrangians
     444              : 
     445         1804 :       ALLOCATE (L1_mu_i(nspins), L2_nu_a(nspins))
     446          630 :       DO ispin = 1, nspins
     447              :          ! Now we move from the local matrices to the global ones
     448              :          ! defined over all MPI tasks
     449              :          ! Start with moving from the DBCSR to FM for the lagrangians
     450          358 :          NULLIFY (fm_struct_tmp)
     451              :          CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env_sub, context=blacs_env_sub, &
     452          358 :                                   nrow_global=nao, ncol_global=homo(ispin))
     453          358 :          CALL cp_fm_create(L1_mu_i(ispin), fm_struct_tmp, name="Lag_mu_i")
     454          358 :          CALL cp_fm_struct_release(fm_struct_tmp)
     455          358 :          CALL cp_fm_set_all(L1_mu_i(ispin), 0.0_dp)
     456          358 :          CALL copy_dbcsr_to_fm(matrix=Lag_mu_i_1(ispin), fm=L1_mu_i(ispin))
     457              : 
     458              :          ! release Lag_mu_i_1
     459          358 :          CALL dbcsr_release(Lag_mu_i_1(ispin))
     460              : 
     461          358 :          NULLIFY (fm_struct_tmp)
     462              :          CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env_sub, context=blacs_env_sub, &
     463          358 :                                   nrow_global=nao, ncol_global=virtual(ispin))
     464          358 :          CALL cp_fm_create(L2_nu_a(ispin), fm_struct_tmp, name="Lag_nu_a")
     465          358 :          CALL cp_fm_struct_release(fm_struct_tmp)
     466          358 :          CALL cp_fm_set_all(L2_nu_a(ispin), 0.0_dp)
     467          358 :          CALL copy_dbcsr_to_fm(matrix=Lag_nu_a_2(ispin), fm=L2_nu_a(ispin))
     468              : 
     469              :          ! release Lag_nu_a_2
     470          630 :          CALL dbcsr_release(Lag_nu_a_2(ispin))
     471              :       END DO
     472          272 :       DEALLOCATE (Lag_mu_i_1, Lag_nu_a_2)
     473              : 
     474              :       ! Set the factor to multiply P_ij (depends on the open or closed shell)
     475          272 :       factor = 1.0_dp
     476          272 :       IF (alpha_beta) factor = 0.50_dp
     477              : 
     478          630 :       DO ispin = 1, nspins
     479              :          CALL create_W_P(qs_env, mp2_env, mo_coeff(ispin), homo(ispin), virtual(ispin), para_env, &
     480              :                          para_env_sub, Eigenval(:, ispin), L1_mu_i(ispin), L2_nu_a(ispin), &
     481          630 :                          factor, ispin)
     482              :       END DO
     483          272 :       DEALLOCATE (L1_mu_i, L2_nu_a)
     484              : 
     485          272 :       CALL timestop(handle)
     486              : 
     487          544 :    END SUBROUTINE calc_ri_mp2_nonsep
     488              : 
     489              : ! **************************************************************************************************
     490              : !> \brief Transforms G_P_ia to G_P_munu
     491              : !> \param G_P_munu The container for G_P_munu, will be allocated and created if not allocated on entry
     492              : !> \param G_P_munu_nosym ...
     493              : !> \param mat_munu ...
     494              : !> \param G_P_ia ...
     495              : !> \param G_P_inu ...
     496              : !> \param mo_coeff_v ...
     497              : !> \param mo_coeff_o ...
     498              : !> \param eps_filter ...
     499              : ! **************************************************************************************************
     500        11912 :    SUBROUTINE G_P_transform_MO_to_AO(G_P_munu, G_P_munu_nosym, mat_munu, G_P_ia, G_P_inu, &
     501        11912 :                                      mo_coeff_v, mo_coeff_o, eps_filter)
     502              :       TYPE(dbcsr_type), POINTER                          :: G_P_munu
     503              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: G_P_munu_nosym, mat_munu
     504              :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: G_P_ia
     505              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: G_P_inu
     506              :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: mo_coeff_v, mo_coeff_o
     507              :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
     508              : 
     509              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'G_P_transform_MO_to_AO'
     510              : 
     511              :       INTEGER                                            :: handle
     512              : 
     513        11912 :       IF (.NOT. ASSOCIATED(G_P_munu)) THEN
     514         1352 :          ALLOCATE (G_P_munu)
     515              :          CALL dbcsr_create(G_P_munu, template=mat_munu, &
     516         1352 :                            matrix_type=dbcsr_type_symmetric)
     517              :       END IF
     518              : 
     519        11912 :       CALL G_P_transform_alpha_beta(G_P_ia, G_P_inu, G_P_munu_nosym, mo_coeff_v, mo_coeff_o, eps_filter)
     520              : 
     521              :       ! symmetrize
     522        11912 :       CALL timeset(routineN//"_symmetrize", handle)
     523        11912 :       CALL dbcsr_set(G_P_munu, 0.0_dp)
     524        11912 :       CALL dbcsr_transposed(G_P_munu, G_P_munu_nosym)
     525              :       CALL dbcsr_add(G_P_munu, G_P_munu_nosym, &
     526        11912 :                      alpha_scalar=2.0_dp, beta_scalar=2.0_dp)
     527              :       ! this is a trick to avoid that integrate_v_rspace starts to cry
     528        11912 :       CALL dbcsr_copy(mat_munu, G_P_munu, keep_sparsity=.TRUE.)
     529        11912 :       CALL dbcsr_copy(G_P_munu, mat_munu)
     530              : 
     531        11912 :       CALL timestop(handle)
     532              : 
     533        11912 :    END SUBROUTINE G_P_transform_MO_to_AO
     534              : 
     535              : ! **************************************************************************************************
     536              : !> \brief ...
     537              : !> \param G_P_ia ...
     538              : !> \param G_P_inu ...
     539              : !> \param G_P_munu ...
     540              : !> \param mo_coeff_v ...
     541              : !> \param mo_coeff_o ...
     542              : !> \param eps_filter ...
     543              : ! **************************************************************************************************
     544        11912 :    SUBROUTINE G_P_transform_alpha_beta(G_P_ia, G_P_inu, G_P_munu, mo_coeff_v, mo_coeff_o, eps_filter)
     545              :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: G_P_ia
     546              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: G_P_inu
     547              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: G_P_munu
     548              :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: mo_coeff_v, mo_coeff_o
     549              :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
     550              : 
     551              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'G_P_transform_alpha_beta'
     552              : 
     553              :       INTEGER                                            :: handle, ispin
     554              :       REAL(KIND=dp)                                      :: factor
     555              : 
     556        11912 :       CALL timeset(routineN, handle)
     557              : 
     558        11912 :       factor = 1.0_dp/REAL(SIZE(G_P_ia), dp)
     559              : 
     560        11912 :       CALL dbcsr_set(G_P_munu, 0.0_dp)
     561              : 
     562        27752 :       DO ispin = 1, SIZE(G_P_ia)
     563              :          ! first back-transformation a->nu
     564              :          CALL dbcsr_multiply("N", "T", 1.0_dp, mo_coeff_v(ispin)%matrix, G_P_ia(ispin)%matrix, &
     565        15840 :                              0.0_dp, G_P_inu(ispin), filter_eps=eps_filter)
     566              : 
     567              :          ! second back-transformation i->mu
     568              :          CALL dbcsr_multiply("N", "T", factor, G_P_inu(ispin), mo_coeff_o(ispin)%matrix, &
     569        27752 :                              1.0_dp, G_P_munu, filter_eps=eps_filter)
     570              :       END DO
     571              : 
     572        11912 :       CALL timestop(handle)
     573              : 
     574        11912 :    END SUBROUTINE G_P_transform_alpha_beta
     575              : 
     576              : ! **************************************************************************************************
     577              : !> \brief ...
     578              : !> \param mat_munu ...
     579              : !> \param matrix_P_inu ...
     580              : !> \param Lag_mu_i_1 ...
     581              : !> \param G_P_ia ...
     582              : !> \param mo_coeff_o ...
     583              : !> \param Lag_nu_a_2 ...
     584              : !> \param eps_filter ...
     585              : ! **************************************************************************************************
     586        11912 :    SUBROUTINE update_lagrangian(mat_munu, matrix_P_inu, Lag_mu_i_1, &
     587        23824 :                                 G_P_ia, mo_coeff_o, Lag_nu_a_2, &
     588              :                                 eps_filter)
     589              :       TYPE(dbcsr_type), INTENT(IN)                       :: mat_munu
     590              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: matrix_P_inu, Lag_mu_i_1
     591              :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT)    :: G_P_ia
     592              :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: mo_coeff_o
     593              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: Lag_nu_a_2
     594              :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
     595              : 
     596              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'update_lagrangian'
     597              : 
     598              :       INTEGER                                            :: handle, ispin
     599              : 
     600              :       ! update lagrangian
     601        11912 :       CALL timeset(routineN, handle)
     602              : 
     603        27752 :       DO ispin = 1, SIZE(G_P_ia)
     604              :          ! first contract mat_munu with the half back transformed Gamma_i_nu
     605              :          ! in order to update Lag_mu_i_1
     606              :          CALL dbcsr_multiply("N", "N", 1.0_dp, mat_munu, matrix_P_inu(ispin), &
     607        15840 :                              1.0_dp, Lag_mu_i_1(ispin), filter_eps=eps_filter)
     608              : 
     609              :          ! transform first index of mat_munu and store the result into matrix_P_inu
     610        15840 :          CALL dbcsr_set(matrix_P_inu(ispin), 0.0_dp)
     611              :          CALL dbcsr_multiply("N", "N", 1.0_dp, mat_munu, mo_coeff_o(ispin)%matrix, &
     612        15840 :                              0.0_dp, matrix_P_inu(ispin), filter_eps=eps_filter)
     613              : 
     614              :          ! contract the transformend matrix_P_inu with the untransformend Gamma_i_a
     615              :          ! in order to update Lag_nu_a_2
     616              :          CALL dbcsr_multiply("N", "N", -1.0_dp, matrix_P_inu(ispin), G_P_ia(ispin)%matrix, &
     617        15840 :                              1.0_dp, Lag_nu_a_2(ispin), filter_eps=eps_filter)
     618              : 
     619              :          ! release the actual gamma_P_ia
     620        15840 :          CALL dbcsr_release(G_P_ia(ispin)%matrix)
     621        27752 :          DEALLOCATE (G_P_ia(ispin)%matrix)
     622              :       END DO
     623              : 
     624        11912 :       CALL timestop(handle)
     625              : 
     626        11912 :    END SUBROUTINE update_lagrangian
     627              : 
     628              : ! **************************************************************************************************
     629              : !> \brief ...
     630              : !> \param qs_env ...
     631              : !> \param mp2_env ...
     632              : !> \param mo_coeff ...
     633              : !> \param homo ...
     634              : !> \param virtual ...
     635              : !> \param para_env ...
     636              : !> \param para_env_sub ...
     637              : !> \param Eigenval ...
     638              : !> \param L1_mu_i ...
     639              : !> \param L2_nu_a ...
     640              : !> \param factor ...
     641              : !> \param kspin ...
     642              : ! **************************************************************************************************
     643          358 :    SUBROUTINE create_W_P(qs_env, mp2_env, mo_coeff, homo, virtual, para_env, para_env_sub, &
     644          358 :                          Eigenval, L1_mu_i, L2_nu_a, factor, kspin)
     645              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     646              :       TYPE(mp2_type)                                     :: mp2_env
     647              :       TYPE(cp_fm_type), INTENT(IN)                       :: mo_coeff
     648              :       INTEGER, INTENT(IN)                                :: homo, virtual
     649              :       TYPE(mp_para_env_type), POINTER                    :: para_env, para_env_sub
     650              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: Eigenval
     651              :       TYPE(cp_fm_type), INTENT(INOUT)                    :: L1_mu_i, L2_nu_a
     652              :       REAL(KIND=dp), INTENT(IN)                          :: factor
     653              :       INTEGER, INTENT(IN)                                :: kspin
     654              : 
     655              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'create_W_P'
     656              : 
     657              :       INTEGER :: color_exchange, dummy_proc, handle, handle2, handle3, i_global, i_local, iiB, &
     658              :          iii, iproc, itmp(2), j_global, j_local, jjB, max_col_size, max_row_size, &
     659              :          my_B_virtual_end, my_B_virtual_start, mypcol, myprow, nao, ncol_local, ncol_local_1i, &
     660              :          ncol_local_2a, nmo, npcol, nprow, nrow_local, nrow_local_1i, nrow_local_2a, &
     661              :          number_of_rec, number_of_send, proc_receive, proc_receive_static, proc_send, &
     662              :          proc_send_ex, proc_send_static, proc_send_sub, proc_shift, rec_col_size, rec_counter, &
     663              :          rec_row_size, send_col_size, send_counter, send_pcol, send_prow, send_row_size, &
     664              :          size_rec_buffer
     665              :       INTEGER :: size_send_buffer
     666          358 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: iii_vet, map_rec_size, map_send_size, &
     667          358 :                                                             pos_info, pos_info_ex, proc_2_send_pos
     668          358 :       INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, mepos_2_grid, my_col_indeces_info_1i, &
     669          358 :          my_col_indeces_info_2a, my_row_indeces_info_1i, my_row_indeces_info_2a, sizes, sizes_1i, &
     670          358 :          sizes_2a
     671          358 :       INTEGER, ALLOCATABLE, DIMENSION(:, :, :)           :: col_indeces_info_1i, &
     672          358 :                                                             col_indeces_info_2a, &
     673          358 :                                                             row_indeces_info_1i, &
     674          358 :                                                             row_indeces_info_2a
     675          358 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, col_indices_1i, &
     676          358 :                                                             col_indices_2a, row_indices, &
     677          358 :                                                             row_indices_1i, row_indices_2a
     678          358 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: ab_rec, ab_send, mat_rec, mat_send
     679              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     680              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
     681              :       TYPE(cp_fm_type)                                   :: fm_P_ij, L_mu_q
     682              :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
     683          358 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
     684              :       TYPE(integ_mat_buffer_type_2D), ALLOCATABLE, &
     685          358 :          DIMENSION(:)                                    :: buffer_cyclic
     686              :       TYPE(mp_para_env_type), POINTER                    :: para_env_exchange
     687          358 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: req_send
     688              : 
     689          358 :       CALL timeset(routineN, handle)
     690              : 
     691              :       ! create the globally distributed mixed lagrangian
     692          358 :       NULLIFY (blacs_env)
     693          358 :       CALL get_qs_env(qs_env, blacs_env=blacs_env)
     694              : 
     695          358 :       CALL cp_fm_get_info(mo_coeff, nrow_global=nao, ncol_global=nmo)
     696              : 
     697          358 :       NULLIFY (fm_struct_tmp)
     698              :       CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
     699          358 :                                nrow_global=nao, ncol_global=nmo)
     700          358 :       CALL cp_fm_create(L_mu_q, fm_struct_tmp, name="Lag_mu_q")
     701          358 :       CALL cp_fm_struct_release(fm_struct_tmp)
     702          358 :       CALL cp_fm_set_all(L_mu_q, 0.0_dp)
     703              : 
     704              :       ! create all information array
     705         1074 :       ALLOCATE (pos_info(0:para_env%num_pe - 1))
     706          358 :       CALL para_env%allgather(para_env_sub%mepos, pos_info)
     707              : 
     708              :       ! get matrix information for the global
     709              :       CALL cp_fm_get_info(matrix=L_mu_q, &
     710              :                           nrow_local=nrow_local, &
     711              :                           ncol_local=ncol_local, &
     712              :                           row_indices=row_indices, &
     713          358 :                           col_indices=col_indices)
     714          358 :       myprow = L_mu_q%matrix_struct%context%mepos(1)
     715          358 :       mypcol = L_mu_q%matrix_struct%context%mepos(2)
     716          358 :       nprow = L_mu_q%matrix_struct%context%num_pe(1)
     717          358 :       npcol = L_mu_q%matrix_struct%context%num_pe(2)
     718              : 
     719         1432 :       ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
     720         1432 :       grid_2_mepos = 0
     721          358 :       grid_2_mepos(myprow, mypcol) = para_env%mepos
     722          358 :       CALL para_env%sum(grid_2_mepos)
     723              : 
     724              :       ! get matrix information for L1_mu_i
     725              :       CALL cp_fm_get_info(matrix=L1_mu_i, &
     726              :                           nrow_local=nrow_local_1i, &
     727              :                           ncol_local=ncol_local_1i, &
     728              :                           row_indices=row_indices_1i, &
     729          358 :                           col_indices=col_indices_1i)
     730              : 
     731         1074 :       ALLOCATE (sizes_1i(2, 0:para_env_sub%num_pe - 1))
     732         1074 :       CALL para_env_sub%allgather([nrow_local_1i, ncol_local_1i], sizes_1i)
     733              : 
     734              :       ! get matrix information for L2_nu_a
     735              :       CALL cp_fm_get_info(matrix=L2_nu_a, &
     736              :                           nrow_local=nrow_local_2a, &
     737              :                           ncol_local=ncol_local_2a, &
     738              :                           row_indices=row_indices_2a, &
     739          358 :                           col_indices=col_indices_2a)
     740              : 
     741         1074 :       ALLOCATE (sizes_2a(2, 0:para_env_sub%num_pe - 1))
     742         1074 :       CALL para_env_sub%allgather([nrow_local_2a, ncol_local_2a], sizes_2a)
     743              : 
     744              :       ! Here we perform a ring communication scheme taking into account
     745              :       ! for the sub-group distribution of the source matrices.
     746              :       ! as a first step we need to redistribute the data within
     747              :       ! the subgroup.
     748              :       ! In order to do so we have to allocate the structure
     749              :       ! that will hold the local data involved in communication, this
     750              :       ! structure will be the same for processes in different subgroups
     751              :       ! sharing the same position in the subgroup.
     752              :       ! -1) create the exchange para_env
     753          358 :       color_exchange = para_env_sub%mepos
     754          358 :       ALLOCATE (para_env_exchange)
     755          358 :       CALL para_env_exchange%from_split(para_env, color_exchange)
     756         1074 :       ALLOCATE (pos_info_ex(0:para_env%num_pe - 1))
     757          358 :       CALL para_env%allgather(para_env_exchange%mepos, pos_info_ex)
     758         1074 :       ALLOCATE (sizes(2, 0:para_env_exchange%num_pe - 1))
     759         1074 :       CALL para_env_exchange%allgather([nrow_local, ncol_local], sizes)
     760              : 
     761              :       ! 0) store some info about indeces of the fm matrices (subgroup)
     762          358 :       CALL timeset(routineN//"_inx", handle2)
     763              :       ! matrix L1_mu_i
     764          732 :       max_row_size = MAXVAL(sizes_1i(1, :))
     765          732 :       max_col_size = MAXVAL(sizes_1i(2, :))
     766         1432 :       ALLOCATE (row_indeces_info_1i(2, max_row_size, 0:para_env_sub%num_pe - 1))
     767         1432 :       ALLOCATE (col_indeces_info_1i(2, max_col_size, 0:para_env_sub%num_pe - 1))
     768         1074 :       ALLOCATE (my_row_indeces_info_1i(2, max_row_size))
     769         1074 :       ALLOCATE (my_col_indeces_info_1i(2, max_col_size))
     770        21150 :       row_indeces_info_1i = 0
     771         4854 :       col_indeces_info_1i = 0
     772          358 :       dummy_proc = 0
     773              :       ! row
     774         6954 :       DO iiB = 1, nrow_local_1i
     775         6596 :          i_global = row_indices_1i(iiB)
     776         6596 :          send_prow = L_mu_q%matrix_struct%g2p_row(i_global)
     777         6596 :          i_local = L_mu_q%matrix_struct%g2l_row(i_global)
     778         6596 :          my_row_indeces_info_1i(1, iiB) = send_prow
     779         6954 :          my_row_indeces_info_1i(2, iiB) = i_local
     780              :       END DO
     781              :       ! col
     782         1660 :       DO jjB = 1, ncol_local_1i
     783         1302 :          j_global = col_indices_1i(jjB)
     784         1302 :          send_pcol = L_mu_q%matrix_struct%g2p_col(j_global)
     785         1302 :          j_local = L_mu_q%matrix_struct%g2l_col(j_global)
     786         1302 :          my_col_indeces_info_1i(1, jjB) = send_pcol
     787         1660 :          my_col_indeces_info_1i(2, jjB) = j_local
     788              :       END DO
     789          358 :       CALL para_env_sub%allgather(my_row_indeces_info_1i, row_indeces_info_1i)
     790          358 :       CALL para_env_sub%allgather(my_col_indeces_info_1i, col_indeces_info_1i)
     791          358 :       DEALLOCATE (my_row_indeces_info_1i, my_col_indeces_info_1i)
     792              : 
     793              :       ! matrix L2_nu_a
     794          732 :       max_row_size = MAXVAL(sizes_2a(1, :))
     795          732 :       max_col_size = MAXVAL(sizes_2a(2, :))
     796         1432 :       ALLOCATE (row_indeces_info_2a(2, max_row_size, 0:para_env_sub%num_pe - 1))
     797         1432 :       ALLOCATE (col_indeces_info_2a(2, max_col_size, 0:para_env_sub%num_pe - 1))
     798         1074 :       ALLOCATE (my_row_indeces_info_2a(2, max_row_size))
     799         1074 :       ALLOCATE (my_col_indeces_info_2a(2, max_col_size))
     800        21150 :       row_indeces_info_2a = 0
     801        18066 :       col_indeces_info_2a = 0
     802              :       ! row
     803         6954 :       DO iiB = 1, nrow_local_2a
     804         6596 :          i_global = row_indices_2a(iiB)
     805         6596 :          send_prow = L_mu_q%matrix_struct%g2p_row(i_global)
     806         6596 :          i_local = L_mu_q%matrix_struct%g2l_row(i_global)
     807         6596 :          my_row_indeces_info_2a(1, iiB) = send_prow
     808         6954 :          my_row_indeces_info_2a(2, iiB) = i_local
     809              :       END DO
     810              :       ! col
     811         5828 :       DO jjB = 1, ncol_local_2a
     812         5470 :          j_global = col_indices_2a(jjB) + homo
     813         5470 :          send_pcol = L_mu_q%matrix_struct%g2p_col(j_global)
     814         5470 :          j_local = L_mu_q%matrix_struct%g2l_col(j_global)
     815         5470 :          my_col_indeces_info_2a(1, jjB) = send_pcol
     816         5828 :          my_col_indeces_info_2a(2, jjB) = j_local
     817              :       END DO
     818          358 :       CALL para_env_sub%allgather(my_row_indeces_info_2a, row_indeces_info_2a)
     819          358 :       CALL para_env_sub%allgather(my_col_indeces_info_2a, col_indeces_info_2a)
     820          358 :       DEALLOCATE (my_row_indeces_info_2a, my_col_indeces_info_2a)
     821          358 :       CALL timestop(handle2)
     822              : 
     823              :       ! 1) define the map for sending data in the subgroup starting with L1_mu_i
     824          358 :       CALL timeset(routineN//"_subinfo", handle2)
     825         1074 :       ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1))
     826          732 :       map_send_size = 0
     827         1660 :       DO jjB = 1, ncol_local_1i
     828         1302 :          send_pcol = col_indeces_info_1i(1, jjB, para_env_sub%mepos)
     829        26236 :          DO iiB = 1, nrow_local_1i
     830        24576 :             send_prow = row_indeces_info_1i(1, iiB, para_env_sub%mepos)
     831        24576 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     832        24576 :             proc_send_sub = pos_info(proc_send)
     833        25878 :             map_send_size(proc_send_sub) = map_send_size(proc_send_sub) + 1
     834              :          END DO
     835              :       END DO
     836              :       ! and the same for L2_nu_a
     837         5828 :       DO jjB = 1, ncol_local_2a
     838         5470 :          send_pcol = col_indeces_info_2a(1, jjB, para_env_sub%mepos)
     839       126994 :          DO iiB = 1, nrow_local_2a
     840       121166 :             send_prow = row_indeces_info_2a(1, iiB, para_env_sub%mepos)
     841       121166 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     842       121166 :             proc_send_sub = pos_info(proc_send)
     843       126636 :             map_send_size(proc_send_sub) = map_send_size(proc_send_sub) + 1
     844              :          END DO
     845              :       END DO
     846              :       ! and exchange data in order to create map_rec_size
     847         1074 :       ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1))
     848          732 :       map_rec_size = 0
     849          358 :       CALL para_env_sub%alltoall(map_send_size, map_rec_size, 1)
     850          358 :       CALL timestop(handle2)
     851              : 
     852              :       ! 2) reorder data in sending buffer
     853          358 :       CALL timeset(routineN//"_sub_Bsend", handle2)
     854              :       ! count the number of messages (include myself)
     855          358 :       number_of_send = 0
     856          732 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     857          374 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
     858          732 :          IF (map_send_size(proc_send) > 0) THEN
     859          358 :             number_of_send = number_of_send + 1
     860              :          END IF
     861              :       END DO
     862              :       ! allocate the structure that will hold the messages to be sent
     863         1432 :       ALLOCATE (buffer_send(number_of_send))
     864          358 :       send_counter = 0
     865         1074 :       ALLOCATE (proc_2_send_pos(0:para_env_sub%num_pe - 1))
     866          732 :       proc_2_send_pos = 0
     867          732 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     868          374 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
     869          374 :          size_send_buffer = map_send_size(proc_send)
     870          732 :          IF (map_send_size(proc_send) > 0) THEN
     871          358 :             send_counter = send_counter + 1
     872              :             ! allocate the sending buffer (msg)
     873         1074 :             ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
     874       146100 :             buffer_send(send_counter)%msg = 0.0_dp
     875          358 :             buffer_send(send_counter)%proc = proc_send
     876          358 :             proc_2_send_pos(proc_send) = send_counter
     877              :          END IF
     878              :       END DO
     879              :       ! loop over the locally held data and fill the buffer_send
     880              :       ! for doing that we need an array that keep track if the
     881              :       ! sequential increase of the index for each message
     882         1074 :       ALLOCATE (iii_vet(number_of_send))
     883          716 :       iii_vet = 0
     884         1660 :       DO jjB = 1, ncol_local_1i
     885         1302 :          send_pcol = col_indeces_info_1i(1, jjB, para_env_sub%mepos)
     886        26236 :          DO iiB = 1, nrow_local_1i
     887        24576 :             send_prow = row_indeces_info_1i(1, iiB, para_env_sub%mepos)
     888        24576 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     889        24576 :             proc_send_sub = pos_info(proc_send)
     890        24576 :             send_counter = proc_2_send_pos(proc_send_sub)
     891        24576 :             iii_vet(send_counter) = iii_vet(send_counter) + 1
     892        24576 :             iii = iii_vet(send_counter)
     893        25878 :             buffer_send(send_counter)%msg(iii) = L1_mu_i%local_data(iiB, jjB)
     894              :          END DO
     895              :       END DO
     896              :       ! release the local data of L1_mu_i
     897          358 :       DEALLOCATE (L1_mu_i%local_data)
     898              :       ! and the same for L2_nu_a
     899         5828 :       DO jjB = 1, ncol_local_2a
     900         5470 :          send_pcol = col_indeces_info_2a(1, jjB, para_env_sub%mepos)
     901       126994 :          DO iiB = 1, nrow_local_2a
     902       121166 :             send_prow = row_indeces_info_2a(1, iiB, para_env_sub%mepos)
     903       121166 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     904       121166 :             proc_send_sub = pos_info(proc_send)
     905       121166 :             send_counter = proc_2_send_pos(proc_send_sub)
     906       121166 :             iii_vet(send_counter) = iii_vet(send_counter) + 1
     907       121166 :             iii = iii_vet(send_counter)
     908       126636 :             buffer_send(send_counter)%msg(iii) = L2_nu_a%local_data(iiB, jjB)
     909              :          END DO
     910              :       END DO
     911          358 :       DEALLOCATE (L2_nu_a%local_data)
     912          358 :       DEALLOCATE (proc_2_send_pos)
     913          358 :       DEALLOCATE (iii_vet)
     914          358 :       CALL timestop(handle2)
     915              : 
     916              :       ! 3) create the buffer for receive, post the message with irecv
     917              :       !    and send the messages non-blocking
     918          358 :       CALL timeset(routineN//"_sub_isendrecv", handle2)
     919              :       ! count the number of messages to be received
     920          358 :       number_of_rec = 0
     921          732 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     922          374 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
     923          732 :          IF (map_rec_size(proc_receive) > 0) THEN
     924          358 :             number_of_rec = number_of_rec + 1
     925              :          END IF
     926              :       END DO
     927         1432 :       ALLOCATE (buffer_rec(number_of_rec))
     928          358 :       rec_counter = 0
     929          732 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     930          374 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
     931          374 :          size_rec_buffer = map_rec_size(proc_receive)
     932          732 :          IF (map_rec_size(proc_receive) > 0) THEN
     933          358 :             rec_counter = rec_counter + 1
     934              :             ! prepare the buffer for receive
     935         1074 :             ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
     936       146100 :             buffer_rec(rec_counter)%msg = 0.0_dp
     937          358 :             buffer_rec(rec_counter)%proc = proc_receive
     938              :             ! post the message to be received (not need to send to myself)
     939          358 :             IF (proc_receive /= para_env_sub%mepos) THEN
     940              :                CALL para_env_sub%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
     941            0 :                                        buffer_rec(rec_counter)%msg_req)
     942              :             END IF
     943              :          END IF
     944              :       END DO
     945              :       ! send messages
     946         1432 :       ALLOCATE (req_send(number_of_send))
     947          716 :       req_send = mp_request_null
     948          358 :       send_counter = 0
     949          732 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     950          374 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
     951          732 :          IF (map_send_size(proc_send) > 0) THEN
     952          358 :             send_counter = send_counter + 1
     953          358 :             IF (proc_send == para_env_sub%mepos) THEN
     954       146100 :                buffer_rec(send_counter)%msg(:) = buffer_send(send_counter)%msg
     955              :             ELSE
     956              :                CALL para_env_sub%isend(buffer_send(send_counter)%msg, proc_send, &
     957            0 :                                        buffer_send(send_counter)%msg_req)
     958            0 :                req_send(send_counter) = buffer_send(send_counter)%msg_req
     959              :             END IF
     960              :          END IF
     961              :       END DO
     962          358 :       DEALLOCATE (map_send_size)
     963          358 :       CALL timestop(handle2)
     964              : 
     965              :       ! 4) (if memory is a problem we should move this part after point 5)
     966              :       !    Here we create the new buffer for cyclic(ring) communication and
     967              :       !    we fill it with the data received from the other member of the
     968              :       !    subgroup
     969          358 :       CALL timeset(routineN//"_Bcyclic", handle2)
     970              :       ! first allocata new structure
     971         1774 :       ALLOCATE (buffer_cyclic(0:para_env_exchange%num_pe - 1))
     972         1058 :       DO iproc = 0, para_env_exchange%num_pe - 1
     973          700 :          rec_row_size = sizes(1, iproc)
     974          700 :          rec_col_size = sizes(2, iproc)
     975         2800 :          ALLOCATE (buffer_cyclic(iproc)%msg(rec_row_size, rec_col_size))
     976       159964 :          buffer_cyclic(iproc)%msg = 0.0_dp
     977              :       END DO
     978              :       ! now collect data from other member of the subgroup and fill
     979              :       ! buffer_cyclic
     980          358 :       rec_counter = 0
     981          732 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     982          374 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
     983          374 :          size_rec_buffer = map_rec_size(proc_receive)
     984          732 :          IF (map_rec_size(proc_receive) > 0) THEN
     985          358 :             rec_counter = rec_counter + 1
     986              : 
     987              :             ! wait for the message
     988          358 :             IF (proc_receive /= para_env_sub%mepos) CALL buffer_rec(rec_counter)%msg_req%wait()
     989              : 
     990          358 :             CALL timeset(routineN//"_fill", handle3)
     991          358 :             iii = 0
     992         1660 :             DO jjB = 1, sizes_1i(2, proc_receive)
     993         1302 :                send_pcol = col_indeces_info_1i(1, jjB, proc_receive)
     994         1302 :                j_local = col_indeces_info_1i(2, jjB, proc_receive)
     995        26236 :                DO iiB = 1, sizes_1i(1, proc_receive)
     996        24576 :                   send_prow = row_indeces_info_1i(1, iiB, proc_receive)
     997        24576 :                   proc_send = grid_2_mepos(send_prow, send_pcol)
     998        24576 :                   proc_send_sub = pos_info(proc_send)
     999        24576 :                   IF (proc_send_sub /= para_env_sub%mepos) CYCLE
    1000        24576 :                   iii = iii + 1
    1001        24576 :                   i_local = row_indeces_info_1i(2, iiB, proc_receive)
    1002        24576 :                   proc_send_ex = pos_info_ex(proc_send)
    1003        25878 :                   buffer_cyclic(proc_send_ex)%msg(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
    1004              :                END DO
    1005              :             END DO
    1006              :             ! and the same for L2_nu_a
    1007         5828 :             DO jjB = 1, sizes_2a(2, proc_receive)
    1008         5470 :                send_pcol = col_indeces_info_2a(1, jjB, proc_receive)
    1009         5470 :                j_local = col_indeces_info_2a(2, jjB, proc_receive)
    1010       126994 :                DO iiB = 1, sizes_2a(1, proc_receive)
    1011       121166 :                   send_prow = row_indeces_info_2a(1, iiB, proc_receive)
    1012       121166 :                   proc_send = grid_2_mepos(send_prow, send_pcol)
    1013       121166 :                   proc_send_sub = pos_info(proc_send)
    1014       121166 :                   IF (proc_send_sub /= para_env_sub%mepos) CYCLE
    1015       121166 :                   iii = iii + 1
    1016       121166 :                   i_local = row_indeces_info_2a(2, iiB, proc_receive)
    1017       121166 :                   proc_send_ex = pos_info_ex(proc_send)
    1018       126636 :                   buffer_cyclic(proc_send_ex)%msg(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
    1019              :                END DO
    1020              :             END DO
    1021          358 :             CALL timestop(handle3)
    1022              : 
    1023              :             ! deallocate the received message
    1024          358 :             DEALLOCATE (buffer_rec(rec_counter)%msg)
    1025              :          END IF
    1026              :       END DO
    1027          358 :       DEALLOCATE (row_indeces_info_1i)
    1028          358 :       DEALLOCATE (col_indeces_info_1i)
    1029          358 :       DEALLOCATE (row_indeces_info_2a)
    1030          358 :       DEALLOCATE (col_indeces_info_2a)
    1031          716 :       DEALLOCATE (buffer_rec)
    1032          358 :       DEALLOCATE (map_rec_size)
    1033          358 :       CALL timestop(handle2)
    1034              : 
    1035              :       ! 5)  Wait for all messeges to be sent in the subgroup
    1036          358 :       CALL timeset(routineN//"_sub_waitall", handle2)
    1037          358 :       CALL mp_waitall(req_send(:))
    1038          716 :       DO send_counter = 1, number_of_send
    1039          716 :          DEALLOCATE (buffer_send(send_counter)%msg)
    1040              :       END DO
    1041          716 :       DEALLOCATE (buffer_send)
    1042          358 :       DEALLOCATE (req_send)
    1043          358 :       CALL timestop(handle2)
    1044              : 
    1045              :       ! 6) Start with ring communication
    1046          358 :       CALL timeset(routineN//"_ring", handle2)
    1047          358 :       proc_send_static = MODULO(para_env_exchange%mepos + 1, para_env_exchange%num_pe)
    1048          358 :       proc_receive_static = MODULO(para_env_exchange%mepos - 1, para_env_exchange%num_pe)
    1049         1058 :       max_row_size = MAXVAL(sizes(1, :))
    1050         1058 :       max_col_size = MAXVAL(sizes(2, :))
    1051         1432 :       ALLOCATE (mat_send(max_row_size, max_col_size))
    1052         1074 :       ALLOCATE (mat_rec(max_row_size, max_col_size))
    1053        88168 :       mat_send = 0.0_dp
    1054        82264 :       mat_send(1:nrow_local, 1:ncol_local) = buffer_cyclic(para_env_exchange%mepos)%msg(:, :)
    1055          358 :       DEALLOCATE (buffer_cyclic(para_env_exchange%mepos)%msg)
    1056          700 :       DO proc_shift = 1, para_env_exchange%num_pe - 1
    1057          342 :          proc_receive = MODULO(para_env_exchange%mepos - proc_shift, para_env_exchange%num_pe)
    1058              : 
    1059          342 :          rec_row_size = sizes(1, proc_receive)
    1060          342 :          rec_col_size = sizes(2, proc_receive)
    1061              : 
    1062        83246 :          mat_rec = 0.0_dp
    1063              :          CALL para_env_exchange%sendrecv(mat_send, proc_send_static, &
    1064          342 :                                          mat_rec, proc_receive_static)
    1065              : 
    1066        83246 :          mat_send = 0.0_dp
    1067              :          mat_send(1:rec_row_size, 1:rec_col_size) = mat_rec(1:rec_row_size, 1:rec_col_size) + &
    1068        77342 :                                                     buffer_cyclic(proc_receive)%msg(:, :)
    1069              : 
    1070          700 :          DEALLOCATE (buffer_cyclic(proc_receive)%msg)
    1071              :       END DO
    1072              :       ! and finally
    1073              :       CALL para_env_exchange%sendrecv(mat_send, proc_send_static, &
    1074          358 :                                       mat_rec, proc_receive_static)
    1075        82264 :       L_mu_q%local_data(1:nrow_local, 1:ncol_local) = mat_rec(1:nrow_local, 1:ncol_local)
    1076         1416 :       DEALLOCATE (buffer_cyclic)
    1077          358 :       DEALLOCATE (mat_send)
    1078          358 :       DEALLOCATE (mat_rec)
    1079          358 :       CALL timestop(handle2)
    1080              : 
    1081              :       ! release para_env_exchange
    1082          358 :       CALL mp_para_env_release(para_env_exchange)
    1083              : 
    1084          358 :       CALL cp_fm_release(L1_mu_i)
    1085          358 :       CALL cp_fm_release(L2_nu_a)
    1086          358 :       DEALLOCATE (pos_info_ex)
    1087          358 :       DEALLOCATE (grid_2_mepos)
    1088          358 :       DEALLOCATE (sizes)
    1089          358 :       DEALLOCATE (sizes_1i)
    1090          358 :       DEALLOCATE (sizes_2a)
    1091              : 
    1092              :       ! update the P_ij block of P_MP2 with the
    1093              :       ! non-singular ij pairs
    1094          358 :       CALL timeset(routineN//"_Pij", handle2)
    1095          358 :       NULLIFY (fm_struct_tmp)
    1096              :       CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
    1097          358 :                                nrow_global=homo, ncol_global=homo)
    1098          358 :       CALL cp_fm_create(fm_P_ij, fm_struct_tmp, name="fm_P_ij")
    1099          358 :       CALL cp_fm_struct_release(fm_struct_tmp)
    1100          358 :       CALL cp_fm_set_all(fm_P_ij, 0.0_dp)
    1101              : 
    1102              :       ! we have it, update P_ij local
    1103              :       CALL cp_fm_get_info(matrix=fm_P_ij, &
    1104              :                           nrow_local=nrow_local, &
    1105              :                           ncol_local=ncol_local, &
    1106              :                           row_indices=row_indices, &
    1107          358 :                           col_indices=col_indices)
    1108              : 
    1109          358 :       IF (.NOT. mp2_env%method == ri_rpa_method_gpw) THEN
    1110              :          CALL parallel_gemm('T', 'N', homo, homo, nao, 1.0_dp, &
    1111              :                             mo_coeff, L_mu_q, 0.0_dp, fm_P_ij, &
    1112              :                             a_first_col=1, &
    1113              :                             a_first_row=1, &
    1114              :                             b_first_col=1, &
    1115              :                             b_first_row=1, &
    1116              :                             c_first_col=1, &
    1117          330 :                             c_first_row=1)
    1118              :          CALL parallel_gemm('T', 'N', homo, homo, nao, -2.0_dp, &
    1119              :                             L_mu_q, mo_coeff, 2.0_dp, fm_P_ij, &
    1120              :                             a_first_col=1, &
    1121              :                             a_first_row=1, &
    1122              :                             b_first_col=1, &
    1123              :                             b_first_row=1, &
    1124              :                             c_first_col=1, &
    1125          330 :                             c_first_row=1)
    1126              : 
    1127         1524 :          DO jjB = 1, ncol_local
    1128         1194 :             j_global = col_indices(jjB)
    1129         3813 :             DO iiB = 1, nrow_local
    1130         2289 :                i_global = row_indices(iiB)
    1131              :                ! diagonal elements and nearly degenerate ij pairs already updated
    1132         3483 :                IF (ABS(Eigenval(j_global) - Eigenval(i_global)) < mp2_env%ri_grad%eps_canonical) THEN
    1133          679 :                   fm_P_ij%local_data(iiB, jjB) = mp2_env%ri_grad%P_ij(kspin)%array(i_global, j_global)
    1134              :                ELSE
    1135              :                   fm_P_ij%local_data(iiB, jjB) = &
    1136         1610 :                      factor*fm_P_ij%local_data(iiB, jjB)/(Eigenval(j_global) - Eigenval(i_global))
    1137              :                END IF
    1138              :             END DO
    1139              :          END DO
    1140              :       ELSE
    1141          136 :          DO jjB = 1, ncol_local
    1142          108 :             j_global = col_indices(jjB)
    1143          346 :             DO iiB = 1, nrow_local
    1144          210 :                i_global = row_indices(iiB)
    1145          318 :                fm_P_ij%local_data(iiB, jjB) = mp2_env%ri_grad%P_ij(kspin)%array(i_global, j_global)
    1146              :             END DO
    1147              :          END DO
    1148              :       END IF
    1149              :       ! deallocate the local P_ij
    1150          358 :       DEALLOCATE (mp2_env%ri_grad%P_ij(kspin)%array)
    1151          358 :       CALL timestop(handle2)
    1152              : 
    1153              :       ! Now create and fill the P matrix (MO)
    1154              :       ! FOR NOW WE ASSUME P_ab AND P_ij ARE REPLICATED OVER EACH MPI RANK
    1155          358 :       IF (.NOT. ALLOCATED(mp2_env%ri_grad%P_mo)) THEN
    1156         1174 :          ALLOCATE (mp2_env%ri_grad%P_mo(SIZE(mp2_env%ri_grad%mo_coeff_o)))
    1157              :       END IF
    1158              : 
    1159          358 :       CALL timeset(routineN//"_PMO", handle2)
    1160          358 :       NULLIFY (fm_struct_tmp)
    1161              :       CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
    1162          358 :                                nrow_global=nmo, ncol_global=nmo)
    1163          358 :       CALL cp_fm_create(mp2_env%ri_grad%P_mo(kspin), fm_struct_tmp, name="P_MP2_MO")
    1164          358 :       CALL cp_fm_set_all(mp2_env%ri_grad%P_mo(kspin), 0.0_dp)
    1165              : 
    1166              :       ! start with the (easy) occ-occ block and locally held P_ab elements
    1167          358 :       itmp = get_limit(virtual, para_env_sub%num_pe, para_env_sub%mepos)
    1168          358 :       my_B_virtual_start = itmp(1)
    1169          358 :       my_B_virtual_end = itmp(2)
    1170              : 
    1171              :       ! Fill occ-occ block
    1172          358 :       CALL cp_fm_to_fm_submat(fm_P_ij, mp2_env%ri_grad%P_mo(kspin), homo, homo, 1, 1, 1, 1)
    1173          358 :       CALL cp_fm_release(fm_P_ij)
    1174              : 
    1175              :       CALL cp_fm_get_info(mp2_env%ri_grad%P_mo(kspin), &
    1176              :                           nrow_local=nrow_local, &
    1177              :                           ncol_local=ncol_local, &
    1178              :                           row_indices=row_indices, &
    1179          358 :                           col_indices=col_indices)
    1180              : 
    1181          358 :       IF (mp2_env%method == ri_mp2_laplace) THEN
    1182              :          CALL parallel_gemm('T', 'N', virtual, virtual, nao, 1.0_dp, &
    1183              :                             mo_coeff, L_mu_q, 0.0_dp, mp2_env%ri_grad%P_mo(kspin), &
    1184              :                             a_first_col=homo + 1, &
    1185              :                             a_first_row=1, &
    1186              :                             b_first_col=homo + 1, &
    1187              :                             b_first_row=1, &
    1188              :                             c_first_col=homo + 1, &
    1189           24 :                             c_first_row=homo + 1)
    1190              :          CALL parallel_gemm('T', 'N', virtual, virtual, nao, -2.0_dp, &
    1191              :                             L_mu_q, mo_coeff, 2.0_dp, mp2_env%ri_grad%P_mo(kspin), &
    1192              :                             a_first_col=homo + 1, &
    1193              :                             a_first_row=1, &
    1194              :                             b_first_col=homo + 1, &
    1195              :                             b_first_row=1, &
    1196              :                             c_first_col=homo + 1, &
    1197           24 :                             c_first_row=homo + 1)
    1198              :       END IF
    1199              : 
    1200          358 :       IF (mp2_env%method == ri_mp2_method_gpw .OR. mp2_env%method == ri_rpa_method_gpw) THEN
    1201              :          ! With MP2 and RPA, we have already calculated the density matrix elements
    1202         6514 :          DO jjB = 1, ncol_local
    1203         6180 :             j_global = col_indices(jjB)
    1204         6180 :             IF (j_global <= homo) CYCLE
    1205        61358 :             DO iiB = 1, nrow_local
    1206        56054 :                i_global = row_indices(iiB)
    1207        62234 :                IF (my_B_virtual_start <= i_global - homo .AND. i_global - homo <= my_B_virtual_end) THEN
    1208              :                   mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
    1209        46400 :                      mp2_env%ri_grad%P_ab(kspin)%array(i_global - homo - my_B_virtual_start + 1, j_global - homo)
    1210              :                END IF
    1211              :             END DO
    1212              :          END DO
    1213           24 :       ELSE IF (mp2_env%method == ri_mp2_laplace) THEN
    1214              :          ! With Laplace-SOS-MP2, we still have to calculate the matrix elements of the non-degenerate pairs
    1215          616 :          DO jjB = 1, ncol_local
    1216          592 :             j_global = col_indices(jjB)
    1217          592 :             IF (j_global <= homo) CYCLE
    1218         6764 :             DO iiB = 1, nrow_local
    1219         6240 :                i_global = row_indices(iiB)
    1220         6832 :                IF (ABS(Eigenval(i_global) - Eigenval(j_global)) < mp2_env%ri_grad%eps_canonical) THEN
    1221          565 :                   IF (my_B_virtual_start <= i_global - homo .AND. i_global - homo <= my_B_virtual_end) THEN
    1222              :                      mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
    1223          552 :                         mp2_env%ri_grad%P_ab(kspin)%array(i_global - homo - my_B_virtual_start + 1, j_global - homo)
    1224              :                   ELSE
    1225           13 :                      mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = 0.0_dp
    1226              :                   END IF
    1227              :                ELSE
    1228              :                   mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
    1229              :                      factor*mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB)/ &
    1230         5675 :                      (Eigenval(i_global) - Eigenval(j_global))
    1231              :                END IF
    1232              :             END DO
    1233              :          END DO
    1234              :       ELSE
    1235            0 :          CPABORT("Calculation of virt-virt block of density matrix is dealt with elsewhere!")
    1236              :       END IF
    1237              : 
    1238              :       ! send around the sub_group the local data and check if we
    1239              :       ! have to update our block with external elements
    1240         1074 :       ALLOCATE (mepos_2_grid(2, 0:para_env_sub%num_pe - 1))
    1241         1074 :       CALL para_env_sub%allgather([myprow, mypcol], mepos_2_grid)
    1242              : 
    1243         1074 :       ALLOCATE (sizes(2, 0:para_env_sub%num_pe - 1))
    1244         1074 :       CALL para_env_sub%allgather([nrow_local, ncol_local], sizes)
    1245              : 
    1246         1432 :       ALLOCATE (ab_rec(nrow_local, ncol_local))
    1247          374 :       DO proc_shift = 1, para_env_sub%num_pe - 1
    1248           16 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
    1249           16 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
    1250              : 
    1251           16 :          send_prow = mepos_2_grid(1, proc_send)
    1252           16 :          send_pcol = mepos_2_grid(2, proc_send)
    1253              : 
    1254           16 :          send_row_size = sizes(1, proc_send)
    1255           16 :          send_col_size = sizes(2, proc_send)
    1256              : 
    1257           64 :          ALLOCATE (ab_send(send_row_size, send_col_size))
    1258         4922 :          ab_send = 0.0_dp
    1259              : 
    1260              :          ! first loop over row since in this way we can cycle
    1261          206 :          DO iiB = 1, send_row_size
    1262          190 :             i_global = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%l2g_row(iiB, send_prow)
    1263          190 :             IF (i_global <= homo) CYCLE
    1264          154 :             i_global = i_global - homo
    1265          154 :             IF (.NOT. (my_B_virtual_start <= i_global .AND. i_global <= my_B_virtual_end)) CYCLE
    1266          655 :             DO jjB = 1, send_col_size
    1267          614 :                j_global = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%l2g_col(jjB, send_pcol)
    1268          614 :                IF (j_global <= homo) CYCLE
    1269          487 :                j_global = j_global - homo
    1270          804 :                ab_send(iiB, jjB) = mp2_env%ri_grad%P_ab(kspin)%array(i_global - my_B_virtual_start + 1, j_global)
    1271              :             END DO
    1272              :          END DO
    1273              : 
    1274         4922 :          ab_rec = 0.0_dp
    1275              :          CALL para_env_sub%sendrecv(ab_send, proc_send, &
    1276           16 :                                     ab_rec, proc_receive)
    1277              :          mp2_env%ri_grad%P_mo(kspin)%local_data(1:nrow_local, 1:ncol_local) = &
    1278              :             mp2_env%ri_grad%P_mo(kspin)%local_data(1:nrow_local, 1:ncol_local) + &
    1279         4922 :             ab_rec(1:nrow_local, 1:ncol_local)
    1280              : 
    1281          374 :          DEALLOCATE (ab_send)
    1282              :       END DO
    1283          358 :       DEALLOCATE (ab_rec)
    1284          358 :       DEALLOCATE (mepos_2_grid)
    1285          358 :       DEALLOCATE (sizes)
    1286              : 
    1287              :       ! deallocate the local P_ab
    1288          358 :       DEALLOCATE (mp2_env%ri_grad%P_ab(kspin)%array)
    1289          358 :       CALL timestop(handle2)
    1290              : 
    1291              :       ! create also W_MP2_MO
    1292          358 :       CALL timeset(routineN//"_WMO", handle2)
    1293          358 :       IF (.NOT. ALLOCATED(mp2_env%ri_grad%W_mo)) THEN
    1294         1174 :          ALLOCATE (mp2_env%ri_grad%W_mo(SIZE(mp2_env%ri_grad%mo_coeff_o)))
    1295              :       END IF
    1296              : 
    1297          358 :       CALL cp_fm_create(mp2_env%ri_grad%W_mo(kspin), fm_struct_tmp, name="W_MP2_MO")
    1298          358 :       CALL cp_fm_struct_release(fm_struct_tmp)
    1299              : 
    1300              :       ! all block
    1301              :       CALL parallel_gemm('T', 'N', nmo, nmo, nao, 2.0_dp*factor, &
    1302              :                          L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
    1303              :                          a_first_col=1, &
    1304              :                          a_first_row=1, &
    1305              :                          b_first_col=1, &
    1306              :                          b_first_row=1, &
    1307              :                          c_first_col=1, &
    1308          358 :                          c_first_row=1)
    1309              : 
    1310              :       ! occ-occ block
    1311              :       CALL parallel_gemm('T', 'N', homo, homo, nao, -2.0_dp*factor, &
    1312              :                          L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
    1313              :                          a_first_col=1, &
    1314              :                          a_first_row=1, &
    1315              :                          b_first_col=1, &
    1316              :                          b_first_row=1, &
    1317              :                          c_first_col=1, &
    1318          358 :                          c_first_row=1)
    1319              : 
    1320              :       ! occ-virt block
    1321              :       CALL parallel_gemm('T', 'N', homo, virtual, nao, 2.0_dp*factor, &
    1322              :                          mo_coeff, L_mu_q, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
    1323              :                          a_first_col=1, &
    1324              :                          a_first_row=1, &
    1325              :                          b_first_col=homo + 1, &
    1326              :                          b_first_row=1, &
    1327              :                          c_first_col=homo + 1, &
    1328          358 :                          c_first_row=1)
    1329          358 :       CALL timestop(handle2)
    1330              : 
    1331              :       ! Calculate occ-virt block of the lagrangian in MO
    1332          358 :       CALL timeset(routineN//"_Ljb", handle2)
    1333          358 :       IF (.NOT. ALLOCATED(mp2_env%ri_grad%L_jb)) THEN
    1334         1174 :          ALLOCATE (mp2_env%ri_grad%L_jb(SIZE(mp2_env%ri_grad%mo_coeff_o)))
    1335              :       END IF
    1336              : 
    1337              :       CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
    1338          358 :                                nrow_global=homo, ncol_global=virtual)
    1339          358 :       CALL cp_fm_create(mp2_env%ri_grad%L_jb(kspin), fm_struct_tmp, name="fm_L_jb", set_zero=.TRUE.)
    1340          358 :       CALL cp_fm_struct_release(fm_struct_tmp)
    1341              : 
    1342              :       ! first Virtual
    1343              :       CALL parallel_gemm('T', 'N', homo, virtual, nao, 2.0_dp*factor, &
    1344              :                          L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%L_jb(kspin), &
    1345              :                          a_first_col=1, &
    1346              :                          a_first_row=1, &
    1347              :                          b_first_col=homo + 1, &
    1348              :                          b_first_row=1, &
    1349              :                          c_first_col=1, &
    1350          358 :                          c_first_row=1)
    1351              :       ! then occupied
    1352              :       CALL parallel_gemm('T', 'N', homo, virtual, nao, 2.0_dp*factor, &
    1353              :                          mo_coeff, L_mu_q, 1.0_dp, mp2_env%ri_grad%L_jb(kspin), &
    1354              :                          a_first_col=1, &
    1355              :                          a_first_row=1, &
    1356              :                          b_first_col=homo + 1, &
    1357              :                          b_first_row=1, &
    1358              :                          c_first_col=1, &
    1359          358 :                          c_first_row=1)
    1360              : 
    1361              :       ! finally release L_mu_q
    1362          358 :       CALL cp_fm_release(L_mu_q)
    1363          358 :       CALL timestop(handle2)
    1364              : 
    1365              :       ! here we should be done next CPHF
    1366              : 
    1367          358 :       DEALLOCATE (pos_info)
    1368              : 
    1369          358 :       CALL timestop(handle)
    1370              : 
    1371         7518 :    END SUBROUTINE create_W_P
    1372              : 
    1373              : END MODULE mp2_ri_grad

Generated by: LCOV version 2.0-1