LCOV - code coverage report
Current view: top level - src - mp2_ri_grad.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:e7e05ae) Lines: 615 623 98.7 %
Date: 2024-04-18 06:59:28 Functions: 5 5 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Routines to calculate gradients of RI-GPW-MP2 energy using pw
      10             : !> \par History
      11             : !>      10.2013 created [Mauro Del Ben]
      12             : ! **************************************************************************************************
      13             : MODULE mp2_ri_grad
      14             :    USE atomic_kind_types,               ONLY: atomic_kind_type,&
      15             :                                               get_atomic_kind_set
      16             :    USE cell_types,                      ONLY: cell_type
      17             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      18             :    USE cp_control_types,                ONLY: dft_control_type
      19             :    USE cp_dbcsr_operations,             ONLY: copy_dbcsr_to_fm,&
      20             :                                               dbcsr_deallocate_matrix_set
      21             :    USE cp_eri_mme_interface,            ONLY: cp_eri_mme_param
      22             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      23             :                                               cp_fm_struct_release,&
      24             :                                               cp_fm_struct_type
      25             :    USE cp_fm_types,                     ONLY: &
      26             :         cp_fm_create, cp_fm_get_info, cp_fm_indxg2l, cp_fm_indxg2p, cp_fm_indxl2g, cp_fm_release, &
      27             :         cp_fm_set_all, cp_fm_to_fm_submat, cp_fm_type
      28             :    USE dbcsr_api,                       ONLY: &
      29             :         dbcsr_add, dbcsr_copy, dbcsr_copy_into_existing, dbcsr_create, dbcsr_multiply, &
      30             :         dbcsr_p_type, dbcsr_release, dbcsr_set, dbcsr_transposed, dbcsr_type, &
      31             :         dbcsr_type_no_symmetry, dbcsr_type_symmetric
      32             :    USE input_constants,                 ONLY: do_eri_gpw,&
      33             :                                               do_eri_mme,&
      34             :                                               ri_mp2_laplace,&
      35             :                                               ri_mp2_method_gpw,&
      36             :                                               ri_rpa_method_gpw
      37             :    USE kinds,                           ONLY: dp
      38             :    USE libint_2c_3c,                    ONLY: compare_potential_types
      39             :    USE message_passing,                 ONLY: mp_para_env_release,&
      40             :                                               mp_para_env_type,&
      41             :                                               mp_request_null,&
      42             :                                               mp_request_type,&
      43             :                                               mp_waitall
      44             :    USE mp2_eri,                         ONLY: mp2_eri_2c_integrate,&
      45             :                                               mp2_eri_3c_integrate,&
      46             :                                               mp2_eri_deallocate_forces,&
      47             :                                               mp2_eri_force
      48             :    USE mp2_eri_gpw,                     ONLY: cleanup_gpw,&
      49             :                                               integrate_potential_forces_2c,&
      50             :                                               integrate_potential_forces_3c_1c,&
      51             :                                               integrate_potential_forces_3c_2c,&
      52             :                                               prepare_gpw
      53             :    USE mp2_types,                       ONLY: integ_mat_buffer_type,&
      54             :                                               integ_mat_buffer_type_2D,&
      55             :                                               mp2_type
      56             :    USE parallel_gemm_api,               ONLY: parallel_gemm
      57             :    USE particle_types,                  ONLY: particle_type
      58             :    USE pw_env_types,                    ONLY: pw_env_type
      59             :    USE pw_poisson_types,                ONLY: pw_poisson_type
      60             :    USE pw_pool_types,                   ONLY: pw_pool_type
      61             :    USE pw_types,                        ONLY: pw_c1d_gs_type,&
      62             :                                               pw_r3d_rs_type
      63             :    USE qs_environment_types,            ONLY: get_qs_env,&
      64             :                                               qs_environment_type
      65             :    USE qs_force_types,                  ONLY: allocate_qs_force,&
      66             :                                               qs_force_type,&
      67             :                                               zero_qs_force
      68             :    USE qs_kind_types,                   ONLY: qs_kind_type
      69             :    USE qs_ks_types,                     ONLY: qs_ks_env_type
      70             :    USE qs_neighbor_list_types,          ONLY: neighbor_list_set_p_type
      71             :    USE task_list_types,                 ONLY: task_list_type
      72             :    USE util,                            ONLY: get_limit
      73             :    USE virial_types,                    ONLY: virial_type
      74             : #include "./base/base_uses.f90"
      75             : 
      76             :    IMPLICIT NONE
      77             : 
      78             :    PRIVATE
      79             : 
      80             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_ri_grad'
      81             : 
      82             :    PUBLIC :: calc_ri_mp2_nonsep
      83             : 
      84             : CONTAINS
      85             : 
      86             : ! **************************************************************************************************
      87             : !> \brief Calculate the non-separable part of the gradients and update the
      88             : !>        Lagrangian
      89             : !> \param qs_env ...
      90             : !> \param mp2_env ...
      91             : !> \param para_env ...
      92             : !> \param para_env_sub ...
      93             : !> \param cell ...
      94             : !> \param particle_set ...
      95             : !> \param atomic_kind_set ...
      96             : !> \param qs_kind_set ...
      97             : !> \param mo_coeff ...
      98             : !> \param nmo ...
      99             : !> \param homo ...
     100             : !> \param dimen_RI ...
     101             : !> \param Eigenval ...
     102             : !> \param my_group_L_start ...
     103             : !> \param my_group_L_end ...
     104             : !> \param my_group_L_size ...
     105             : !> \param sab_orb_sub ...
     106             : !> \param mat_munu ...
     107             : !> \param blacs_env_sub ...
     108             : !> \author Mauro Del Ben
     109             : ! **************************************************************************************************
     110         260 :    SUBROUTINE calc_ri_mp2_nonsep(qs_env, mp2_env, para_env, para_env_sub, cell, particle_set, &
     111         260 :                                  atomic_kind_set, qs_kind_set, mo_coeff, nmo, homo, dimen_RI, Eigenval, &
     112             :                                  my_group_L_start, my_group_L_end, my_group_L_size, sab_orb_sub, mat_munu, &
     113             :                                  blacs_env_sub)
     114             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     115             :       TYPE(mp2_type)                                     :: mp2_env
     116             :       TYPE(mp_para_env_type), POINTER                    :: para_env, para_env_sub
     117             :       TYPE(cell_type), POINTER                           :: cell
     118             :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     119             :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     120             :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     121             :       TYPE(cp_fm_type), DIMENSION(:), INTENT(IN)         :: mo_coeff
     122             :       INTEGER, INTENT(IN)                                :: nmo
     123             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: homo
     124             :       INTEGER, INTENT(IN)                                :: dimen_RI
     125             :       REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: Eigenval
     126             :       INTEGER, INTENT(IN)                                :: my_group_L_start, my_group_L_end, &
     127             :                                                             my_group_L_size
     128             :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     129             :          POINTER                                         :: sab_orb_sub
     130             :       TYPE(dbcsr_p_type), INTENT(INOUT)                  :: mat_munu
     131             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env_sub
     132             : 
     133             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'calc_ri_mp2_nonsep'
     134             : 
     135             :       INTEGER                                            :: dimen, eri_method, handle, handle2, i, &
     136             :                                                             ikind, ispin, itmp(2), L_counter, LLL, &
     137             :                                                             my_P_end, my_P_size, my_P_start, nspins
     138         260 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: atom_of_kind, kind_of, natom_of_kind, &
     139         260 :                                                             virtual
     140             :       LOGICAL                                            :: alpha_beta, use_virial
     141             :       REAL(KIND=dp)                                      :: cutoff_old, eps_filter, factor, &
     142             :                                                             factor_2c, relative_cutoff_old
     143         260 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: e_cutoff_old
     144         260 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: G_PQ_local, G_PQ_local_2
     145             :       REAL(KIND=dp), DIMENSION(3, 3)                     :: h_stress, pv_virial
     146         260 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: I_tmp2
     147             :       TYPE(cp_eri_mme_param), POINTER                    :: eri_param
     148             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
     149         260 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: L1_mu_i, L2_nu_a
     150             :       TYPE(dbcsr_p_type)                                 :: matrix_P_munu
     151             :       TYPE(dbcsr_p_type), ALLOCATABLE, DIMENSION(:)      :: mo_coeff_o, mo_coeff_v
     152         260 :       TYPE(dbcsr_p_type), ALLOCATABLE, DIMENSION(:, :)   :: G_P_ia
     153         260 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: mat_munu_local, matrix_P_munu_local
     154             :       TYPE(dbcsr_type)                                   :: matrix_P_munu_nosym
     155         260 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: Lag_mu_i_1, Lag_nu_a_2, matrix_P_inu
     156             :       TYPE(dft_control_type), POINTER                    :: dft_control
     157         260 :       TYPE(mp2_eri_force), ALLOCATABLE, DIMENSION(:)     :: force_2c, force_2c_RI, force_3c_aux, &
     158         260 :                                                             force_3c_orb_mu, force_3c_orb_nu
     159        1040 :       TYPE(pw_c1d_gs_type)                               :: dvg(3), pot_g, rho_g, rho_g_copy
     160             :       TYPE(pw_env_type), POINTER                         :: pw_env_sub
     161             :       TYPE(pw_poisson_type), POINTER                     :: poisson_env
     162             :       TYPE(pw_pool_type), POINTER                        :: auxbas_pw_pool
     163             :       TYPE(pw_r3d_rs_type)                               :: psi_L, rho_r
     164         260 :       TYPE(qs_force_type), DIMENSION(:), POINTER         :: force, mp2_force
     165             :       TYPE(qs_ks_env_type), POINTER                      :: ks_env
     166             :       TYPE(task_list_type), POINTER                      :: task_list_sub
     167             :       TYPE(virial_type), POINTER                         :: virial
     168             : 
     169         260 :       CALL timeset(routineN, handle)
     170             : 
     171         260 :       eri_method = mp2_env%eri_method
     172         260 :       eri_param => mp2_env%eri_mme_param
     173             : 
     174             :       ! Find out whether we have a closed or open shell
     175         260 :       nspins = SIZE(homo)
     176         260 :       alpha_beta = (nspins == 2)
     177             : 
     178         260 :       dimen = nmo
     179         780 :       ALLOCATE (virtual(nspins))
     180         604 :       virtual(:) = dimen - homo(:)
     181         260 :       eps_filter = mp2_env%mp2_gpw%eps_filter
     182       29399 :       ALLOCATE (mo_coeff_o(nspins), mo_coeff_v(nspins), G_P_ia(nspins, my_group_L_size))
     183         604 :       DO ispin = 1, nspins
     184         344 :          mo_coeff_o(ispin)%matrix => mp2_env%ri_grad%mo_coeff_o(ispin)%matrix
     185         344 :          mo_coeff_v(ispin)%matrix => mp2_env%ri_grad%mo_coeff_v(ispin)%matrix
     186       15835 :          DO LLL = 1, my_group_L_size
     187       15575 :             G_P_ia(ispin, LLL)%matrix => mp2_env%ri_grad%G_P_ia(LLL, ispin)%matrix
     188             :          END DO
     189             :       END DO
     190         260 :       DEALLOCATE (mp2_env%ri_grad%G_P_ia)
     191             : 
     192         260 :       itmp = get_limit(dimen_RI, para_env_sub%num_pe, para_env_sub%mepos)
     193         260 :       my_P_start = itmp(1)
     194         260 :       my_P_end = itmp(2)
     195         260 :       my_P_size = itmp(2) - itmp(1) + 1
     196             : 
     197        1040 :       ALLOCATE (G_PQ_local(dimen_RI, my_group_L_size))
     198      981164 :       G_PQ_local = 0.0_dp
     199      927730 :       G_PQ_local(my_P_start:my_P_end, :) = mp2_env%ri_grad%Gamma_PQ
     200         260 :       DEALLOCATE (mp2_env%ri_grad%Gamma_PQ)
     201      927730 :       G_PQ_local(my_P_start:my_P_end, :) = G_PQ_local(my_P_start:my_P_end, :)/REAL(nspins, dp)
     202         260 :       CALL para_env_sub%sum(G_PQ_local)
     203         260 :       IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
     204          48 :          ALLOCATE (G_PQ_local_2(dimen_RI, my_group_L_size))
     205       41844 :          G_PQ_local_2 = 0.0_dp
     206       41844 :          G_PQ_local_2(my_P_start:my_P_end, :) = mp2_env%ri_grad%Gamma_PQ_2
     207          12 :          DEALLOCATE (mp2_env%ri_grad%Gamma_PQ_2)
     208       41844 :          G_PQ_local_2(my_P_start:my_P_end, :) = G_PQ_local_2(my_P_start:my_P_end, :)/REAL(nspins, dp)
     209          12 :          CALL para_env_sub%sum(G_PQ_local_2)
     210             :       END IF
     211             : 
     212             :       ! create matrix holding the back transformation (G_P_inu)
     213        1124 :       ALLOCATE (matrix_P_inu(nspins))
     214         604 :       DO ispin = 1, nspins
     215         604 :          CALL dbcsr_create(matrix_P_inu(ispin), template=mo_coeff_o(ispin)%matrix)
     216             :       END DO
     217             : 
     218             :       ! non symmetric matrix
     219             :       CALL dbcsr_create(matrix_P_munu_nosym, template=mat_munu%matrix, &
     220         260 :                         matrix_type=dbcsr_type_no_symmetry)
     221             : 
     222             :       ! create Lagrangian matrices in mixed AO/MO formalism
     223        1124 :       ALLOCATE (Lag_mu_i_1(nspins))
     224         604 :       DO ispin = 1, nspins
     225         344 :          CALL dbcsr_create(Lag_mu_i_1(ispin), template=mo_coeff_o(ispin)%matrix)
     226         604 :          CALL dbcsr_set(Lag_mu_i_1(ispin), 0.0_dp)
     227             :       END DO
     228             : 
     229        1124 :       ALLOCATE (Lag_nu_a_2(nspins))
     230         604 :       DO ispin = 1, nspins
     231         344 :          CALL dbcsr_create(Lag_nu_a_2(ispin), template=mo_coeff_v(ispin)%matrix)
     232         604 :          CALL dbcsr_set(Lag_nu_a_2(ispin), 0.0_dp)
     233             :       END DO
     234             : 
     235             :       ! get forces
     236         260 :       NULLIFY (force, virial)
     237         260 :       CALL get_qs_env(qs_env=qs_env, force=force, virial=virial)
     238             : 
     239             :       ! check if we want to calculate the virial
     240         260 :       use_virial = virial%pv_availability .AND. (.NOT. virial%pv_numer)
     241             : 
     242         260 :       CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, natom_of_kind=natom_of_kind)
     243         260 :       NULLIFY (mp2_force)
     244         260 :       CALL allocate_qs_force(mp2_force, natom_of_kind)
     245         260 :       DEALLOCATE (natom_of_kind)
     246         260 :       CALL zero_qs_force(mp2_force)
     247         260 :       mp2_env%ri_grad%mp2_force => mp2_force
     248             : 
     249         260 :       factor_2c = -4.0_dp
     250         260 :       IF (mp2_env%method == ri_rpa_method_gpw) THEN
     251          20 :          factor_2c = -1.0_dp
     252          20 :          IF (alpha_beta) factor_2c = -2.0_dp
     253             :       END IF
     254             : 
     255             :       ! prepare integral derivatives with mme method
     256         260 :       IF (eri_method .EQ. do_eri_mme) THEN
     257        1184 :          ALLOCATE (matrix_P_munu_local(my_group_L_size))
     258        1184 :          ALLOCATE (mat_munu_local(my_group_L_size))
     259          26 :          L_counter = 0
     260        1132 :          DO LLL = my_group_L_start, my_group_L_end
     261        1106 :             L_counter = L_counter + 1
     262        1106 :             ALLOCATE (mat_munu_local(L_counter)%matrix)
     263             :             CALL dbcsr_create(mat_munu_local(L_counter)%matrix, template=mat_munu%matrix, &
     264        1106 :                               matrix_type=dbcsr_type_symmetric)
     265        1106 :             CALL dbcsr_copy(mat_munu_local(L_counter)%matrix, mat_munu%matrix)
     266        1106 :             CALL dbcsr_set(mat_munu_local(L_counter)%matrix, 0.0_dp)
     267             : 
     268             :             CALL G_P_transform_MO_to_AO(matrix_P_munu_local(L_counter)%matrix, matrix_P_munu_nosym, mat_munu%matrix, &
     269             :                                         G_P_ia(:, L_counter), matrix_P_inu, &
     270        1132 :                                         mo_coeff_v, mo_coeff_o, eps_filter)
     271             :          END DO
     272             : 
     273         104 :          ALLOCATE (I_tmp2(dimen_RI, my_group_L_size))
     274       95790 :          I_tmp2(:, :) = 0.0_dp
     275             :          CALL mp2_eri_2c_integrate(eri_param, mp2_env%potential_parameter, para_env_sub, qs_env, &
     276             :                                    basis_type_a="RI_AUX", basis_type_b="RI_AUX", &
     277             :                                    hab=I_tmp2, first_b=my_group_L_start, last_b=my_group_L_end, &
     278          26 :                                    eri_method=eri_method, pab=G_PQ_local, force_a=force_2c)
     279          26 :          IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
     280           0 :             I_tmp2(:, :) = 0.0_dp
     281             :             CALL mp2_eri_2c_integrate(eri_param, mp2_env%ri_metric, para_env_sub, qs_env, &
     282             :                                       basis_type_a="RI_AUX", basis_type_b="RI_AUX", &
     283             :                                       hab=I_tmp2, first_b=my_group_L_start, last_b=my_group_L_end, &
     284           0 :                                       eri_method=eri_method, pab=G_PQ_local_2, force_a=force_2c_RI)
     285             :          END IF
     286          26 :          DEALLOCATE (I_tmp2)
     287             : 
     288             :          CALL mp2_eri_3c_integrate(eri_param, mp2_env%ri_metric, para_env_sub, qs_env, &
     289             :                                    first_c=my_group_L_start, last_c=my_group_L_end, mat_ab=mat_munu_local, &
     290             :                                    basis_type_a="ORB", basis_type_b="ORB", basis_type_c="RI_AUX", &
     291             :                                    sab_nl=sab_orb_sub, eri_method=eri_method, &
     292             :                                    pabc=matrix_P_munu_local, &
     293          26 :                                    force_a=force_3c_orb_mu, force_b=force_3c_orb_nu, force_c=force_3c_aux)
     294             : 
     295          26 :          L_counter = 0
     296        1132 :          DO LLL = my_group_L_start, my_group_L_end
     297        1106 :             L_counter = L_counter + 1
     298        2432 :             DO ispin = 1, nspins
     299             :                CALL dbcsr_multiply("N", "T", 1.0_dp, mo_coeff_v(ispin)%matrix, G_P_ia(ispin, L_counter)%matrix, &
     300        2432 :                                    0.0_dp, matrix_P_inu(ispin), filter_eps=eps_filter)
     301             :             END DO
     302             : 
     303             :             ! The matrices of G_P_ia are deallocated here
     304             :             CALL update_lagrangian(mat_munu_local(L_counter)%matrix, matrix_P_inu, Lag_mu_i_1, &
     305             :                                    G_P_ia(:, L_counter), mo_coeff_o, Lag_nu_a_2, &
     306        1132 :                                    eps_filter)
     307             :          END DO
     308             : 
     309          72 :          DO ikind = 1, SIZE(force)
     310             :             mp2_force(ikind)%mp2_non_sep(:, :) = factor_2c*force_2c(ikind)%forces(:, :) + &
     311             :                                                  force_3c_orb_mu(ikind)%forces(:, :) + &
     312             :                                                  force_3c_orb_nu(ikind)%forces(:, :) + &
     313         334 :                                                  force_3c_aux(ikind)%forces(:, :)
     314             : 
     315          72 :             IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
     316           0 :                mp2_force(ikind)%mp2_non_sep(:, :) = mp2_force(ikind)%mp2_non_sep(:, :) + factor_2c*force_2c_RI(ikind)%forces
     317             :             END IF
     318             :          END DO
     319             : 
     320          26 :          CALL mp2_eri_deallocate_forces(force_2c)
     321          26 :          IF (.NOT. compare_potential_types(mp2_env%potential_parameter, mp2_env%ri_metric)) THEN
     322           0 :             CALL mp2_eri_deallocate_forces(force_2c_RI)
     323             :          END IF
     324          26 :          CALL mp2_eri_deallocate_forces(force_3c_aux)
     325          26 :          CALL mp2_eri_deallocate_forces(force_3c_orb_mu)
     326          26 :          CALL mp2_eri_deallocate_forces(force_3c_orb_nu)
     327          26 :          CALL dbcsr_deallocate_matrix_set(matrix_P_munu_local)
     328          26 :          CALL dbcsr_deallocate_matrix_set(mat_munu_local)
     329             : 
     330         234 :       ELSEIF (eri_method == do_eri_gpw) THEN
     331         234 :          CALL get_qs_env(qs_env, ks_env=ks_env)
     332             : 
     333         234 :          CALL get_atomic_kind_set(atomic_kind_set, kind_of=kind_of, atom_of_kind=atom_of_kind)
     334             : 
     335             :          ! Supporting stuff for GPW
     336             :          CALL prepare_gpw(qs_env, dft_control, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
     337         234 :                           auxbas_pw_pool, poisson_env, task_list_sub, rho_r, rho_g, pot_g, psi_L, sab_orb_sub)
     338             : 
     339             :          ! in case virial is required we need auxiliary pw
     340             :          ! for calculate the MP2-volume contribution to the virial
     341             :          ! (hartree potential derivatives)
     342         234 :          IF (use_virial) THEN
     343          46 :             CALL auxbas_pw_pool%create_pw(rho_g_copy)
     344         184 :             DO i = 1, 3
     345         184 :                CALL auxbas_pw_pool%create_pw(dvg(i))
     346             :             END DO
     347             :          END IF
     348             : 
     349             :          ! start main loop over auxiliary basis functions
     350         234 :          CALL timeset(routineN//"_loop", handle2)
     351             : 
     352         234 :          IF (use_virial) h_stress = 0.0_dp
     353             : 
     354         234 :          L_counter = 0
     355       10528 :          DO LLL = my_group_L_start, my_group_L_end
     356       10294 :             L_counter = L_counter + 1
     357             : 
     358             :             CALL G_P_transform_MO_to_AO(matrix_P_munu%matrix, matrix_P_munu_nosym, mat_munu%matrix, &
     359             :                                         G_P_ia(:, L_counter), matrix_P_inu, &
     360       10294 :                                         mo_coeff_v, mo_coeff_o, eps_filter)
     361             : 
     362             :             CALL integrate_potential_forces_2c(rho_r, LLL, mo_coeff(1), rho_g, atomic_kind_set, &
     363             :                                                qs_kind_set, particle_set, cell, pw_env_sub, poisson_env, &
     364             :                                                pot_g, mp2_env%potential_parameter, use_virial, &
     365             :                                                rho_g_copy, dvg, kind_of, atom_of_kind, G_PQ_local(:, L_counter), &
     366       10294 :                                                mp2_force, h_stress, para_env_sub, dft_control, psi_L, factor_2c)
     367             : 
     368       10294 :             IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) THEN
     369             : 
     370             :                CALL integrate_potential_forces_2c(rho_r, LLL, mo_coeff(1), rho_g, atomic_kind_set, &
     371             :                                                   qs_kind_set, particle_set, cell, pw_env_sub, poisson_env, &
     372             :                                                   pot_g, mp2_env%ri_metric, use_virial, &
     373             :                                                   rho_g_copy, dvg, kind_of, atom_of_kind, G_PQ_local_2(:, L_counter), &
     374         498 :                                                   mp2_force, h_stress, para_env_sub, dft_control, psi_L, factor_2c)
     375             :             END IF
     376             : 
     377       34042 :             IF (use_virial) pv_virial = virial%pv_virial
     378             :             CALL integrate_potential_forces_3c_1c(mat_munu, rho_r, matrix_P_munu, qs_env, pw_env_sub, &
     379       10294 :                                                   task_list_sub)
     380       10294 :             IF (use_virial) THEN
     381       25727 :                h_stress = h_stress + (virial%pv_virial - pv_virial)
     382       25727 :                virial%pv_virial = pv_virial
     383             :             END IF
     384             : 
     385             :             ! The matrices of G_P_ia are deallocated here
     386             :             CALL update_lagrangian(mat_munu%matrix, matrix_P_inu, Lag_mu_i_1, &
     387             :                                    G_P_ia(:, L_counter), mo_coeff_o, Lag_nu_a_2, &
     388       10294 :                                    eps_filter)
     389             : 
     390             :             CALL integrate_potential_forces_3c_2c(matrix_P_munu, rho_r, rho_g, task_list_sub, pw_env_sub, &
     391             :                                                   mp2_env%ri_metric, &
     392             :                                                   ks_env, poisson_env, pot_g, use_virial, rho_g_copy, dvg, &
     393             :                                                   h_stress, para_env_sub, kind_of, atom_of_kind, &
     394       10528 :                                                   qs_kind_set, particle_set, cell, LLL, mp2_force, dft_control)
     395             :          END DO
     396             : 
     397         234 :          CALL timestop(handle2)
     398             : 
     399         234 :          DEALLOCATE (kind_of)
     400         234 :          DEALLOCATE (atom_of_kind)
     401             : 
     402         234 :          IF (use_virial) THEN
     403          46 :             CALL auxbas_pw_pool%give_back_pw(rho_g_copy)
     404         184 :             DO i = 1, 3
     405         184 :                CALL auxbas_pw_pool%give_back_pw(dvg(i))
     406             :             END DO
     407             :          END IF
     408             : 
     409             :          CALL cleanup_gpw(qs_env, e_cutoff_old, cutoff_old, relative_cutoff_old, para_env_sub, pw_env_sub, &
     410         234 :                           task_list_sub, auxbas_pw_pool, rho_r, rho_g, pot_g, psi_L)
     411             : 
     412         234 :          CALL dbcsr_release(matrix_P_munu%matrix)
     413         234 :          DEALLOCATE (matrix_P_munu%matrix)
     414             : 
     415             :       END IF
     416             : 
     417         812 :       IF (use_virial) mp2_env%ri_grad%mp2_virial = h_stress
     418             : 
     419         260 :       DEALLOCATE (G_PQ_local)
     420         260 :       IF (.NOT. compare_potential_types(mp2_env%ri_metric, mp2_env%potential_parameter)) DEALLOCATE (G_PQ_local_2)
     421             : 
     422         260 :       CALL dbcsr_release(matrix_P_munu_nosym)
     423             : 
     424         604 :       DO ispin = 1, nspins
     425         604 :          CALL dbcsr_release(matrix_P_inu(ispin))
     426             :       END DO
     427         260 :       DEALLOCATE (matrix_P_inu, G_P_ia)
     428             : 
     429             :       ! move the forces in the correct place
     430         260 :       IF (eri_method .EQ. do_eri_gpw) THEN
     431         692 :          DO ikind = 1, SIZE(mp2_force)
     432        6218 :             mp2_force(ikind)%mp2_non_sep(:, :) = force(ikind)%rho_elec(:, :)
     433        3572 :             force(ikind)%rho_elec(:, :) = 0.0_dp
     434             :          END DO
     435             :       END IF
     436             : 
     437             :       ! Now we move from the local matrices to the global ones
     438             :       ! defined over all MPI tasks
     439             :       ! Start with moving from the DBCSR to FM for the lagrangians
     440             : 
     441        1988 :       ALLOCATE (L1_mu_i(nspins), L2_nu_a(nspins))
     442         604 :       DO ispin = 1, nspins
     443             :          ! Now we move from the local matrices to the global ones
     444             :          ! defined over all MPI tasks
     445             :          ! Start with moving from the DBCSR to FM for the lagrangians
     446         344 :          NULLIFY (fm_struct_tmp)
     447             :          CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env_sub, context=blacs_env_sub, &
     448         344 :                                   nrow_global=dimen, ncol_global=homo(ispin))
     449         344 :          CALL cp_fm_create(L1_mu_i(ispin), fm_struct_tmp, name="Lag_mu_i")
     450         344 :          CALL cp_fm_struct_release(fm_struct_tmp)
     451         344 :          CALL cp_fm_set_all(L1_mu_i(ispin), 0.0_dp)
     452         344 :          CALL copy_dbcsr_to_fm(matrix=Lag_mu_i_1(ispin), fm=L1_mu_i(ispin))
     453             : 
     454             :          ! release Lag_mu_i_1
     455         344 :          CALL dbcsr_release(Lag_mu_i_1(ispin))
     456             : 
     457         344 :          NULLIFY (fm_struct_tmp)
     458             :          CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env_sub, context=blacs_env_sub, &
     459         344 :                                   nrow_global=dimen, ncol_global=virtual(ispin))
     460         344 :          CALL cp_fm_create(L2_nu_a(ispin), fm_struct_tmp, name="Lag_nu_a")
     461         344 :          CALL cp_fm_struct_release(fm_struct_tmp)
     462         344 :          CALL cp_fm_set_all(L2_nu_a(ispin), 0.0_dp)
     463         344 :          CALL copy_dbcsr_to_fm(matrix=Lag_nu_a_2(ispin), fm=L2_nu_a(ispin))
     464             : 
     465             :          ! release Lag_nu_a_2
     466         604 :          CALL dbcsr_release(Lag_nu_a_2(ispin))
     467             :       END DO
     468         260 :       DEALLOCATE (Lag_mu_i_1, Lag_nu_a_2)
     469             : 
     470             :       ! Set the factor to multiply P_ij (depends on the open or closed shell)
     471         260 :       factor = 1.0_dp
     472         260 :       IF (alpha_beta) factor = 0.50_dp
     473             : 
     474         604 :       DO ispin = 1, nspins
     475             :          CALL create_W_P(qs_env, mp2_env, mo_coeff(ispin), homo(ispin), virtual(ispin), dimen, para_env, &
     476             :                          para_env_sub, Eigenval(:, ispin), L1_mu_i(ispin), L2_nu_a(ispin), &
     477         604 :                          factor, ispin)
     478             :       END DO
     479         260 :       DEALLOCATE (L1_mu_i, L2_nu_a)
     480             : 
     481         260 :       CALL timestop(handle)
     482             : 
     483         520 :    END SUBROUTINE calc_ri_mp2_nonsep
     484             : 
     485             : ! **************************************************************************************************
     486             : !> \brief Transforms G_P_ia to G_P_munu
     487             : !> \param G_P_munu The container for G_P_munu, will be allocated and created if not allocated on entry
     488             : !> \param G_P_munu_nosym ...
     489             : !> \param mat_munu ...
     490             : !> \param G_P_ia ...
     491             : !> \param G_P_inu ...
     492             : !> \param mo_coeff_v ...
     493             : !> \param mo_coeff_o ...
     494             : !> \param eps_filter ...
     495             : ! **************************************************************************************************
     496       11400 :    SUBROUTINE G_P_transform_MO_to_AO(G_P_munu, G_P_munu_nosym, mat_munu, G_P_ia, G_P_inu, &
     497       11400 :                                      mo_coeff_v, mo_coeff_o, eps_filter)
     498             :       TYPE(dbcsr_type), POINTER                          :: G_P_munu
     499             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: G_P_munu_nosym, mat_munu
     500             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: G_P_ia
     501             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: G_P_inu
     502             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: mo_coeff_v, mo_coeff_o
     503             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
     504             : 
     505             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'G_P_transform_MO_to_AO'
     506             : 
     507             :       INTEGER                                            :: handle
     508             : 
     509       11400 :       IF (.NOT. ASSOCIATED(G_P_munu)) THEN
     510        1340 :          ALLOCATE (G_P_munu)
     511             :          CALL dbcsr_create(G_P_munu, template=mat_munu, &
     512        1340 :                            matrix_type=dbcsr_type_symmetric)
     513             :       END IF
     514             : 
     515       11400 :       CALL G_P_transform_alpha_beta(G_P_ia, G_P_inu, G_P_munu_nosym, mo_coeff_v, mo_coeff_o, eps_filter)
     516             : 
     517             :       ! symmetrize
     518       11400 :       CALL timeset(routineN//"_symmetrize", handle)
     519       11400 :       CALL dbcsr_set(G_P_munu, 0.0_dp)
     520       11400 :       CALL dbcsr_transposed(G_P_munu, G_P_munu_nosym)
     521             :       CALL dbcsr_add(G_P_munu, G_P_munu_nosym, &
     522       11400 :                      alpha_scalar=2.0_dp, beta_scalar=2.0_dp)
     523             :       ! this is a trick to avoid that integrate_v_rspace starts to cry
     524       11400 :       CALL dbcsr_copy_into_existing(mat_munu, G_P_munu)
     525       11400 :       CALL dbcsr_copy(G_P_munu, mat_munu)
     526             : 
     527       11400 :       CALL timestop(handle)
     528             : 
     529       11400 :    END SUBROUTINE G_P_transform_MO_to_AO
     530             : 
     531             : ! **************************************************************************************************
     532             : !> \brief ...
     533             : !> \param G_P_ia ...
     534             : !> \param G_P_inu ...
     535             : !> \param G_P_munu ...
     536             : !> \param mo_coeff_v ...
     537             : !> \param mo_coeff_o ...
     538             : !> \param eps_filter ...
     539             : ! **************************************************************************************************
     540       11400 :    SUBROUTINE G_P_transform_alpha_beta(G_P_ia, G_P_inu, G_P_munu, mo_coeff_v, mo_coeff_o, eps_filter)
     541             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: G_P_ia
     542             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: G_P_inu
     543             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: G_P_munu
     544             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: mo_coeff_v, mo_coeff_o
     545             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
     546             : 
     547             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'G_P_transform_alpha_beta'
     548             : 
     549             :       INTEGER                                            :: handle, ispin
     550             :       REAL(KIND=dp)                                      :: factor
     551             : 
     552       11400 :       CALL timeset(routineN, handle)
     553             : 
     554       11400 :       factor = 1.0_dp/REAL(SIZE(G_P_ia), dp)
     555             : 
     556       11400 :       CALL dbcsr_set(G_P_munu, 0.0_dp)
     557             : 
     558       26631 :       DO ispin = 1, SIZE(G_P_ia)
     559             :          ! first back-transformation a->nu
     560             :          CALL dbcsr_multiply("N", "T", 1.0_dp, mo_coeff_v(ispin)%matrix, G_P_ia(ispin)%matrix, &
     561       15231 :                              0.0_dp, G_P_inu(ispin), filter_eps=eps_filter)
     562             : 
     563             :          ! second back-transformation i->mu
     564             :          CALL dbcsr_multiply("N", "T", factor, G_P_inu(ispin), mo_coeff_o(ispin)%matrix, &
     565       26631 :                              1.0_dp, G_P_munu, filter_eps=eps_filter)
     566             :       END DO
     567             : 
     568       11400 :       CALL timestop(handle)
     569             : 
     570       11400 :    END SUBROUTINE G_P_transform_alpha_beta
     571             : 
     572             : ! **************************************************************************************************
     573             : !> \brief ...
     574             : !> \param mat_munu ...
     575             : !> \param matrix_P_inu ...
     576             : !> \param Lag_mu_i_1 ...
     577             : !> \param G_P_ia ...
     578             : !> \param mo_coeff_o ...
     579             : !> \param Lag_nu_a_2 ...
     580             : !> \param eps_filter ...
     581             : ! **************************************************************************************************
     582       11400 :    SUBROUTINE update_lagrangian(mat_munu, matrix_P_inu, Lag_mu_i_1, &
     583       22800 :                                 G_P_ia, mo_coeff_o, Lag_nu_a_2, &
     584             :                                 eps_filter)
     585             :       TYPE(dbcsr_type), INTENT(IN)                       :: mat_munu
     586             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: matrix_P_inu, Lag_mu_i_1
     587             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(INOUT)    :: G_P_ia
     588             :       TYPE(dbcsr_p_type), DIMENSION(:), INTENT(IN)       :: mo_coeff_o
     589             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: Lag_nu_a_2
     590             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
     591             : 
     592             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'update_lagrangian'
     593             : 
     594             :       INTEGER                                            :: handle, ispin
     595             : 
     596             :       ! update lagrangian
     597       11400 :       CALL timeset(routineN, handle)
     598             : 
     599       26631 :       DO ispin = 1, SIZE(G_P_ia)
     600             :          ! first contract mat_munu with the half back transformed Gamma_i_nu
     601             :          ! in order to update Lag_mu_i_1
     602             :          CALL dbcsr_multiply("N", "N", 1.0_dp, mat_munu, matrix_P_inu(ispin), &
     603       15231 :                              1.0_dp, Lag_mu_i_1(ispin), filter_eps=eps_filter)
     604             : 
     605             :          ! transform first index of mat_munu and store the result into matrix_P_inu
     606       15231 :          CALL dbcsr_set(matrix_P_inu(ispin), 0.0_dp)
     607             :          CALL dbcsr_multiply("N", "N", 1.0_dp, mat_munu, mo_coeff_o(ispin)%matrix, &
     608       15231 :                              0.0_dp, matrix_P_inu(ispin), filter_eps=eps_filter)
     609             : 
     610             :          ! contract the transformend matrix_P_inu with the untransformend Gamma_i_a
     611             :          ! in order to update Lag_nu_a_2
     612             :          CALL dbcsr_multiply("N", "N", -1.0_dp, matrix_P_inu(ispin), G_P_ia(ispin)%matrix, &
     613       15231 :                              1.0_dp, Lag_nu_a_2(ispin), filter_eps=eps_filter)
     614             : 
     615             :          ! release the actual gamma_P_ia
     616       15231 :          CALL dbcsr_release(G_P_ia(ispin)%matrix)
     617       26631 :          DEALLOCATE (G_P_ia(ispin)%matrix)
     618             :       END DO
     619             : 
     620       11400 :       CALL timestop(handle)
     621             : 
     622       11400 :    END SUBROUTINE update_lagrangian
     623             : 
     624             : ! **************************************************************************************************
     625             : !> \brief ...
     626             : !> \param qs_env ...
     627             : !> \param mp2_env ...
     628             : !> \param mo_coeff ...
     629             : !> \param homo ...
     630             : !> \param virtual ...
     631             : !> \param dimen ...
     632             : !> \param para_env ...
     633             : !> \param para_env_sub ...
     634             : !> \param Eigenval ...
     635             : !> \param L1_mu_i ...
     636             : !> \param L2_nu_a ...
     637             : !> \param factor ...
     638             : !> \param kspin ...
     639             : ! **************************************************************************************************
     640         344 :    SUBROUTINE create_W_P(qs_env, mp2_env, mo_coeff, homo, virtual, dimen, para_env, para_env_sub, &
     641         344 :                          Eigenval, L1_mu_i, L2_nu_a, factor, kspin)
     642             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     643             :       TYPE(mp2_type)                                     :: mp2_env
     644             :       TYPE(cp_fm_type), INTENT(IN)                       :: mo_coeff
     645             :       INTEGER, INTENT(IN)                                :: homo, virtual, dimen
     646             :       TYPE(mp_para_env_type), POINTER                    :: para_env, para_env_sub
     647             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: Eigenval
     648             :       TYPE(cp_fm_type), INTENT(INOUT)                    :: L1_mu_i, L2_nu_a
     649             :       REAL(KIND=dp), INTENT(IN)                          :: factor
     650             :       INTEGER, INTENT(IN)                                :: kspin
     651             : 
     652             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'create_W_P'
     653             : 
     654             :       INTEGER :: color_exchange, dummy_proc, handle, handle2, handle3, i_global, i_local, iiB, &
     655             :          iii, iproc, itmp(2), j_global, j_local, jjB, max_col_size, max_row_size, &
     656             :          my_B_virtual_end, my_B_virtual_start, mypcol, myprow, ncol_block, ncol_block_1i, &
     657             :          ncol_block_2a, ncol_local, ncol_local_1i, ncol_local_2a, npcol, npcol_1i, npcol_2a, &
     658             :          nprow, nprow_1i, nprow_2a, nrow_block, nrow_block_1i, nrow_block_2a, nrow_local, &
     659             :          nrow_local_1i, nrow_local_2a, number_of_rec, number_of_send, proc_receive, &
     660             :          proc_receive_static, proc_send, proc_send_ex, proc_send_static, proc_send_sub, &
     661             :          proc_shift, rec_col_size
     662             :       INTEGER :: rec_counter, rec_row_size, send_col_size, send_counter, send_pcol, send_prow, &
     663             :          send_row_size, size_rec_buffer, size_send_buffer
     664         344 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: iii_vet, map_rec_size, map_send_size, &
     665         344 :                                                             pos_info, pos_info_ex, proc_2_send_pos
     666         344 :       INTEGER, ALLOCATABLE, DIMENSION(:, :) :: grid_2_mepos, mepos_2_grid, my_col_indeces_info_1i, &
     667         344 :          my_col_indeces_info_2a, my_row_indeces_info_1i, my_row_indeces_info_2a, sizes, sizes_1i, &
     668         344 :          sizes_2a
     669         344 :       INTEGER, ALLOCATABLE, DIMENSION(:, :, :)           :: col_indeces_info_1i, &
     670         344 :                                                             col_indeces_info_2a, &
     671         344 :                                                             row_indeces_info_1i, &
     672         344 :                                                             row_indeces_info_2a
     673         344 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, col_indices_1i, &
     674         344 :                                                             col_indices_2a, row_indices, &
     675         344 :                                                             row_indices_1i, row_indices_2a
     676         344 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: ab_rec, ab_send, mat_rec, mat_send
     677             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     678             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
     679             :       TYPE(cp_fm_type)                                   :: fm_P_ij, L_mu_q
     680             :       TYPE(integ_mat_buffer_type), ALLOCATABLE, &
     681         344 :          DIMENSION(:)                                    :: buffer_rec, buffer_send
     682             :       TYPE(integ_mat_buffer_type_2D), ALLOCATABLE, &
     683         344 :          DIMENSION(:)                                    :: buffer_cyclic
     684             :       TYPE(mp_para_env_type), POINTER                    :: para_env_exchange
     685         344 :       TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:)   :: req_send
     686             : 
     687         344 :       CALL timeset(routineN, handle)
     688             : 
     689             :       ! create the globally distributed mixed lagrangian
     690         344 :       NULLIFY (blacs_env)
     691         344 :       CALL get_qs_env(qs_env, blacs_env=blacs_env)
     692             : 
     693         344 :       NULLIFY (fm_struct_tmp)
     694             :       CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
     695         344 :                                nrow_global=dimen, ncol_global=dimen)
     696         344 :       CALL cp_fm_create(L_mu_q, fm_struct_tmp, name="Lag_mu_q")
     697         344 :       CALL cp_fm_struct_release(fm_struct_tmp)
     698         344 :       CALL cp_fm_set_all(L_mu_q, 0.0_dp)
     699             : 
     700             :       ! create all information array
     701        1032 :       ALLOCATE (pos_info(0:para_env%num_pe - 1))
     702         344 :       CALL para_env%allgather(para_env_sub%mepos, pos_info)
     703             : 
     704             :       ! get matrix information for the global
     705             :       CALL cp_fm_get_info(matrix=L_mu_q, &
     706             :                           nrow_local=nrow_local, &
     707             :                           ncol_local=ncol_local, &
     708             :                           row_indices=row_indices, &
     709             :                           col_indices=col_indices, &
     710             :                           nrow_block=nrow_block, &
     711         344 :                           ncol_block=ncol_block)
     712         344 :       myprow = L_mu_q%matrix_struct%context%mepos(1)
     713         344 :       mypcol = L_mu_q%matrix_struct%context%mepos(2)
     714         344 :       nprow = L_mu_q%matrix_struct%context%num_pe(1)
     715         344 :       npcol = L_mu_q%matrix_struct%context%num_pe(2)
     716             : 
     717        1376 :       ALLOCATE (grid_2_mepos(0:nprow - 1, 0:npcol - 1))
     718        1376 :       grid_2_mepos = 0
     719         344 :       grid_2_mepos(myprow, mypcol) = para_env%mepos
     720         344 :       CALL para_env%sum(grid_2_mepos)
     721             : 
     722             :       ! get matrix information for L1_mu_i
     723             :       CALL cp_fm_get_info(matrix=L1_mu_i, &
     724             :                           nrow_local=nrow_local_1i, &
     725             :                           ncol_local=ncol_local_1i, &
     726             :                           row_indices=row_indices_1i, &
     727             :                           col_indices=col_indices_1i, &
     728             :                           nrow_block=nrow_block_1i, &
     729         344 :                           ncol_block=ncol_block_1i)
     730         344 :       nprow_1i = L1_mu_i%matrix_struct%context%num_pe(1)
     731         344 :       npcol_1i = L1_mu_i%matrix_struct%context%num_pe(2)
     732             : 
     733        1032 :       ALLOCATE (sizes_1i(2, 0:para_env_sub%num_pe - 1))
     734        1032 :       CALL para_env_sub%allgather([nrow_local_1i, ncol_local_1i], sizes_1i)
     735             : 
     736             :       ! get matrix information for L2_nu_a
     737             :       CALL cp_fm_get_info(matrix=L2_nu_a, &
     738             :                           nrow_local=nrow_local_2a, &
     739             :                           ncol_local=ncol_local_2a, &
     740             :                           row_indices=row_indices_2a, &
     741             :                           col_indices=col_indices_2a, &
     742             :                           nrow_block=nrow_block_2a, &
     743         344 :                           ncol_block=ncol_block_2a)
     744         344 :       nprow_2a = L2_nu_a%matrix_struct%context%num_pe(1)
     745         344 :       npcol_2a = L2_nu_a%matrix_struct%context%num_pe(2)
     746             : 
     747        1032 :       ALLOCATE (sizes_2a(2, 0:para_env_sub%num_pe - 1))
     748        1032 :       CALL para_env_sub%allgather([nrow_local_2a, ncol_local_2a], sizes_2a)
     749             : 
     750             :       ! Here we perform a ring communication scheme taking into account
     751             :       ! for the sub-group distribution of the source matrices.
     752             :       ! as a first step we need to redistribute the data within
     753             :       ! the subgroup.
     754             :       ! In order to do so we have to allocate the structure
     755             :       ! that will hold the local data involved in communication, this
     756             :       ! structure will be the same for processes in different subgroups
     757             :       ! sharing the same position in the subgroup.
     758             :       ! -1) create the exchange para_env
     759         344 :       color_exchange = para_env_sub%mepos
     760         344 :       ALLOCATE (para_env_exchange)
     761         344 :       CALL para_env_exchange%from_split(para_env, color_exchange)
     762        1032 :       ALLOCATE (pos_info_ex(0:para_env%num_pe - 1))
     763         344 :       CALL para_env%allgather(para_env_exchange%mepos, pos_info_ex)
     764        1032 :       ALLOCATE (sizes(2, 0:para_env_exchange%num_pe - 1))
     765        1032 :       CALL para_env_exchange%allgather([nrow_local, ncol_local], sizes)
     766             : 
     767             :       ! 0) store some info about indeces of the fm matrices (subgroup)
     768         344 :       CALL timeset(routineN//"_inx", handle2)
     769             :       ! matrix L1_mu_i
     770         704 :       max_row_size = MAXVAL(sizes_1i(1, :))
     771         704 :       max_col_size = MAXVAL(sizes_1i(2, :))
     772        1376 :       ALLOCATE (row_indeces_info_1i(2, max_row_size, 0:para_env_sub%num_pe - 1))
     773        1376 :       ALLOCATE (col_indeces_info_1i(2, max_col_size, 0:para_env_sub%num_pe - 1))
     774        1032 :       ALLOCATE (my_row_indeces_info_1i(2, max_row_size))
     775        1032 :       ALLOCATE (my_col_indeces_info_1i(2, max_col_size))
     776       20426 :       row_indeces_info_1i = 0
     777        4664 :       col_indeces_info_1i = 0
     778         344 :       dummy_proc = 0
     779             :       ! row
     780        6716 :       DO iiB = 1, nrow_local_1i
     781        6372 :          i_global = row_indices_1i(iiB)
     782             :          send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
     783        6372 :                                    L_mu_q%matrix_struct%first_p_pos(1), nprow)
     784             :          i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, &
     785        6372 :                                  L_mu_q%matrix_struct%first_p_pos(1), nprow)
     786        6372 :          my_row_indeces_info_1i(1, iiB) = send_prow
     787        6716 :          my_row_indeces_info_1i(2, iiB) = i_local
     788             :       END DO
     789             :       ! col
     790        1592 :       DO jjB = 1, ncol_local_1i
     791        1248 :          j_global = col_indices_1i(jjB)
     792             :          send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
     793        1248 :                                    L_mu_q%matrix_struct%first_p_pos(2), npcol)
     794             :          j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, &
     795        1248 :                                  L_mu_q%matrix_struct%first_p_pos(2), npcol)
     796        1248 :          my_col_indeces_info_1i(1, jjB) = send_pcol
     797        1592 :          my_col_indeces_info_1i(2, jjB) = j_local
     798             :       END DO
     799         344 :       CALL para_env_sub%allgather(my_row_indeces_info_1i, row_indeces_info_1i)
     800         344 :       CALL para_env_sub%allgather(my_col_indeces_info_1i, col_indeces_info_1i)
     801         344 :       DEALLOCATE (my_row_indeces_info_1i, my_col_indeces_info_1i)
     802             : 
     803             :       ! matrix L2_nu_a
     804         704 :       max_row_size = MAXVAL(sizes_2a(1, :))
     805         704 :       max_col_size = MAXVAL(sizes_2a(2, :))
     806        1376 :       ALLOCATE (row_indeces_info_2a(2, max_row_size, 0:para_env_sub%num_pe - 1))
     807        1376 :       ALLOCATE (col_indeces_info_2a(2, max_col_size, 0:para_env_sub%num_pe - 1))
     808        1032 :       ALLOCATE (my_row_indeces_info_2a(2, max_row_size))
     809        1032 :       ALLOCATE (my_col_indeces_info_2a(2, max_col_size))
     810       20426 :       row_indeces_info_2a = 0
     811       17570 :       col_indeces_info_2a = 0
     812             :       ! row
     813        6716 :       DO iiB = 1, nrow_local_2a
     814        6372 :          i_global = row_indices_2a(iiB)
     815             :          send_prow = cp_fm_indxg2p(i_global, nrow_block, dummy_proc, &
     816        6372 :                                    L_mu_q%matrix_struct%first_p_pos(1), nprow)
     817             :          i_local = cp_fm_indxg2l(i_global, nrow_block, dummy_proc, &
     818        6372 :                                  L_mu_q%matrix_struct%first_p_pos(1), nprow)
     819        6372 :          my_row_indeces_info_2a(1, iiB) = send_prow
     820        6716 :          my_row_indeces_info_2a(2, iiB) = i_local
     821             :       END DO
     822             :       ! col
     823        5658 :       DO jjB = 1, ncol_local_2a
     824        5314 :          j_global = col_indices_2a(jjB) + homo
     825             :          send_pcol = cp_fm_indxg2p(j_global, ncol_block, dummy_proc, &
     826        5314 :                                    L_mu_q%matrix_struct%first_p_pos(2), npcol)
     827             :          j_local = cp_fm_indxg2l(j_global, ncol_block, dummy_proc, &
     828        5314 :                                  L_mu_q%matrix_struct%first_p_pos(2), npcol)
     829        5314 :          my_col_indeces_info_2a(1, jjB) = send_pcol
     830        5658 :          my_col_indeces_info_2a(2, jjB) = j_local
     831             :       END DO
     832         344 :       CALL para_env_sub%allgather(my_row_indeces_info_2a, row_indeces_info_2a)
     833         344 :       CALL para_env_sub%allgather(my_col_indeces_info_2a, col_indeces_info_2a)
     834         344 :       DEALLOCATE (my_row_indeces_info_2a, my_col_indeces_info_2a)
     835         344 :       CALL timestop(handle2)
     836             : 
     837             :       ! 1) define the map for sending data in the subgroup starting with L1_mu_i
     838         344 :       CALL timeset(routineN//"_subinfo", handle2)
     839        1032 :       ALLOCATE (map_send_size(0:para_env_sub%num_pe - 1))
     840         704 :       map_send_size = 0
     841        1592 :       DO jjB = 1, ncol_local_1i
     842        1248 :          send_pcol = col_indeces_info_1i(1, jjB, para_env_sub%mepos)
     843       25286 :          DO iiB = 1, nrow_local_1i
     844       23694 :             send_prow = row_indeces_info_1i(1, iiB, para_env_sub%mepos)
     845       23694 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     846       23694 :             proc_send_sub = pos_info(proc_send)
     847       24942 :             map_send_size(proc_send_sub) = map_send_size(proc_send_sub) + 1
     848             :          END DO
     849             :       END DO
     850             :       ! and the same for L2_nu_a
     851        5658 :       DO jjB = 1, ncol_local_2a
     852        5314 :          send_pcol = col_indeces_info_2a(1, jjB, para_env_sub%mepos)
     853      123528 :          DO iiB = 1, nrow_local_2a
     854      117870 :             send_prow = row_indeces_info_2a(1, iiB, para_env_sub%mepos)
     855      117870 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     856      117870 :             proc_send_sub = pos_info(proc_send)
     857      123184 :             map_send_size(proc_send_sub) = map_send_size(proc_send_sub) + 1
     858             :          END DO
     859             :       END DO
     860             :       ! and exchange data in order to create map_rec_size
     861        1032 :       ALLOCATE (map_rec_size(0:para_env_sub%num_pe - 1))
     862         704 :       map_rec_size = 0
     863         344 :       CALL para_env_sub%alltoall(map_send_size, map_rec_size, 1)
     864         344 :       CALL timestop(handle2)
     865             : 
     866             :       ! 2) reorder data in sending buffer
     867         344 :       CALL timeset(routineN//"_sub_Bsend", handle2)
     868             :       ! count the number of messages (include myself)
     869         344 :       number_of_send = 0
     870         704 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     871         360 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
     872         704 :          IF (map_send_size(proc_send) > 0) THEN
     873         344 :             number_of_send = number_of_send + 1
     874             :          END IF
     875             :       END DO
     876             :       ! allocate the structure that will hold the messages to be sent
     877        1376 :       ALLOCATE (buffer_send(number_of_send))
     878         344 :       send_counter = 0
     879        1032 :       ALLOCATE (proc_2_send_pos(0:para_env_sub%num_pe - 1))
     880         704 :       proc_2_send_pos = 0
     881         704 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     882         360 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
     883         360 :          size_send_buffer = map_send_size(proc_send)
     884         704 :          IF (map_send_size(proc_send) > 0) THEN
     885         344 :             send_counter = send_counter + 1
     886             :             ! allocate the sending buffer (msg)
     887        1032 :             ALLOCATE (buffer_send(send_counter)%msg(size_send_buffer))
     888      141908 :             buffer_send(send_counter)%msg = 0.0_dp
     889         344 :             buffer_send(send_counter)%proc = proc_send
     890         344 :             proc_2_send_pos(proc_send) = send_counter
     891             :          END IF
     892             :       END DO
     893             :       ! loop over the locally held data and fill the buffer_send
     894             :       ! for doing that we need an array that keep track if the
     895             :       ! sequential increase of the index for each message
     896        1032 :       ALLOCATE (iii_vet(number_of_send))
     897         688 :       iii_vet = 0
     898        1592 :       DO jjB = 1, ncol_local_1i
     899        1248 :          send_pcol = col_indeces_info_1i(1, jjB, para_env_sub%mepos)
     900       25286 :          DO iiB = 1, nrow_local_1i
     901       23694 :             send_prow = row_indeces_info_1i(1, iiB, para_env_sub%mepos)
     902       23694 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     903       23694 :             proc_send_sub = pos_info(proc_send)
     904       23694 :             send_counter = proc_2_send_pos(proc_send_sub)
     905       23694 :             iii_vet(send_counter) = iii_vet(send_counter) + 1
     906       23694 :             iii = iii_vet(send_counter)
     907       24942 :             buffer_send(send_counter)%msg(iii) = L1_mu_i%local_data(iiB, jjB)
     908             :          END DO
     909             :       END DO
     910             :       ! release the local data of L1_mu_i
     911         344 :       DEALLOCATE (L1_mu_i%local_data)
     912             :       ! and the same for L2_nu_a
     913        5658 :       DO jjB = 1, ncol_local_2a
     914        5314 :          send_pcol = col_indeces_info_2a(1, jjB, para_env_sub%mepos)
     915      123528 :          DO iiB = 1, nrow_local_2a
     916      117870 :             send_prow = row_indeces_info_2a(1, iiB, para_env_sub%mepos)
     917      117870 :             proc_send = grid_2_mepos(send_prow, send_pcol)
     918      117870 :             proc_send_sub = pos_info(proc_send)
     919      117870 :             send_counter = proc_2_send_pos(proc_send_sub)
     920      117870 :             iii_vet(send_counter) = iii_vet(send_counter) + 1
     921      117870 :             iii = iii_vet(send_counter)
     922      123184 :             buffer_send(send_counter)%msg(iii) = L2_nu_a%local_data(iiB, jjB)
     923             :          END DO
     924             :       END DO
     925         344 :       DEALLOCATE (L2_nu_a%local_data)
     926         344 :       DEALLOCATE (proc_2_send_pos)
     927         344 :       DEALLOCATE (iii_vet)
     928         344 :       CALL timestop(handle2)
     929             : 
     930             :       ! 3) create the buffer for receive, post the message with irecv
     931             :       !    and send the messages non-blocking
     932         344 :       CALL timeset(routineN//"_sub_isendrecv", handle2)
     933             :       ! count the number of messages to be received
     934         344 :       number_of_rec = 0
     935         704 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     936         360 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
     937         704 :          IF (map_rec_size(proc_receive) > 0) THEN
     938         344 :             number_of_rec = number_of_rec + 1
     939             :          END IF
     940             :       END DO
     941        1376 :       ALLOCATE (buffer_rec(number_of_rec))
     942         344 :       rec_counter = 0
     943         704 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     944         360 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
     945         360 :          size_rec_buffer = map_rec_size(proc_receive)
     946         704 :          IF (map_rec_size(proc_receive) > 0) THEN
     947         344 :             rec_counter = rec_counter + 1
     948             :             ! prepare the buffer for receive
     949        1032 :             ALLOCATE (buffer_rec(rec_counter)%msg(size_rec_buffer))
     950      141908 :             buffer_rec(rec_counter)%msg = 0.0_dp
     951         344 :             buffer_rec(rec_counter)%proc = proc_receive
     952             :             ! post the message to be received (not need to send to myself)
     953         344 :             IF (proc_receive /= para_env_sub%mepos) THEN
     954             :                CALL para_env_sub%irecv(buffer_rec(rec_counter)%msg, proc_receive, &
     955           0 :                                        buffer_rec(rec_counter)%msg_req)
     956             :             END IF
     957             :          END IF
     958             :       END DO
     959             :       ! send messages
     960        1720 :       ALLOCATE (req_send(number_of_send))
     961         688 :       req_send = mp_request_null
     962         344 :       send_counter = 0
     963         704 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     964         360 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
     965         704 :          IF (map_send_size(proc_send) > 0) THEN
     966         344 :             send_counter = send_counter + 1
     967         344 :             IF (proc_send == para_env_sub%mepos) THEN
     968      141908 :                buffer_rec(send_counter)%msg(:) = buffer_send(send_counter)%msg
     969             :             ELSE
     970             :                CALL para_env_sub%isend(buffer_send(send_counter)%msg, proc_send, &
     971           0 :                                        buffer_send(send_counter)%msg_req)
     972           0 :                req_send(send_counter) = buffer_send(send_counter)%msg_req
     973             :             END IF
     974             :          END IF
     975             :       END DO
     976         344 :       DEALLOCATE (map_send_size)
     977         344 :       CALL timestop(handle2)
     978             : 
     979             :       ! 4) (if memory is a problem we should move this part after point 5)
     980             :       !    Here we create the new buffer for cyclic(ring) communication and
     981             :       !    we fill it with the data received from the other member of the
     982             :       !    subgroup
     983         344 :       CALL timeset(routineN//"_Bcyclic", handle2)
     984             :       ! first allocata new structure
     985        1704 :       ALLOCATE (buffer_cyclic(0:para_env_exchange%num_pe - 1))
     986        1016 :       DO iproc = 0, para_env_exchange%num_pe - 1
     987         672 :          rec_row_size = sizes(1, iproc)
     988         672 :          rec_col_size = sizes(2, iproc)
     989        2688 :          ALLOCATE (buffer_cyclic(iproc)%msg(rec_row_size, rec_col_size))
     990      155324 :          buffer_cyclic(iproc)%msg = 0.0_dp
     991             :       END DO
     992             :       ! now collect data from other member of the subgroup and fill
     993             :       ! buffer_cyclic
     994         344 :       rec_counter = 0
     995         704 :       DO proc_shift = 0, para_env_sub%num_pe - 1
     996         360 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
     997         360 :          size_rec_buffer = map_rec_size(proc_receive)
     998         704 :          IF (map_rec_size(proc_receive) > 0) THEN
     999         344 :             rec_counter = rec_counter + 1
    1000             : 
    1001             :             ! wait for the message
    1002         344 :             IF (proc_receive /= para_env_sub%mepos) CALL buffer_rec(rec_counter)%msg_req%wait()
    1003             : 
    1004         344 :             CALL timeset(routineN//"_fill", handle3)
    1005         344 :             iii = 0
    1006        1592 :             DO jjB = 1, sizes_1i(2, proc_receive)
    1007        1248 :                send_pcol = col_indeces_info_1i(1, jjB, proc_receive)
    1008        1248 :                j_local = col_indeces_info_1i(2, jjB, proc_receive)
    1009       25286 :                DO iiB = 1, sizes_1i(1, proc_receive)
    1010       23694 :                   send_prow = row_indeces_info_1i(1, iiB, proc_receive)
    1011       23694 :                   proc_send = grid_2_mepos(send_prow, send_pcol)
    1012       23694 :                   proc_send_sub = pos_info(proc_send)
    1013       23694 :                   IF (proc_send_sub /= para_env_sub%mepos) CYCLE
    1014       23694 :                   iii = iii + 1
    1015       23694 :                   i_local = row_indeces_info_1i(2, iiB, proc_receive)
    1016       23694 :                   proc_send_ex = pos_info_ex(proc_send)
    1017       24942 :                   buffer_cyclic(proc_send_ex)%msg(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
    1018             :                END DO
    1019             :             END DO
    1020             :             ! and the same for L2_nu_a
    1021        5658 :             DO jjB = 1, sizes_2a(2, proc_receive)
    1022        5314 :                send_pcol = col_indeces_info_2a(1, jjB, proc_receive)
    1023        5314 :                j_local = col_indeces_info_2a(2, jjB, proc_receive)
    1024      123528 :                DO iiB = 1, sizes_2a(1, proc_receive)
    1025      117870 :                   send_prow = row_indeces_info_2a(1, iiB, proc_receive)
    1026      117870 :                   proc_send = grid_2_mepos(send_prow, send_pcol)
    1027      117870 :                   proc_send_sub = pos_info(proc_send)
    1028      117870 :                   IF (proc_send_sub /= para_env_sub%mepos) CYCLE
    1029      117870 :                   iii = iii + 1
    1030      117870 :                   i_local = row_indeces_info_2a(2, iiB, proc_receive)
    1031      117870 :                   proc_send_ex = pos_info_ex(proc_send)
    1032      123184 :                   buffer_cyclic(proc_send_ex)%msg(i_local, j_local) = buffer_rec(rec_counter)%msg(iii)
    1033             :                END DO
    1034             :             END DO
    1035         344 :             CALL timestop(handle3)
    1036             : 
    1037             :             ! deallocate the received message
    1038         344 :             DEALLOCATE (buffer_rec(rec_counter)%msg)
    1039             :          END IF
    1040             :       END DO
    1041         344 :       DEALLOCATE (row_indeces_info_1i)
    1042         344 :       DEALLOCATE (col_indeces_info_1i)
    1043         344 :       DEALLOCATE (row_indeces_info_2a)
    1044         344 :       DEALLOCATE (col_indeces_info_2a)
    1045         688 :       DEALLOCATE (buffer_rec)
    1046         344 :       DEALLOCATE (map_rec_size)
    1047         344 :       CALL timestop(handle2)
    1048             : 
    1049             :       ! 5)  Wait for all messeges to be sent in the subgroup
    1050         344 :       CALL timeset(routineN//"_sub_waitall", handle2)
    1051         344 :       CALL mp_waitall(req_send(:))
    1052         688 :       DO send_counter = 1, number_of_send
    1053         688 :          DEALLOCATE (buffer_send(send_counter)%msg)
    1054             :       END DO
    1055         688 :       DEALLOCATE (buffer_send)
    1056         344 :       DEALLOCATE (req_send)
    1057         344 :       CALL timestop(handle2)
    1058             : 
    1059             :       ! 6) Start with ring communication
    1060         344 :       CALL timeset(routineN//"_ring", handle2)
    1061         344 :       proc_send_static = MODULO(para_env_exchange%mepos + 1, para_env_exchange%num_pe)
    1062         344 :       proc_receive_static = MODULO(para_env_exchange%mepos - 1, para_env_exchange%num_pe)
    1063        1016 :       max_row_size = MAXVAL(sizes(1, :))
    1064        1016 :       max_col_size = MAXVAL(sizes(2, :))
    1065        1376 :       ALLOCATE (mat_send(max_row_size, max_col_size))
    1066        1376 :       ALLOCATE (mat_rec(max_row_size, max_col_size))
    1067       81732 :       mat_send = 0.0_dp
    1068       79951 :       mat_send(1:nrow_local, 1:ncol_local) = buffer_cyclic(para_env_exchange%mepos)%msg(:, :)
    1069         344 :       DEALLOCATE (buffer_cyclic(para_env_exchange%mepos)%msg)
    1070         672 :       DO proc_shift = 1, para_env_exchange%num_pe - 1
    1071         328 :          proc_receive = MODULO(para_env_exchange%mepos - proc_shift, para_env_exchange%num_pe)
    1072             : 
    1073         328 :          rec_row_size = sizes(1, proc_receive)
    1074         328 :          rec_col_size = sizes(2, proc_receive)
    1075             : 
    1076       76810 :          mat_rec = 0.0_dp
    1077             :          CALL para_env_exchange%sendrecv(mat_send, proc_send_static, &
    1078         328 :                                          mat_rec, proc_receive_static)
    1079             : 
    1080       76810 :          mat_send = 0.0_dp
    1081             :          mat_send(1:rec_row_size, 1:rec_col_size) = mat_rec(1:rec_row_size, 1:rec_col_size) + &
    1082       75029 :                                                     buffer_cyclic(proc_receive)%msg(:, :)
    1083             : 
    1084         672 :          DEALLOCATE (buffer_cyclic(proc_receive)%msg)
    1085             :       END DO
    1086             :       ! and finally
    1087             :       CALL para_env_exchange%sendrecv(mat_send, proc_send_static, &
    1088         344 :                                       mat_rec, proc_receive_static)
    1089       79951 :       L_mu_q%local_data(1:nrow_local, 1:ncol_local) = mat_rec(1:nrow_local, 1:ncol_local)
    1090        1360 :       DEALLOCATE (buffer_cyclic)
    1091         344 :       DEALLOCATE (mat_send)
    1092         344 :       DEALLOCATE (mat_rec)
    1093         344 :       CALL timestop(handle2)
    1094             : 
    1095             :       ! release para_env_exchange
    1096         344 :       CALL mp_para_env_release(para_env_exchange)
    1097             : 
    1098         344 :       CALL cp_fm_release(L1_mu_i)
    1099         344 :       CALL cp_fm_release(L2_nu_a)
    1100         344 :       DEALLOCATE (pos_info_ex)
    1101         344 :       DEALLOCATE (grid_2_mepos)
    1102         344 :       DEALLOCATE (sizes)
    1103         344 :       DEALLOCATE (sizes_1i)
    1104         344 :       DEALLOCATE (sizes_2a)
    1105             : 
    1106             :       ! update the P_ij block of P_MP2 with the
    1107             :       ! non-singular ij pairs
    1108         344 :       CALL timeset(routineN//"_Pij", handle2)
    1109         344 :       NULLIFY (fm_struct_tmp)
    1110             :       CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
    1111         344 :                                nrow_global=homo, ncol_global=homo)
    1112         344 :       CALL cp_fm_create(fm_P_ij, fm_struct_tmp, name="fm_P_ij")
    1113         344 :       CALL cp_fm_struct_release(fm_struct_tmp)
    1114         344 :       CALL cp_fm_set_all(fm_P_ij, 0.0_dp)
    1115             : 
    1116             :       ! we have it, update P_ij local
    1117             :       CALL cp_fm_get_info(matrix=fm_P_ij, &
    1118             :                           nrow_local=nrow_local, &
    1119             :                           ncol_local=ncol_local, &
    1120             :                           row_indices=row_indices, &
    1121         344 :                           col_indices=col_indices)
    1122             : 
    1123         344 :       IF (.NOT. mp2_env%method == ri_rpa_method_gpw) THEN
    1124             :          CALL parallel_gemm('T', 'N', homo, homo, dimen, 1.0_dp, &
    1125             :                             mo_coeff, L_mu_q, 0.0_dp, fm_P_ij, &
    1126             :                             a_first_col=1, &
    1127             :                             a_first_row=1, &
    1128             :                             b_first_col=1, &
    1129             :                             b_first_row=1, &
    1130             :                             c_first_col=1, &
    1131         320 :                             c_first_row=1)
    1132             :          CALL parallel_gemm('T', 'N', homo, homo, dimen, -2.0_dp, &
    1133             :                             L_mu_q, mo_coeff, 2.0_dp, fm_P_ij, &
    1134             :                             a_first_col=1, &
    1135             :                             a_first_row=1, &
    1136             :                             b_first_col=1, &
    1137             :                             b_first_row=1, &
    1138             :                             c_first_col=1, &
    1139         320 :                             c_first_row=1)
    1140             : 
    1141        1476 :          DO jjB = 1, ncol_local
    1142        1156 :             j_global = col_indices(jjB)
    1143        3692 :             DO iiB = 1, nrow_local
    1144        2216 :                i_global = row_indices(iiB)
    1145             :                ! diagonal elements and nearly degenerate ij pairs already updated
    1146        3372 :                IF (ABS(Eigenval(j_global) - Eigenval(i_global)) < mp2_env%ri_grad%eps_canonical) THEN
    1147         656 :                   fm_P_ij%local_data(iiB, jjB) = mp2_env%ri_grad%P_ij(kspin)%array(i_global, j_global)
    1148             :                ELSE
    1149             :                   fm_P_ij%local_data(iiB, jjB) = &
    1150        1560 :                      factor*fm_P_ij%local_data(iiB, jjB)/(Eigenval(j_global) - Eigenval(i_global))
    1151             :                END IF
    1152             :             END DO
    1153             :          END DO
    1154             :       ELSE
    1155         116 :          DO jjB = 1, ncol_local
    1156          92 :             j_global = col_indices(jjB)
    1157         294 :             DO iiB = 1, nrow_local
    1158         178 :                i_global = row_indices(iiB)
    1159         270 :                fm_P_ij%local_data(iiB, jjB) = mp2_env%ri_grad%P_ij(kspin)%array(i_global, j_global)
    1160             :             END DO
    1161             :          END DO
    1162             :       END IF
    1163             :       ! deallocate the local P_ij
    1164         344 :       DEALLOCATE (mp2_env%ri_grad%P_ij(kspin)%array)
    1165         344 :       CALL timestop(handle2)
    1166             : 
    1167             :       ! Now create and fill the P matrix (MO)
    1168             :       ! FOR NOW WE ASSUME P_ab AND P_ij ARE REPLICATED OVER EACH MPI RANK
    1169         344 :       IF (.NOT. ALLOCATED(mp2_env%ri_grad%P_mo)) THEN
    1170        1124 :          ALLOCATE (mp2_env%ri_grad%P_mo(SIZE(mp2_env%ri_grad%mo_coeff_o)))
    1171             :       END IF
    1172             : 
    1173         344 :       CALL timeset(routineN//"_PMO", handle2)
    1174         344 :       NULLIFY (fm_struct_tmp)
    1175             :       CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
    1176         344 :                                nrow_global=dimen, ncol_global=dimen)
    1177         344 :       CALL cp_fm_create(mp2_env%ri_grad%P_mo(kspin), fm_struct_tmp, name="P_MP2_MO")
    1178         344 :       CALL cp_fm_set_all(mp2_env%ri_grad%P_mo(kspin), 0.0_dp)
    1179             : 
    1180             :       ! start with the (easy) occ-occ block and locally held P_ab elements
    1181         344 :       itmp = get_limit(virtual, para_env_sub%num_pe, para_env_sub%mepos)
    1182         344 :       my_B_virtual_start = itmp(1)
    1183         344 :       my_B_virtual_end = itmp(2)
    1184             : 
    1185             :       ! Fill occ-occ block
    1186         344 :       CALL cp_fm_to_fm_submat(fm_P_ij, mp2_env%ri_grad%P_mo(kspin), homo, homo, 1, 1, 1, 1)
    1187         344 :       CALL cp_fm_release(fm_P_ij)
    1188             : 
    1189             :       CALL cp_fm_get_info(mp2_env%ri_grad%P_mo(kspin), &
    1190             :                           nrow_local=nrow_local, &
    1191             :                           ncol_local=ncol_local, &
    1192             :                           row_indices=row_indices, &
    1193             :                           col_indices=col_indices, &
    1194             :                           nrow_block=nrow_block, &
    1195         344 :                           ncol_block=ncol_block)
    1196         344 :       myprow = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%context%mepos(1)
    1197         344 :       mypcol = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%context%mepos(2)
    1198         344 :       nprow = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%context%num_pe(1)
    1199         344 :       npcol = mp2_env%ri_grad%P_mo(kspin)%matrix_struct%context%num_pe(2)
    1200             : 
    1201         344 :       IF (mp2_env%method == ri_mp2_laplace) THEN
    1202             :          CALL parallel_gemm('T', 'N', virtual, virtual, dimen, 1.0_dp, &
    1203             :                             mo_coeff, L_mu_q, 0.0_dp, mp2_env%ri_grad%P_mo(kspin), &
    1204             :                             a_first_col=homo + 1, &
    1205             :                             a_first_row=1, &
    1206             :                             b_first_col=homo + 1, &
    1207             :                             b_first_row=1, &
    1208             :                             c_first_col=homo + 1, &
    1209          24 :                             c_first_row=homo + 1)
    1210             :          CALL parallel_gemm('T', 'N', virtual, virtual, dimen, -2.0_dp, &
    1211             :                             L_mu_q, mo_coeff, 2.0_dp, mp2_env%ri_grad%P_mo(kspin), &
    1212             :                             a_first_col=homo + 1, &
    1213             :                             a_first_row=1, &
    1214             :                             b_first_col=homo + 1, &
    1215             :                             b_first_row=1, &
    1216             :                             c_first_col=homo + 1, &
    1217          24 :                             c_first_row=homo + 1)
    1218             :       END IF
    1219             : 
    1220         344 :       IF (mp2_env%method == ri_mp2_method_gpw .OR. mp2_env%method == ri_rpa_method_gpw) THEN
    1221             :          ! With MP2 and RPA, we have already calculated the density matrix elements
    1222        6290 :          DO jjB = 1, ncol_local
    1223        5970 :             j_global = col_indices(jjB)
    1224        5970 :             IF (j_global <= homo) CYCLE
    1225       59660 :             DO iiB = 1, nrow_local
    1226       54526 :                i_global = row_indices(iiB)
    1227       60496 :                IF (my_B_virtual_start <= i_global - homo .AND. i_global - homo <= my_B_virtual_end) THEN
    1228             :                   mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
    1229       45300 :                      mp2_env%ri_grad%P_ab(kspin)%array(i_global - homo - my_B_virtual_start + 1, j_global - homo)
    1230             :                END IF
    1231             :             END DO
    1232             :          END DO
    1233          24 :       ELSE IF (mp2_env%method == ri_mp2_laplace) THEN
    1234             :          ! With Laplace-SOS-MP2, we still have to calculate the matrix elements of the non-degenerate pairs
    1235         616 :          DO jjB = 1, ncol_local
    1236         592 :             j_global = col_indices(jjB)
    1237         592 :             IF (j_global <= homo) CYCLE
    1238        6764 :             DO iiB = 1, nrow_local
    1239        6240 :                i_global = row_indices(iiB)
    1240        6832 :                IF (ABS(Eigenval(i_global) - Eigenval(j_global)) < mp2_env%ri_grad%eps_canonical) THEN
    1241         565 :                   IF (my_B_virtual_start <= i_global - homo .AND. i_global - homo <= my_B_virtual_end) THEN
    1242             :                      mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
    1243         552 :                         mp2_env%ri_grad%P_ab(kspin)%array(i_global - homo - my_B_virtual_start + 1, j_global - homo)
    1244             :                   ELSE
    1245          13 :                      mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = 0.0_dp
    1246             :                   END IF
    1247             :                ELSE
    1248             :                   mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB) = &
    1249             :                      factor*mp2_env%ri_grad%P_mo(kspin)%local_data(iiB, jjB)/ &
    1250        5675 :                      (Eigenval(i_global) - Eigenval(j_global))
    1251             :                END IF
    1252             :             END DO
    1253             :          END DO
    1254             :       ELSE
    1255           0 :          CPABORT("Calculation of virt-virt block of density matrix is dealt with elsewhere!")
    1256             :       END IF
    1257             : 
    1258             :       ! send around the sub_group the local data and check if we
    1259             :       ! have to update our block with external elements
    1260        1032 :       ALLOCATE (mepos_2_grid(2, 0:para_env_sub%num_pe - 1))
    1261        1032 :       CALL para_env_sub%allgather([myprow, mypcol], mepos_2_grid)
    1262             : 
    1263        1032 :       ALLOCATE (sizes(2, 0:para_env_sub%num_pe - 1))
    1264        1032 :       CALL para_env_sub%allgather([nrow_local, ncol_local], sizes)
    1265             : 
    1266        1376 :       ALLOCATE (ab_rec(nrow_local, ncol_local))
    1267         360 :       DO proc_shift = 1, para_env_sub%num_pe - 1
    1268          16 :          proc_send = MODULO(para_env_sub%mepos + proc_shift, para_env_sub%num_pe)
    1269          16 :          proc_receive = MODULO(para_env_sub%mepos - proc_shift, para_env_sub%num_pe)
    1270             : 
    1271          16 :          send_prow = mepos_2_grid(1, proc_send)
    1272          16 :          send_pcol = mepos_2_grid(2, proc_send)
    1273             : 
    1274          16 :          send_row_size = sizes(1, proc_send)
    1275          16 :          send_col_size = sizes(2, proc_send)
    1276             : 
    1277          64 :          ALLOCATE (ab_send(send_row_size, send_col_size))
    1278        4922 :          ab_send = 0.0_dp
    1279             : 
    1280             :          ! first loop over row since in this way we can cycle
    1281         206 :          DO iiB = 1, send_row_size
    1282             :             i_global = cp_fm_indxl2g(iiB, nrow_block, send_prow, &
    1283         190 :                                      mp2_env%ri_grad%P_mo(kspin)%matrix_struct%first_p_pos(1), nprow)
    1284         190 :             IF (i_global <= homo) CYCLE
    1285         154 :             i_global = i_global - homo
    1286         154 :             IF (.NOT. (my_B_virtual_start <= i_global .AND. i_global <= my_B_virtual_end)) CYCLE
    1287         493 :             DO jjB = 1, send_col_size
    1288             :                j_global = cp_fm_indxl2g(jjB, ncol_block, send_pcol, &
    1289         458 :                                         mp2_env%ri_grad%P_mo(kspin)%matrix_struct%first_p_pos(2), npcol)
    1290         458 :                IF (j_global <= homo) CYCLE
    1291         367 :                j_global = j_global - homo
    1292         648 :                ab_send(iiB, jjB) = mp2_env%ri_grad%P_ab(kspin)%array(i_global - my_B_virtual_start + 1, j_global)
    1293             :             END DO
    1294             :          END DO
    1295             : 
    1296        4922 :          ab_rec = 0.0_dp
    1297             :          CALL para_env_sub%sendrecv(ab_send, proc_send, &
    1298          16 :                                     ab_rec, proc_receive)
    1299             :          mp2_env%ri_grad%P_mo(kspin)%local_data(1:nrow_local, 1:ncol_local) = &
    1300             :             mp2_env%ri_grad%P_mo(kspin)%local_data(1:nrow_local, 1:ncol_local) + &
    1301        4922 :             ab_rec(1:nrow_local, 1:ncol_local)
    1302             : 
    1303         360 :          DEALLOCATE (ab_send)
    1304             :       END DO
    1305         344 :       DEALLOCATE (ab_rec)
    1306         344 :       DEALLOCATE (mepos_2_grid)
    1307         344 :       DEALLOCATE (sizes)
    1308             : 
    1309             :       ! deallocate the local P_ab
    1310         344 :       DEALLOCATE (mp2_env%ri_grad%P_ab(kspin)%array)
    1311         344 :       CALL timestop(handle2)
    1312             : 
    1313             :       ! create also W_MP2_MO
    1314         344 :       CALL timeset(routineN//"_WMO", handle2)
    1315         344 :       IF (.NOT. ALLOCATED(mp2_env%ri_grad%W_mo)) THEN
    1316        1124 :          ALLOCATE (mp2_env%ri_grad%W_mo(SIZE(mp2_env%ri_grad%mo_coeff_o)))
    1317             :       END IF
    1318             : 
    1319         344 :       CALL cp_fm_create(mp2_env%ri_grad%W_mo(kspin), fm_struct_tmp, name="W_MP2_MO")
    1320         344 :       CALL cp_fm_struct_release(fm_struct_tmp)
    1321             : 
    1322             :       ! all block
    1323             :       CALL parallel_gemm('T', 'N', dimen, dimen, dimen, 2.0_dp*factor, &
    1324             :                          L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
    1325             :                          a_first_col=1, &
    1326             :                          a_first_row=1, &
    1327             :                          b_first_col=1, &
    1328             :                          b_first_row=1, &
    1329             :                          c_first_col=1, &
    1330         344 :                          c_first_row=1)
    1331             : 
    1332             :       ! occ-occ block
    1333             :       CALL parallel_gemm('T', 'N', homo, homo, dimen, -2.0_dp*factor, &
    1334             :                          L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
    1335             :                          a_first_col=1, &
    1336             :                          a_first_row=1, &
    1337             :                          b_first_col=1, &
    1338             :                          b_first_row=1, &
    1339             :                          c_first_col=1, &
    1340         344 :                          c_first_row=1)
    1341             : 
    1342             :       ! occ-virt block
    1343             :       CALL parallel_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, &
    1344             :                          mo_coeff, L_mu_q, 0.0_dp, mp2_env%ri_grad%W_mo(kspin), &
    1345             :                          a_first_col=1, &
    1346             :                          a_first_row=1, &
    1347             :                          b_first_col=homo + 1, &
    1348             :                          b_first_row=1, &
    1349             :                          c_first_col=homo + 1, &
    1350         344 :                          c_first_row=1)
    1351         344 :       CALL timestop(handle2)
    1352             : 
    1353             :       ! Calculate occ-virt block of the lagrangian in MO
    1354         344 :       CALL timeset(routineN//"_Ljb", handle2)
    1355         344 :       IF (.NOT. ALLOCATED(mp2_env%ri_grad%L_jb)) THEN
    1356        1124 :          ALLOCATE (mp2_env%ri_grad%L_jb(SIZE(mp2_env%ri_grad%mo_coeff_o)))
    1357             :       END IF
    1358             : 
    1359             :       CALL cp_fm_struct_create(fm_struct_tmp, para_env=para_env, context=blacs_env, &
    1360         344 :                                nrow_global=homo, ncol_global=virtual)
    1361         344 :       CALL cp_fm_create(mp2_env%ri_grad%L_jb(kspin), fm_struct_tmp, name="fm_L_jb")
    1362         344 :       CALL cp_fm_struct_release(fm_struct_tmp)
    1363             : 
    1364             :       ! first Virtual
    1365             :       CALL parallel_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, &
    1366             :                          L_mu_q, mo_coeff, 0.0_dp, mp2_env%ri_grad%L_jb(kspin), &
    1367             :                          a_first_col=1, &
    1368             :                          a_first_row=1, &
    1369             :                          b_first_col=homo + 1, &
    1370             :                          b_first_row=1, &
    1371             :                          c_first_col=1, &
    1372         344 :                          c_first_row=1)
    1373             :       ! then occupied
    1374             :       CALL parallel_gemm('T', 'N', homo, virtual, dimen, 2.0_dp*factor, &
    1375             :                          mo_coeff, L_mu_q, 1.0_dp, mp2_env%ri_grad%L_jb(kspin), &
    1376             :                          a_first_col=1, &
    1377             :                          a_first_row=1, &
    1378             :                          b_first_col=homo + 1, &
    1379             :                          b_first_row=1, &
    1380             :                          c_first_col=1, &
    1381         344 :                          c_first_row=1)
    1382             : 
    1383             :       ! finally release L_mu_q
    1384         344 :       CALL cp_fm_release(L_mu_q)
    1385         344 :       CALL timestop(handle2)
    1386             : 
    1387             :       ! here we should be done next CPHF
    1388             : 
    1389         344 :       DEALLOCATE (pos_info)
    1390             : 
    1391         344 :       CALL timestop(handle)
    1392             : 
    1393        6192 :    END SUBROUTINE create_W_P
    1394             : 
    1395             : END MODULE mp2_ri_grad

Generated by: LCOV version 1.15