LCOV - code coverage report
Current view: top level - src - mp2_direct_method.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 85.2 % 770 656
Test Date: 2025-12-04 06:27:48 Functions: 85.7 % 7 6

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Routines to calculate MP2 energy
      10              : !> \par History
      11              : !>      06.2011 created [Mauro Del Ben]
      12              : !> \author Mauro Del Ben
      13              : ! **************************************************************************************************
      14              : MODULE mp2_direct_method
      15              :    USE cell_types,                      ONLY: cell_type
      16              :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      17              :                                               cp_logger_type
      18              :    USE hfx_energy_potential,            ONLY: coulomb4
      19              :    USE hfx_load_balance_methods,        ONLY: cost_model,&
      20              :                                               p1_energy,&
      21              :                                               p2_energy,&
      22              :                                               p3_energy
      23              :    USE hfx_pair_list_methods,           ONLY: build_pair_list_mp2
      24              :    USE hfx_types,                       ONLY: hfx_basis_type,&
      25              :                                               hfx_pgf_list,&
      26              :                                               hfx_pgf_product_list,&
      27              :                                               hfx_potential_type,&
      28              :                                               hfx_screen_coeff_type,&
      29              :                                               hfx_type,&
      30              :                                               pair_set_list_type
      31              :    USE input_constants,                 ONLY: do_potential_TShPSC
      32              :    USE kinds,                           ONLY: dp,&
      33              :                                               int_8
      34              :    USE libint_wrapper,                  ONLY: cp_libint_t
      35              :    USE machine,                         ONLY: m_flush
      36              :    USE mathconstants,                   ONLY: zero
      37              :    USE message_passing,                 ONLY: mp_para_env_release,&
      38              :                                               mp_para_env_type
      39              :    USE mp2_ri_libint,                   ONLY: prepare_integral_calc
      40              :    USE mp2_types,                       ONLY: init_TShPSC_lmax,&
      41              :                                               mp2_biel_type,&
      42              :                                               mp2_type,&
      43              :                                               pair_list_type_mp2
      44              :    USE orbital_pointers,                ONLY: ncoset
      45              :    USE particle_types,                  ONLY: particle_type
      46              :    USE qs_environment_types,            ONLY: qs_environment_type
      47              :    USE t_sh_p_s_c,                      ONLY: free_C0
      48              : #include "./base/base_uses.f90"
      49              : 
      50              :    IMPLICIT NONE
      51              :    PRIVATE
      52              : 
      53              :    PUBLIC ::  mp2_direct_energy, mp2_canonical_direct_single_batch
      54              : 
      55              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'mp2_direct_method'
      56              : 
      57              : !***
      58              : 
      59              : CONTAINS
      60              : 
      61              : ! **************************************************************************************************
      62              : !> \brief ...
      63              : !> \param dimen ...
      64              : !> \param occ_i ...
      65              : !> \param occ_j ...
      66              : !> \param mp2_biel ...
      67              : !> \param mp2_env ...
      68              : !> \param C_i ...
      69              : !> \param Auto_i ...
      70              : !> \param Emp2 ...
      71              : !> \param Emp2_Cou ...
      72              : !> \param Emp2_ex ...
      73              : !> \param qs_env ...
      74              : !> \param para_env ...
      75              : !> \param unit_nr ...
      76              : !> \param C_j ...
      77              : !> \param Auto_j ...
      78              : ! **************************************************************************************************
      79           26 :    SUBROUTINE mp2_direct_energy(dimen, occ_i, occ_j, mp2_biel, mp2_env, C_i, Auto_i, Emp2, Emp2_Cou, Emp2_ex, &
      80           26 :                                 qs_env, para_env, unit_nr, C_j, Auto_j)
      81              :       INTEGER                                            :: dimen, occ_i, occ_j
      82              :       TYPE(mp2_biel_type)                                :: mp2_biel
      83              :       TYPE(mp2_type)                                     :: mp2_env
      84              :       REAL(KIND=dp), DIMENSION(dimen, dimen)             :: C_i
      85              :       REAL(KIND=dp), DIMENSION(dimen)                    :: Auto_i
      86              :       REAL(KIND=dp)                                      :: Emp2, Emp2_Cou, Emp2_ex
      87              :       TYPE(qs_environment_type), POINTER                 :: qs_env
      88              :       TYPE(mp_para_env_type), POINTER                    :: para_env
      89              :       INTEGER                                            :: unit_nr
      90              :       REAL(KIND=dp), DIMENSION(dimen, dimen), OPTIONAL   :: C_j
      91              :       REAL(KIND=dp), DIMENSION(dimen), OPTIONAL          :: Auto_j
      92              : 
      93              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'mp2_direct_energy'
      94              :       REAL(KIND=dp), PARAMETER                           :: zero = 0.D+00
      95              : 
      96              :       INTEGER :: batch_number, color_sub, counter, elements_ij_proc, group_counter, handle, i, &
      97              :          i_batch, i_batch_start, i_group_counter, j, j_batch_start, j_group_counter, last_batch, &
      98              :          max_batch_number, max_batch_size, max_set, minimum_memory_needed, my_batch_size, &
      99              :          my_I_batch_size, my_I_occupied_end, my_I_occupied_start, my_J_batch_size, &
     100              :          my_J_occupied_end, my_J_occupied_start, natom, Ni_occupied, Nj_occupied, number_groups, &
     101              :          number_i_subset, number_j_subset, one, sqrt_number_groups, total_I_size_batch_group, &
     102              :          total_J_size_batch_group, virt_i, virt_j
     103           26 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: batch_sizes, batch_sizes_tmp, &
     104           26 :                                                             vector_batch_I_size_group, &
     105           26 :                                                             vector_batch_J_size_group
     106           26 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: ij_list_proc, ij_list_proc_temp, &
     107           26 :                                                             ij_matrix
     108              :       LOGICAL                                            :: alpha_beta_case
     109              :       TYPE(mp_para_env_type), POINTER                    :: para_env_sub
     110              : 
     111           26 :       CALL timeset(routineN, handle)
     112              : 
     113           26 :       alpha_beta_case = .FALSE.
     114           26 :       IF (PRESENT(C_j) .AND. PRESENT(Auto_j)) alpha_beta_case = .TRUE.
     115              : 
     116           26 :       IF (unit_nr > 0 .AND. mp2_env%potential_parameter%potential_type == do_potential_TShPSC) THEN
     117            1 :          IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T64,F12.6,A5)') 'Truncated MP2 method, Rt=', &
     118            2 :             mp2_env%potential_parameter%cutoff_radius, ' Bohr'
     119              :       END IF
     120              : 
     121              :       ! create the local para env
     122              :       ! each para_env_sub corresponds to a group that is going to compute
     123              :       ! all the integrals. To each group a batch I is assigned and the
     124              :       ! communication takes place only inside the group
     125           26 :       number_groups = para_env%num_pe/mp2_env%mp2_num_proc
     126           26 :       IF (number_groups*mp2_env%mp2_num_proc /= para_env%num_pe) THEN
     127            0 :          CPABORT(" The number of processors needs to be a multiple of the processors per group. ")
     128              :       END IF
     129           26 :       IF (number_groups > occ_i*occ_j) THEN
     130            2 :          IF (unit_nr > 0) WRITE (unit_nr, '(T3,A)') 'Number of groups greater then the number of IJ pairs!'
     131            2 :          IF (unit_nr > 0) WRITE (unit_nr, '(T3,A)') 'Consider using more processors per group for improved efficiency'
     132              :       END IF
     133              : 
     134           26 :       color_sub = para_env%mepos/mp2_env%mp2_num_proc
     135           26 :       ALLOCATE (para_env_sub)
     136           26 :       CALL para_env_sub%from_split(para_env, color_sub)
     137              : 
     138              :       ! calculate the maximal size of the batch, according to the maximum RS size
     139           26 :       max_set = SIZE(mp2_biel%index_table, 2)
     140           26 :       minimum_memory_needed = (8*(max_set**4))/1024**2
     141           26 :       IF (minimum_memory_needed > mp2_env%mp2_memory) THEN
     142            0 :          IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T66,F12.6,A3)') 'Memory required below the minimum, new memory:', &
     143            0 :             minimum_memory_needed, 'MiB'
     144            0 :          mp2_env%mp2_memory = minimum_memory_needed
     145              :       END IF
     146              : 
     147              :       ! Distribute the batches over the groups in
     148              :       ! a rectangular fashion, bigger size for J index
     149              :       ! the sizes of the I batches should be as small as possible
     150           26 :       sqrt_number_groups = INT(SQRT(REAL(number_groups, KIND=dp)))
     151           26 :       DO i = 1, number_groups
     152           26 :          IF (MOD(number_groups, i) == 0) THEN
     153           26 :             IF (sqrt_number_groups/i <= 1) THEN
     154              :                number_j_subset = i
     155              :                EXIT
     156              :             END IF
     157              :          END IF
     158              :       END DO
     159           26 :       number_i_subset = number_groups/number_j_subset
     160              : 
     161           26 :       IF (number_i_subset < number_j_subset) THEN
     162            0 :          number_i_subset = number_j_subset
     163            0 :          number_j_subset = number_groups/number_i_subset
     164              :       END IF
     165              : 
     166              :       ! Distribute the I index and the J index over groups
     167           26 :       total_I_size_batch_group = occ_i/number_i_subset
     168           26 :       IF (total_I_size_batch_group < 1) total_I_size_batch_group = 1
     169           78 :       ALLOCATE (vector_batch_I_size_group(0:number_i_subset - 1))
     170              : 
     171           78 :       vector_batch_I_size_group = 0
     172           78 :       DO i = 0, number_i_subset - 1
     173           78 :          vector_batch_I_size_group(i) = total_I_size_batch_group
     174              :       END DO
     175           78 :       IF (SUM(vector_batch_I_size_group) /= occ_i) THEN
     176           54 :          one = 1
     177           54 :          IF (SUM(vector_batch_I_size_group) > occ_i) one = -1
     178           18 :          i = -1
     179              :          DO
     180           18 :             i = i + 1
     181           18 :             vector_batch_I_size_group(i) = vector_batch_I_size_group(i) + one
     182           54 :             IF (SUM(vector_batch_I_size_group) == occ_i) EXIT
     183           18 :             IF (i == number_i_subset - 1) i = -1
     184              :          END DO
     185              :       END IF
     186              : 
     187           26 :       total_J_size_batch_group = occ_j/number_j_subset
     188           26 :       IF (total_J_size_batch_group < 1) total_J_size_batch_group = 1
     189           78 :       ALLOCATE (vector_batch_J_size_group(0:number_j_subset - 1))
     190              : 
     191           52 :       vector_batch_J_size_group = 0
     192           52 :       DO i = 0, number_J_subset - 1
     193           52 :          vector_batch_J_size_group(i) = total_J_size_batch_group
     194              :       END DO
     195           52 :       IF (SUM(vector_batch_J_size_group) /= occ_j) THEN
     196            0 :          one = 1
     197            0 :          IF (SUM(vector_batch_J_size_group) > occ_j) one = -1
     198            0 :          i = -1
     199              :          DO
     200            0 :             i = i + 1
     201            0 :             vector_batch_J_size_group(i) = vector_batch_J_size_group(i) + one
     202            0 :             IF (SUM(vector_batch_J_size_group) == occ_j) EXIT
     203            0 :             IF (i == number_J_subset - 1) i = -1
     204              :          END DO
     205              :       END IF
     206              : 
     207              :       ! now the starting and ending I and J occupied orbitals are assigned to each group
     208           26 :       group_counter = 0
     209           26 :       i_group_counter = 0
     210           26 :       my_I_occupied_start = 1
     211           39 :       DO i = 0, number_i_subset - 1
     212              :          my_J_occupied_start = 1
     213              :          j_group_counter = 0
     214           52 :          DO j = 0, number_j_subset - 1
     215           39 :             group_counter = group_counter + 1
     216           39 :             IF (color_sub == group_counter - 1) EXIT
     217           13 :             my_J_occupied_start = my_J_occupied_start + vector_batch_J_size_group(j)
     218           52 :             j_group_counter = j_group_counter + 1
     219              :          END DO
     220           39 :          IF (color_sub == group_counter - 1) EXIT
     221           13 :          my_I_occupied_start = my_I_occupied_start + vector_batch_I_size_group(i)
     222           39 :          i_group_counter = i_group_counter + 1
     223              :       END DO
     224           26 :       my_I_occupied_end = my_I_occupied_start + vector_batch_I_size_group(i_group_counter) - 1
     225           26 :       my_I_batch_size = vector_batch_I_size_group(i_group_counter)
     226           26 :       my_J_occupied_end = my_J_occupied_start + vector_batch_J_size_group(j_group_counter) - 1
     227           26 :       my_J_batch_size = vector_batch_J_size_group(j_group_counter)
     228              : 
     229           26 :       DEALLOCATE (vector_batch_I_size_group)
     230           26 :       DEALLOCATE (vector_batch_J_size_group)
     231              : 
     232              :       max_batch_size = MIN( &
     233              :                        MAX(1, &
     234              :                            INT(mp2_env%mp2_memory*INT(1024, KIND=int_8)**2/ &
     235              :                                (8*(2*dimen - occ_i)*INT(dimen, KIND=int_8)*my_J_batch_size/para_env_sub%num_pe))) &
     236           26 :                        , my_I_batch_size)
     237           26 :       IF (max_batch_size < 1) THEN
     238            1 :          max_batch_size = INT((8*(occ_i + 1)*INT(dimen, KIND=int_8)**2/para_env%num_pe)/1024**2)
     239            1 :          IF (unit_nr > 0) WRITE (unit_nr, '(T3,A,T72,I6,A3)') 'More memory required, at least:', max_batch_size, 'MiB'
     240            1 :          max_batch_size = 1
     241              :       END IF
     242              : 
     243              :       ! create the size of the batches inside the group
     244           26 :       my_batch_size = my_I_batch_size
     245           77 :       ALLOCATE (batch_sizes(my_batch_size))
     246              : 
     247           85 :       batch_sizes = -HUGE(0)
     248              :       batch_number = 0
     249           51 :       DO i = 1, my_batch_size
     250           45 :          IF (i*max_batch_size > my_batch_size) EXIT
     251           25 :          batch_number = batch_number + 1
     252           51 :          batch_sizes(i) = max_batch_size
     253              :       END DO
     254           26 :       last_batch = my_batch_size - max_batch_size*batch_number
     255           26 :       IF (last_batch > 0) THEN
     256            0 :          batch_number = batch_number + 1
     257            0 :          batch_sizes(batch_number) = last_batch
     258              :       END IF
     259              : 
     260           77 :       ALLOCATE (batch_sizes_tmp(batch_number))
     261           51 :       batch_sizes_tmp(1:batch_number) = batch_sizes(1:batch_number)
     262           26 :       DEALLOCATE (batch_sizes)
     263           51 :       ALLOCATE (batch_sizes(batch_number))
     264           51 :       batch_sizes(:) = batch_sizes_tmp
     265           26 :       DEALLOCATE (batch_sizes_tmp)
     266              : 
     267           51 :       max_batch_size = MAXVAL(batch_sizes)
     268           26 :       CALL para_env%max(max_batch_size)
     269           26 :       max_batch_number = batch_number
     270           26 :       CALL para_env%max(max_batch_number)
     271           26 :       IF (unit_nr > 0) THEN
     272           13 :          WRITE (unit_nr, '(T3,A,T76,I5)') 'Maximum used batch size: ', max_batch_size
     273           13 :          WRITE (unit_nr, '(T3,A,T76,I5)') 'Number of integral recomputations: ', max_batch_number
     274           13 :          CALL m_flush(unit_nr)
     275              :       END IF
     276              : 
     277              :       ! Batches sizes exceed the occupied orbitals allocated for group
     278           51 :       CPASSERT(SUM(batch_sizes) <= my_batch_size)
     279              : 
     280           26 :       virt_i = dimen - occ_i
     281           26 :       virt_j = dimen - occ_j
     282           26 :       natom = SIZE(mp2_biel%index_table, 1)
     283              : 
     284           26 :       CALL para_env%sync()
     285           26 :       Emp2 = zero
     286           26 :       Emp2_Cou = zero
     287           26 :       Emp2_ex = zero
     288           26 :       i_batch_start = my_I_occupied_start - 1
     289           26 :       j_batch_start = my_J_occupied_start - 1
     290           26 :       Nj_occupied = my_J_batch_size
     291           51 :       DO i_batch = 1, batch_number
     292              : 
     293           25 :          Ni_occupied = batch_sizes(i_batch)
     294              : 
     295           25 :          counter = -1
     296          100 :          ALLOCATE (ij_matrix(Ni_occupied, Nj_occupied))
     297              : 
     298          456 :          ij_matrix = 0
     299           84 :          DO i = 1, Ni_occupied
     300          402 :             DO j = 1, Nj_occupied
     301          318 :                counter = counter + 1
     302          377 :                IF (MOD(counter, para_env_sub%num_pe) == para_env_sub%mepos) THEN
     303          318 :                   ij_matrix(i, j) = ij_matrix(i, j) + 1
     304              :                END IF
     305              :             END DO
     306              :          END DO
     307              : 
     308           75 :          ALLOCATE (ij_list_proc_temp(Ni_occupied*occ_j, 2))
     309              : 
     310           25 :          elements_ij_proc = 0
     311           84 :          DO i = 1, Ni_occupied
     312          402 :             DO j = 1, Nj_occupied
     313          318 :                IF (ij_matrix(i, j) == 0) CYCLE
     314          318 :                elements_ij_proc = elements_ij_proc + 1
     315          318 :                ij_list_proc_temp(elements_ij_proc, 1) = i
     316          377 :                ij_list_proc_temp(elements_ij_proc, 2) = j
     317              :             END DO
     318              :          END DO
     319           25 :          DEALLOCATE (ij_matrix)
     320              : 
     321           75 :          ALLOCATE (ij_list_proc(elements_ij_proc, 2))
     322          343 :          DO i = 1, elements_ij_proc
     323          318 :             ij_list_proc(i, 1) = ij_list_proc_temp(i, 1)
     324          343 :             ij_list_proc(i, 2) = ij_list_proc_temp(i, 2)
     325              :          END DO
     326           25 :          DEALLOCATE (ij_list_proc_temp)
     327              : 
     328           25 :          IF (.NOT. alpha_beta_case) THEN
     329              :             CALL mp2_canonical_direct_single_batch(Emp2, Emp2_Cou, Emp2_ex, mp2_env, qs_env, para_env_sub, &
     330              :                                                    mp2_biel, dimen, C_i, Auto_i, i_batch_start, Ni_occupied, occ_i, &
     331           21 :                                                    elements_ij_proc, ij_list_proc, Nj_occupied, j_batch_start)
     332              :          ELSE
     333              :             CALL mp2_canonical_direct_single_batch(Emp2, Emp2_Cou, Emp2_ex, mp2_env, qs_env, para_env_sub, &
     334              :                                                    mp2_biel, dimen, C_i, Auto_i, i_batch_start, Ni_occupied, occ_i, &
     335              :                                                    elements_ij_proc, ij_list_proc, Nj_occupied, j_batch_start, &
     336            4 :                                                    occ_j, C_j, Auto_j)
     337              :          END IF
     338              : 
     339           25 :          i_batch_start = i_batch_start + Ni_occupied
     340              : 
     341           51 :          DEALLOCATE (ij_list_proc)
     342              : 
     343              :       END DO
     344              : 
     345           26 :       CALL para_env%sum(Emp2_Cou)
     346           26 :       CALL para_env%sum(Emp2_Ex)
     347           26 :       CALL para_env%sum(Emp2)
     348              : 
     349           26 :       CALL mp_para_env_release(para_env_sub)
     350              : 
     351           26 :       CALL timestop(handle)
     352              : 
     353           52 :    END SUBROUTINE mp2_direct_energy
     354              : 
     355              : ! **************************************************************************************************
     356              : !> \brief ...
     357              : !> \param Emp2 ...
     358              : !> \param Emp2_Cou ...
     359              : !> \param Emp2_ex ...
     360              : !> \param mp2_env ...
     361              : !> \param qs_env ...
     362              : !> \param para_env ...
     363              : !> \param mp2_biel ...
     364              : !> \param dimen ...
     365              : !> \param C ...
     366              : !> \param Auto ...
     367              : !> \param i_batch_start ...
     368              : !> \param Ni_occupied ...
     369              : !> \param occupied ...
     370              : !> \param elements_ij_proc ...
     371              : !> \param ij_list_proc ...
     372              : !> \param Nj_occupied ...
     373              : !> \param j_batch_start ...
     374              : !> \param occupied_beta ...
     375              : !> \param C_beta ...
     376              : !> \param Auto_beta ...
     377              : !> \param Integ_MP2 ...
     378              : !> \par History
     379              : !>      06.2011 created [Mauro Del Ben]
     380              : !> \author Mauro Del Ben
     381              : ! **************************************************************************************************
     382           43 :    SUBROUTINE mp2_canonical_direct_single_batch(Emp2, Emp2_Cou, Emp2_ex, mp2_env, qs_env, para_env, &
     383           86 :                                                 mp2_biel, dimen, C, Auto, i_batch_start, Ni_occupied, &
     384           43 :                                                 occupied, elements_ij_proc, ij_list_proc, Nj_occupied, j_batch_start, &
     385           10 :                                                 occupied_beta, C_beta, Auto_beta, Integ_MP2)
     386              : 
     387              :       REAL(KIND=dp), INTENT(INOUT)                       :: Emp2, Emp2_Cou, Emp2_ex
     388              :       TYPE(mp2_type)                                     :: mp2_env
     389              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     390              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
     391              :       TYPE(mp2_biel_type), INTENT(IN)                    :: mp2_biel
     392              :       INTEGER, INTENT(IN)                                :: dimen
     393              :       REAL(KIND=dp), DIMENSION(dimen, dimen), INTENT(IN) :: C
     394              :       REAL(KIND=dp), DIMENSION(dimen), INTENT(IN)        :: Auto
     395              :       INTEGER, INTENT(IN)                                :: i_batch_start, Ni_occupied, occupied, &
     396              :                                                             elements_ij_proc
     397              :       INTEGER, DIMENSION(elements_ij_proc, 2), &
     398              :          INTENT(IN)                                      :: ij_list_proc
     399              :       INTEGER, INTENT(IN)                                :: Nj_occupied, j_batch_start
     400              :       INTEGER, INTENT(IN), OPTIONAL                      :: occupied_beta
     401              :       REAL(KIND=dp), DIMENSION(dimen, dimen), &
     402              :          INTENT(IN), OPTIONAL                            :: C_beta
     403              :       REAL(KIND=dp), DIMENSION(dimen), INTENT(IN), &
     404              :          OPTIONAL                                        :: Auto_beta
     405              :       REAL(KIND=dp), ALLOCATABLE, &
     406              :          DIMENSION(:, :, :, :), INTENT(OUT), OPTIONAL    :: Integ_MP2
     407              : 
     408              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'mp2_canonical_direct_single_batch'
     409              : 
     410              :       INTEGER :: case_index, counter_proc, elements_ij_proc_rec, elements_kl_proc, global_counter, &
     411              :          handle, i, i_list_ij, i_list_kl, i_set_list_ij, i_set_list_ij_start, i_set_list_ij_stop, &
     412              :          i_set_list_kl, i_set_list_kl_start, i_set_list_kl_stop, i_start, iatom, iatom_end, &
     413              :          iatom_start, iiB, ij_elem_max, ikind, index_ij_rec, index_ij_send, index_kl, &
     414              :          index_proc_ij, index_proc_shift, iset, jatom, jatom_end, jatom_start, jjB, jkind, jset, &
     415              :          katom, katom_end, katom_start, kkB, kkind, kset, latom, latom_end, latom_start, lkind, &
     416              :          llB, lset, max_num_call_sec_transf, max_pgf, max_set, multiple
     417              :       INTEGER :: my_num_call_sec_transf, natom, ncob, nints, nseta, nsetb, nsgf_max, nspins, &
     418              :          primitive_counter, proc_num, proc_receive, proc_send, R_offset_rec, Rsize_rec, &
     419              :          S_offset_rec, same_size_kl_index, sphi_a_u1, sphi_a_u2, sphi_a_u3, sphi_b_u1, sphi_b_u2, &
     420              :          sphi_b_u3, sphi_c_u1, sphi_c_u2, sphi_c_u3, sphi_d_u1, sphi_d_u2, sphi_d_u3, Ssize_rec, &
     421              :          step_size, total_num_RS_task
     422              :       INTEGER(int_8)                                     :: estimate_to_store_int, neris_tmp, &
     423              :                                                             neris_total, nprim_ints
     424           43 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: kind_of, nimages, proc_num_task, &
     425           43 :                                                             same_size_kl_elements_counter
     426           43 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: kl_list_proc, task_counter_RS, &
     427           43 :                                                             task_counter_RS_temp
     428              :       INTEGER, DIMENSION(4)                              :: RS_counter_temp
     429              :       INTEGER, DIMENSION(5)                              :: size_parameter_rec, size_parameter_send
     430           43 :       INTEGER, DIMENSION(:), POINTER                     :: la_max, la_min, lb_max, lb_min, lc_max, &
     431           43 :                                                             lc_min, ld_max, ld_min, npgfa, npgfb, &
     432           43 :                                                             npgfc, npgfd, nsgfa, nsgfb, nsgfc, &
     433           43 :                                                             nsgfd
     434           43 :       INTEGER, DIMENSION(:, :), POINTER                  :: nsgfl_a, nsgfl_b, nsgfl_c, nsgfl_d
     435              :       LOGICAL                                            :: alpha_beta_case, case_send_receive, &
     436              :                                                             copy_integrals, do_periodic
     437           43 :       LOGICAL, DIMENSION(:, :), POINTER                  :: shm_atomic_pair_list
     438              :       REAL(KIND=dp) :: cartesian_estimate, coeffs_kind_max0, cost_tmp, eps_schwarz, ln_10, &
     439              :          log10_eps_schwarz, log10_pmax, max_contraction_val, max_val1, max_val2, max_val2_set, &
     440              :          pmax_atom, pmax_entry, ra(3), rab2, rb(3), rc(3), rcd2, rd(3), screen_kind_ij, &
     441              :          screen_kind_kl
     442           43 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: cost_RS, cost_RS_temp, ee_buffer1, &
     443           43 :                                                             ee_buffer2, ee_primitives_tmp, &
     444           43 :                                                             ee_work, ee_work2, primitive_integrals
     445           43 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: BIb_RS_mat_rec, C_beta_T, max_contraction
     446           43 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: BIb, BIb_RS_mat_rec_big, zero_mat_big
     447           43 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :, :)  :: BI1, MNRS
     448           43 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: p_work
     449           43 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: shm_pmax_block, zeta, zetb, zetc, zetd
     450           43 :       REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: sphi_a_ext_set, sphi_b_ext_set, &
     451           43 :                                                             sphi_c_ext_set, sphi_d_ext_set
     452           43 :       REAL(KIND=dp), DIMENSION(:, :, :, :), POINTER      :: sphi_a_ext, sphi_b_ext, sphi_c_ext, &
     453           43 :                                                             sphi_d_ext
     454           86 :       REAL(KIND=dp), DIMENSION(dimen, 2)                 :: zero_mat
     455           86 :       REAL(KIND=dp), DIMENSION(dimen, dimen)             :: C_T
     456              :       TYPE(cell_type), POINTER                           :: cell
     457              :       TYPE(cp_libint_t)                                  :: private_lib
     458              :       TYPE(cp_logger_type), POINTER                      :: logger
     459           43 :       TYPE(hfx_basis_type), DIMENSION(:), POINTER        :: basis_parameter
     460           43 :       TYPE(hfx_pgf_list), ALLOCATABLE, DIMENSION(:)      :: pgf_list_ij, pgf_list_kl
     461              :       TYPE(hfx_pgf_product_list), ALLOCATABLE, &
     462           43 :          DIMENSION(:)                                    :: pgf_product_list
     463              :       TYPE(hfx_potential_type)                           :: mp2_potential_parameter
     464              :       TYPE(hfx_screen_coeff_type), DIMENSION(:, :), &
     465           43 :          POINTER                                         :: screen_coeffs_kind, tmp_R_1, tmp_R_2, &
     466           43 :                                                             tmp_screen_pgf1, tmp_screen_pgf2
     467              :       TYPE(hfx_screen_coeff_type), &
     468           43 :          DIMENSION(:, :, :, :), POINTER                  :: screen_coeffs_set
     469              :       TYPE(hfx_screen_coeff_type), &
     470           43 :          DIMENSION(:, :, :, :, :, :), POINTER            :: radii_pgf, screen_coeffs_pgf
     471              :       TYPE(hfx_type), POINTER                            :: actual_x_data
     472           43 :       TYPE(pair_list_type_mp2)                           :: list_ij, list_kl
     473              :       TYPE(pair_set_list_type), ALLOCATABLE, &
     474           43 :          DIMENSION(:)                                    :: set_list_ij, set_list_kl
     475           43 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     476              : 
     477           43 :       CALL timeset(routineN, handle)
     478              : 
     479              :       ! The Integ_MP2 will contain the (ia|jb) integrals, necessary for example
     480              :       ! for the RI-MP2 basis optimization. In this case the number of ij batches
     481              :       ! has to be equal to 1 (all integrals over molecular orbitals are computed
     482              :       ! in a single step).
     483           43 :       copy_integrals = .FALSE.
     484           43 :       IF (PRESENT(Integ_MP2)) copy_integrals = .TRUE.
     485              : 
     486           43 :       alpha_beta_case = .FALSE.
     487              : 
     488              :       CALL prepare_integral_calc(cell, qs_env, mp2_env, para_env, mp2_potential_parameter, actual_x_data, &
     489              :                                  do_periodic, basis_parameter, max_set, particle_set, natom, kind_of, &
     490              :                                  nsgf_max, primitive_integrals, ee_work, ee_work2, ee_buffer1, ee_buffer2, &
     491              :                                  ee_primitives_tmp, nspins, max_contraction, max_pgf, pgf_list_ij, &
     492              :                                  pgf_list_kl, pgf_product_list, nimages, eps_schwarz, log10_eps_schwarz, &
     493              :                                  private_lib, p_work, screen_coeffs_set, screen_coeffs_kind, screen_coeffs_pgf, &
     494           43 :                                  radii_pgf)
     495              : 
     496           43 :       ln_10 = LOG(10.0_dp)
     497              : 
     498           43 :       neris_tmp = 0_int_8
     499           43 :       neris_total = 0_int_8
     500           43 :       nprim_ints = 0_int_8
     501              : 
     502              : !!!!!!!!!
     503          880 :       ALLOCATE (list_ij%elements(natom**2))
     504          837 :       ALLOCATE (list_kl%elements(natom**2))
     505              : !!!!!!!!!
     506              : 
     507          201 :       coeffs_kind_max0 = MAXVAL(screen_coeffs_kind(:, :)%x(2))
     508        15190 :       ALLOCATE (set_list_ij((max_set*natom)**2))
     509        15147 :       ALLOCATE (set_list_kl((max_set*natom)**2))
     510              : 
     511              :       !! precalculate maximum density matrix elements in blocks
     512          365 :       actual_x_data%pmax_block = 0.0_dp
     513           43 :       shm_pmax_block => actual_x_data%pmax_block
     514              : 
     515           43 :       shm_atomic_pair_list => actual_x_data%atomic_pair_list
     516              : 
     517           43 :       iatom_start = 1
     518           43 :       iatom_end = natom
     519           43 :       jatom_start = 1
     520           43 :       jatom_end = natom
     521           43 :       katom_start = 1
     522           43 :       katom_end = natom
     523           43 :       latom_start = 1
     524           43 :       latom_end = natom
     525              : 
     526              :       CALL build_pair_list_mp2(natom, list_ij, set_list_ij, iatom_start, iatom_end, &
     527              :                                jatom_start, jatom_end, &
     528              :                                kind_of, basis_parameter, particle_set, &
     529              :                                do_periodic, screen_coeffs_set, screen_coeffs_kind, &
     530              :                                coeffs_kind_max0, log10_eps_schwarz, cell, 0.D+00, &
     531           43 :                                shm_atomic_pair_list)
     532              : 
     533              :       CALL build_pair_list_mp2(natom, list_kl, set_list_kl, katom_start, katom_end, &
     534              :                                latom_start, latom_end, &
     535              :                                kind_of, basis_parameter, particle_set, &
     536              :                                do_periodic, screen_coeffs_set, screen_coeffs_kind, &
     537              :                                coeffs_kind_max0, log10_eps_schwarz, cell, 0.D+00, &
     538           43 :                                shm_atomic_pair_list)
     539              : 
     540          215 :       ALLOCATE (BIb(dimen, dimen, elements_ij_proc))
     541      1296103 :       BIb = 0.0D+00
     542        64195 :       C_T = TRANSPOSE(C)
     543              : 
     544           43 :       IF (PRESENT(occupied_beta) .AND. PRESENT(C_beta) .AND. PRESENT(Auto_beta)) THEN
     545           40 :          ALLOCATE (C_beta_T(dimen, dimen))
     546         9322 :          C_beta_T(:, :) = TRANSPOSE(C_beta)
     547              :          alpha_beta_case = .TRUE.
     548              :       END IF
     549              : 
     550           43 :       ij_elem_max = elements_ij_proc
     551           43 :       CALL para_env%max(ij_elem_max)
     552              : 
     553              :       ! calculate the minimum multiple of num_pe >= to Ni_occupied*occupied, in such a way
     554              :       ! that the i, j loop is performed exactly the same number of time for each procewssor
     555           43 :       multiple = 0
     556              :       DO
     557          402 :          multiple = multiple + para_env%num_pe
     558          402 :          IF (multiple >= Ni_occupied*Nj_occupied) EXIT
     559              :       END DO
     560              : 
     561              :       ! proc_num_task contains the number of times the second occupied
     562              :       ! orbital transformation is called for each processor, needs for balancing
     563              :       ! the point to point send
     564          129 :       ALLOCATE (proc_num_task(0:para_env%num_pe - 1))
     565              : 
     566          104 :       proc_num_task = 0
     567              : 
     568           43 :       counter_proc = 0
     569          204 :       DO i_list_ij = 1, list_ij%n_element
     570          161 :          iatom = list_ij%elements(i_list_ij)%pair(1)
     571          161 :          jatom = list_ij%elements(i_list_ij)%pair(2)
     572          161 :          i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
     573          161 :          i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
     574          161 :          ikind = list_ij%elements(i_list_ij)%kind_pair(1)
     575          161 :          jkind = list_ij%elements(i_list_ij)%kind_pair(2)
     576              : 
     577          161 :          nsgfb => basis_parameter(jkind)%nsgf
     578          161 :          nsgfa => basis_parameter(ikind)%nsgf
     579              : 
     580         6381 :          DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
     581         6177 :             iset = set_list_ij(i_set_list_ij)%pair(1)
     582         6177 :             jset = set_list_ij(i_set_list_ij)%pair(2)
     583         6177 :             IF (iatom == jatom .AND. jset < iset) CYCLE
     584              : 
     585         4707 :             counter_proc = counter_proc + 1
     586         4707 :             proc_num = MOD(counter_proc, para_env%num_pe)
     587              : 
     588         6338 :             proc_num_task(proc_num) = proc_num_task(proc_num) + 1
     589              : 
     590              :          END DO
     591              :       END DO
     592              :       ! calculate the exact maximum number of calls to the second occupied
     593              :       ! orbital transformation
     594              :       ! max_num_call_sec_transf=MAXVAL(proc_num_task)
     595              : 
     596              :       ! distribute the RS pair over all processor
     597          129 :       ALLOCATE (kl_list_proc(proc_num_task(para_env%mepos), 3))
     598              : 
     599        14131 :       kl_list_proc = 0
     600              : 
     601              :       counter_proc = 0
     602              :       elements_kl_proc = 0
     603          204 :       DO i_list_ij = 1, list_ij%n_element
     604          161 :          iatom = list_ij%elements(i_list_ij)%pair(1)
     605          161 :          jatom = list_ij%elements(i_list_ij)%pair(2)
     606          161 :          i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
     607          161 :          i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
     608          161 :          ikind = list_ij%elements(i_list_ij)%kind_pair(1)
     609          161 :          jkind = list_ij%elements(i_list_ij)%kind_pair(2)
     610              : 
     611          161 :          nsgfb => basis_parameter(jkind)%nsgf
     612          161 :          nsgfa => basis_parameter(ikind)%nsgf
     613              : 
     614         6381 :          DO i_set_list_ij = i_set_list_ij_start, i_set_list_ij_stop
     615         6177 :             iset = set_list_ij(i_set_list_ij)%pair(1)
     616         6177 :             jset = set_list_ij(i_set_list_ij)%pair(2)
     617         6177 :             IF (iatom == jatom .AND. jset < iset) CYCLE
     618              : 
     619         4707 :             counter_proc = counter_proc + 1
     620         4707 :             proc_num = MOD(counter_proc, para_env%num_pe)
     621              : 
     622         4868 :             IF (proc_num == para_env%mepos) THEN
     623         4653 :                elements_kl_proc = elements_kl_proc + 1
     624         4653 :                kl_list_proc(elements_kl_proc, 1) = i_list_ij
     625         4653 :                kl_list_proc(elements_kl_proc, 2) = i_set_list_ij
     626         4653 :                kl_list_proc(elements_kl_proc, 3) = counter_proc
     627              :             END IF
     628              : 
     629              :          END DO
     630              :       END DO
     631              : 
     632          104 :       total_num_RS_task = SUM(proc_num_task)
     633          129 :       ALLOCATE (task_counter_RS(total_num_RS_task, 4))
     634              : 
     635          129 :       ALLOCATE (cost_RS(total_num_RS_task))
     636              : 
     637        19043 :       task_counter_RS = 0
     638         4750 :       cost_RS = 0.0_dp
     639              : 
     640          129 :       DO case_index = 1, 2
     641              : 
     642              :          my_num_call_sec_transf = 0
     643         9392 :          DO index_kl = 1, elements_kl_proc
     644              : 
     645         9306 :             i_list_ij = kl_list_proc(index_kl, 1)
     646         9306 :             i_set_list_ij = kl_list_proc(index_kl, 2)
     647              : 
     648         9306 :             iatom = list_ij%elements(i_list_ij)%pair(1)
     649         9306 :             jatom = list_ij%elements(i_list_ij)%pair(2)
     650         9306 :             i_set_list_ij_start = list_ij%elements(i_list_ij)%set_bounds(1)
     651         9306 :             i_set_list_ij_stop = list_ij%elements(i_list_ij)%set_bounds(2)
     652         9306 :             ikind = list_ij%elements(i_list_ij)%kind_pair(1)
     653         9306 :             jkind = list_ij%elements(i_list_ij)%kind_pair(2)
     654        37224 :             ra = list_ij%elements(i_list_ij)%r1
     655        37224 :             rb = list_ij%elements(i_list_ij)%r2
     656         9306 :             rab2 = list_ij%elements(i_list_ij)%dist2
     657              : 
     658         9306 :             la_max => basis_parameter(ikind)%lmax
     659         9306 :             la_min => basis_parameter(ikind)%lmin
     660         9306 :             npgfa => basis_parameter(ikind)%npgf
     661         9306 :             nseta = basis_parameter(ikind)%nset
     662         9306 :             zeta => basis_parameter(ikind)%zet
     663         9306 :             nsgfa => basis_parameter(ikind)%nsgf
     664         9306 :             sphi_a_ext => basis_parameter(ikind)%sphi_ext(:, :, :, :)
     665         9306 :             nsgfl_a => basis_parameter(ikind)%nsgfl
     666         9306 :             sphi_a_u1 = UBOUND(sphi_a_ext, 1)
     667         9306 :             sphi_a_u2 = UBOUND(sphi_a_ext, 2)
     668         9306 :             sphi_a_u3 = UBOUND(sphi_a_ext, 3)
     669              : 
     670         9306 :             lb_max => basis_parameter(jkind)%lmax
     671         9306 :             lb_min => basis_parameter(jkind)%lmin
     672         9306 :             npgfb => basis_parameter(jkind)%npgf
     673         9306 :             nsetb = basis_parameter(jkind)%nset
     674         9306 :             zetb => basis_parameter(jkind)%zet
     675         9306 :             nsgfb => basis_parameter(jkind)%nsgf
     676         9306 :             sphi_b_ext => basis_parameter(jkind)%sphi_ext(:, :, :, :)
     677         9306 :             nsgfl_b => basis_parameter(jkind)%nsgfl
     678         9306 :             sphi_b_u1 = UBOUND(sphi_b_ext, 1)
     679         9306 :             sphi_b_u2 = UBOUND(sphi_b_ext, 2)
     680         9306 :             sphi_b_u3 = UBOUND(sphi_b_ext, 3)
     681              : 
     682         9306 :             iset = set_list_ij(i_set_list_ij)%pair(1)
     683         9306 :             jset = set_list_ij(i_set_list_ij)%pair(2)
     684              : 
     685         9306 :             ncob = npgfb(jset)*ncoset(lb_max(jset))
     686              :             max_val1 = screen_coeffs_set(jset, iset, jkind, ikind)%x(1)*rab2 + &
     687         9306 :                        screen_coeffs_set(jset, iset, jkind, ikind)%x(2)
     688              : 
     689         9306 :             sphi_a_ext_set => sphi_a_ext(:, :, :, iset)
     690         9306 :             sphi_b_ext_set => sphi_b_ext(:, :, :, jset)
     691              : 
     692         9306 :             IF (case_index == 1) THEN
     693         4653 :                global_counter = kl_list_proc(index_kl, 3)
     694         4653 :                task_counter_RS(global_counter, 1) = i_list_ij
     695         4653 :                task_counter_RS(global_counter, 2) = i_set_list_ij
     696         4653 :                task_counter_RS(global_counter, 3) = nsgfb(jset)*nsgfa(iset)
     697              :             END IF
     698              : 
     699         9306 :             IF (ALLOCATED(BI1)) DEALLOCATE (BI1)
     700        55836 :             ALLOCATE (BI1(dimen, Ni_occupied, nsgfb(jset), nsgfa(iset)))
     701              : 
     702     12485758 :             BI1 = 0.D+00
     703              : 
     704        71604 :             DO i_list_kl = 1, list_kl%n_element
     705              : 
     706        62298 :                katom = list_kl%elements(i_list_kl)%pair(1)
     707        62298 :                latom = list_kl%elements(i_list_kl)%pair(2)
     708              : 
     709        62298 :                i_set_list_kl_start = list_kl%elements(i_list_kl)%set_bounds(1)
     710        62298 :                i_set_list_kl_stop = list_kl%elements(i_list_kl)%set_bounds(2)
     711        62298 :                kkind = list_kl%elements(i_list_kl)%kind_pair(1)
     712        62298 :                lkind = list_kl%elements(i_list_kl)%kind_pair(2)
     713       249192 :                rc = list_kl%elements(i_list_kl)%r1
     714       249192 :                rd = list_kl%elements(i_list_kl)%r2
     715        62298 :                rcd2 = list_kl%elements(i_list_kl)%dist2
     716              : 
     717        62298 :                pmax_atom = 0.0_dp
     718              : 
     719              :                screen_kind_ij = screen_coeffs_kind(jkind, ikind)%x(1)*rab2 + &
     720        62298 :                                 screen_coeffs_kind(jkind, ikind)%x(2)
     721              :                screen_kind_kl = screen_coeffs_kind(lkind, kkind)%x(1)*rcd2 + &
     722        62298 :                                 screen_coeffs_kind(lkind, kkind)%x(2)
     723              : 
     724              :                !!!!! Change the loop order
     725        62298 :                IF (max_val1 + screen_kind_kl + pmax_atom < log10_eps_schwarz) CYCLE
     726              :                !!!!!
     727        62274 :                IF (screen_kind_ij + screen_kind_kl + pmax_atom < log10_eps_schwarz) CYCLE
     728              : 
     729        62274 :                lc_max => basis_parameter(kkind)%lmax
     730        62274 :                lc_min => basis_parameter(kkind)%lmin
     731        62274 :                npgfc => basis_parameter(kkind)%npgf
     732        62274 :                zetc => basis_parameter(kkind)%zet
     733        62274 :                nsgfc => basis_parameter(kkind)%nsgf
     734        62274 :                sphi_c_ext => basis_parameter(kkind)%sphi_ext(:, :, :, :)
     735        62274 :                nsgfl_c => basis_parameter(kkind)%nsgfl
     736        62274 :                sphi_c_u1 = UBOUND(sphi_c_ext, 1)
     737        62274 :                sphi_c_u2 = UBOUND(sphi_c_ext, 2)
     738        62274 :                sphi_c_u3 = UBOUND(sphi_c_ext, 3)
     739              : 
     740        62274 :                ld_max => basis_parameter(lkind)%lmax
     741        62274 :                ld_min => basis_parameter(lkind)%lmin
     742        62274 :                npgfd => basis_parameter(lkind)%npgf
     743        62274 :                zetd => basis_parameter(lkind)%zet
     744        62274 :                nsgfd => basis_parameter(lkind)%nsgf
     745        62274 :                sphi_d_ext => basis_parameter(lkind)%sphi_ext(:, :, :, :)
     746        62274 :                nsgfl_d => basis_parameter(lkind)%nsgfl
     747        62274 :                sphi_d_u1 = UBOUND(sphi_d_ext, 1)
     748        62274 :                sphi_d_u2 = UBOUND(sphi_d_ext, 2)
     749        62274 :                sphi_d_u3 = UBOUND(sphi_d_ext, 3)
     750              : 
     751      2761710 :                DO i_set_list_kl = i_set_list_kl_start, i_set_list_kl_stop
     752      2690130 :                   kset = set_list_kl(i_set_list_kl)%pair(1)
     753      2690130 :                   lset = set_list_kl(i_set_list_kl)%pair(2)
     754              : 
     755      2690130 :                   IF (katom == latom .AND. lset < kset) CYCLE
     756              : 
     757              :                   max_val2_set = (screen_coeffs_set(lset, kset, lkind, kkind)%x(1)*rcd2 + &
     758      2076726 :                                   screen_coeffs_set(lset, kset, lkind, kkind)%x(2))
     759      2076726 :                   max_val2 = max_val1 + max_val2_set
     760              : 
     761              :                   !! Near field screening
     762      2076726 :                   IF (max_val2 + pmax_atom < log10_eps_schwarz) CYCLE
     763      2023538 :                   sphi_c_ext_set => sphi_c_ext(:, :, :, kset)
     764      2023538 :                   sphi_d_ext_set => sphi_d_ext(:, :, :, lset)
     765              :                   !! get max_vals if we screen on initial density
     766      2023538 :                   pmax_entry = 0.0_dp
     767              : 
     768      2023538 :                   log10_pmax = pmax_entry
     769      2023538 :                   max_val2 = max_val2 + log10_pmax
     770              :                   IF (max_val2 < log10_eps_schwarz) CYCLE
     771      2023538 :                   pmax_entry = EXP(log10_pmax*ln_10)
     772              : 
     773      2085836 :                   IF (case_index == 2) THEN
     774      1011769 :                      IF (ALLOCATED(MNRS)) DEALLOCATE (MNRS)
     775      6070614 :                      ALLOCATE (MNRS(nsgfd(lset), nsgfc(kset), nsgfb(jset), nsgfa(iset)))
     776              : 
     777     82996187 :                      MNRS = 0.D+00
     778              : 
     779              :                      max_contraction_val = max_contraction(iset, iatom)* &
     780              :                                            max_contraction(jset, jatom)* &
     781              :                                            max_contraction(kset, katom)* &
     782      1011769 :                                            max_contraction(lset, latom)*pmax_entry
     783      1011769 :                      tmp_R_1 => radii_pgf(:, :, jset, iset, jkind, ikind)
     784      1011769 :                      tmp_R_2 => radii_pgf(:, :, lset, kset, lkind, kkind)
     785      1011769 :                      tmp_screen_pgf1 => screen_coeffs_pgf(:, :, jset, iset, jkind, ikind)
     786      1011769 :                      tmp_screen_pgf2 => screen_coeffs_pgf(:, :, lset, kset, lkind, kkind)
     787              : 
     788              :                      CALL coulomb4(private_lib, ra, rb, rc, rd, npgfa(iset), npgfb(jset), npgfc(kset), npgfd(lset), &
     789              :                                    la_min(iset), la_max(iset), lb_min(jset), lb_max(jset), &
     790              :                                    lc_min(kset), lc_max(kset), ld_min(lset), ld_max(lset), &
     791              :                                    nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
     792              :                                    sphi_a_u1, sphi_a_u2, sphi_a_u3, &
     793              :                                    sphi_b_u1, sphi_b_u2, sphi_b_u3, &
     794              :                                    sphi_c_u1, sphi_c_u2, sphi_c_u3, &
     795              :                                    sphi_d_u1, sphi_d_u2, sphi_d_u3, &
     796              :                                    zeta(1:npgfa(iset), iset), zetb(1:npgfb(jset), jset), &
     797              :                                    zetc(1:npgfc(kset), kset), zetd(1:npgfd(lset), lset), &
     798              :                                    primitive_integrals, &
     799              :                                    mp2_potential_parameter, &
     800              :                                    actual_x_data%neighbor_cells, screen_coeffs_set(jset, iset, jkind, ikind)%x, &
     801              :                                    screen_coeffs_set(lset, kset, lkind, kkind)%x, eps_schwarz, &
     802              :                                    max_contraction_val, cartesian_estimate, cell, neris_tmp, &
     803              :                                    log10_pmax, log10_eps_schwarz, &
     804              :                                    tmp_R_1, tmp_R_2, tmp_screen_pgf1, tmp_screen_pgf2, &
     805              :                                    pgf_list_ij, pgf_list_kl, pgf_product_list, &
     806              :                                    nsgfl_a(:, iset), nsgfl_b(:, jset), &
     807              :                                    nsgfl_c(:, kset), nsgfl_d(:, lset), &
     808              :                                    sphi_a_ext_set, &
     809              :                                    sphi_b_ext_set, &
     810              :                                    sphi_c_ext_set, &
     811              :                                    sphi_d_ext_set, &
     812              :                                    ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp, &
     813      1011769 :                                    nimages, do_periodic, p_work)
     814              : 
     815      1011769 :                      nints = nsgfa(iset)*nsgfb(jset)*nsgfc(kset)*nsgfd(lset)
     816      1011769 :                      neris_total = neris_total + nints
     817      1011769 :                      nprim_ints = nprim_ints + neris_tmp
     818      1011769 :                      IF (cartesian_estimate == 0.0_dp) cartesian_estimate = TINY(cartesian_estimate)
     819      1011769 :                      estimate_to_store_int = EXPONENT(cartesian_estimate)
     820      1011769 :                      estimate_to_store_int = MAX(estimate_to_store_int, -15_int_8)
     821      1011769 :                      cartesian_estimate = SET_EXPONENT(1.0_dp, estimate_to_store_int + 1)
     822              : 
     823      1011769 :                      IF (cartesian_estimate < eps_schwarz) CYCLE
     824              : 
     825              :                      primitive_counter = 0
     826      3501876 :                      DO llB = 1, nsgfd(lset)
     827     10171505 :                         DO kkB = 1, nsgfc(kset)
     828     28159746 :                            DO jjB = 1, nsgfb(jset)
     829     80452823 :                               DO iiB = 1, nsgfa(iset)
     830     54784876 :                                  primitive_counter = primitive_counter + 1
     831     73783194 :                                  MNRS(llB, kkB, jjB, iiB) = primitive_integrals(primitive_counter)
     832              :                               END DO
     833              :                            END DO
     834              :                         END DO
     835              :                      END DO
     836              : 
     837              :                      CALL transform_occupied_orbitals_first(dimen, iatom, jatom, katom, latom, &
     838              :                                                             iset, jset, kset, lset, &
     839              :                                                             nsgfa(iset), nsgfb(jset), nsgfc(kset), nsgfd(lset), &
     840              :                                                             i_batch_start, Ni_occupied, &
     841      1010077 :                                                             MNRS, C_T, mp2_biel, BI1)
     842              :                   ELSE
     843      1011769 :                      task_counter_RS(global_counter, 4) = task_counter_RS(global_counter, 4) + 1
     844              : 
     845      1011769 :                      cost_tmp = 0.0_dp
     846              :                      cost_tmp = cost_model(nsgfd(lset), nsgfc(kset), nsgfb(jset), nsgfa(iset), &
     847              :                                            npgfd(lset), npgfc(kset), npgfb(jset), npgfa(iset), &
     848              :                                            max_val2/log10_eps_schwarz, &
     849      1011769 :                                            p1_energy, p2_energy, p3_energy)
     850      1011769 :                      cost_RS(global_counter) = cost_RS(global_counter) + cost_tmp
     851              :                   END IF
     852              : 
     853              :                END DO ! i_set_list_kl
     854              :             END DO ! i_list_kl
     855              : 
     856         9392 :             IF (case_index == 2) THEN
     857         4653 :                my_num_call_sec_transf = my_num_call_sec_transf + 1
     858         4653 :                IF (.NOT. alpha_beta_case) THEN
     859         3993 :                   IF (.NOT. mp2_env%direct_canonical%big_send) THEN
     860              :                      CALL transform_occupied_orbitals_second(dimen, iatom, jatom, iset, jset, &
     861              :                                                              nsgfa(iset), nsgfb(jset), Ni_occupied, Nj_occupied, j_batch_start, &
     862              :                                                              BI1, C_T, mp2_biel, para_env, elements_ij_proc, &
     863            0 :                                                              multiple, BIb)
     864              :                   ELSE
     865              :                      CALL transform_occupied_orbitals_second_big( &
     866              :                         dimen, iatom, jatom, iset, jset, &
     867              :                         nsgfa(iset), nsgfb(jset), Ni_occupied, Nj_occupied, j_batch_start, &
     868         3993 :                         ij_elem_max, BI1, C_T, mp2_biel, para_env, elements_ij_proc, BIb)
     869              :                   END IF
     870              :                ELSE
     871          660 :                   IF (.NOT. mp2_env%direct_canonical%big_send) THEN
     872              :                      CALL transform_occupied_orbitals_second(dimen, iatom, jatom, iset, jset, &
     873              :                                                              nsgfa(iset), nsgfb(jset), Ni_occupied, Nj_occupied, j_batch_start, &
     874              :                                                              BI1, C_beta_T, mp2_biel, para_env, elements_ij_proc, &
     875            0 :                                                              multiple, BIb)
     876              :                   ELSE
     877              :                      CALL transform_occupied_orbitals_second_big( &
     878              :                         dimen, iatom, jatom, iset, jset, &
     879              :                         nsgfa(iset), nsgfb(jset), Ni_occupied, Nj_occupied, j_batch_start, &
     880          660 :                         ij_elem_max, BI1, C_beta_T, mp2_biel, para_env, elements_ij_proc, BIb)
     881              :                   END IF
     882              :                END IF
     883              :             END IF
     884              : 
     885              :          END DO !i_list_ij
     886              : 
     887          129 :          IF (case_index == 1) THEN
     888           43 :             CALL para_env%sum(task_counter_RS)
     889           43 :             CALL para_env%sum(cost_RS)
     890           86 :             ALLOCATE (task_counter_RS_temp(total_num_RS_task, 4))
     891              : 
     892           86 :             ALLOCATE (cost_RS_temp(total_num_RS_task))
     893              : 
     894           43 :             step_size = 1
     895          129 :             ALLOCATE (same_size_kl_elements_counter((nsgf_max**2 + 1)/step_size + 1))
     896              : 
     897         1372 :             same_size_kl_elements_counter = 0
     898              : 
     899           43 :             same_size_kl_index = 0
     900           43 :             global_counter = 0
     901           43 :             DO iiB = nsgf_max**2 + 1, 0, -step_size
     902       149802 :                DO jjB = 1, total_num_RS_task
     903       149802 :                   IF (task_counter_RS(jjB, 3) > iiB - step_size .AND. task_counter_RS(jjB, 3) <= iiB) THEN
     904         4707 :                      global_counter = global_counter + 1
     905        23535 :                      task_counter_RS_temp(global_counter, 1:4) = task_counter_RS(jjB, 1:4)
     906         4707 :                      cost_RS_temp(global_counter) = cost_RS(jjB)
     907              :                   END IF
     908              :                END DO
     909         1329 :                same_size_kl_index = same_size_kl_index + 1
     910         1329 :                same_size_kl_elements_counter(same_size_kl_index) = global_counter
     911              :             END DO
     912              : 
     913           43 :             DEALLOCATE (task_counter_RS)
     914           43 :             DEALLOCATE (cost_RS)
     915              : 
     916           43 :             i_start = 1
     917         1372 :             DO same_size_kl_index = 1, SIZE(same_size_kl_elements_counter)
     918         6036 :                DO iiB = i_start, same_size_kl_elements_counter(same_size_kl_index)
     919       100828 :                   DO jjB = iiB + 1, same_size_kl_elements_counter(same_size_kl_index)
     920              : 
     921        99499 :                      IF (cost_RS_temp(jjB) >= cost_RS_temp(iiB)) THEN
     922       213860 :                         RS_counter_temp = task_counter_RS_temp(iiB, 1:4)
     923       213860 :                         task_counter_RS_temp(iiB, 1:4) = task_counter_RS_temp(jjB, 1:4)
     924       213860 :                         task_counter_RS_temp(jjB, 1:4) = RS_counter_temp
     925              : 
     926        42772 :                         cost_tmp = cost_RS_temp(iiB)
     927        42772 :                         cost_RS_temp(iiB) = cost_RS_temp(jjB)
     928        42772 :                         cost_RS_temp(jjB) = cost_tmp
     929              :                      END IF
     930              :                   END DO
     931              :                END DO
     932         1372 :                i_start = same_size_kl_elements_counter(same_size_kl_index) + 1
     933              :             END DO
     934              : 
     935          104 :             proc_num_task = 0
     936         4750 :             DO counter_proc = 1, total_num_RS_task
     937         4707 :                proc_num = MOD(counter_proc, para_env%num_pe)
     938         4750 :                proc_num_task(proc_num) = proc_num_task(proc_num) + 1
     939              :             END DO
     940              : 
     941          104 :             max_num_call_sec_transf = MAXVAL(proc_num_task)
     942              : 
     943           43 :             DEALLOCATE (kl_list_proc)
     944          129 :             ALLOCATE (kl_list_proc(proc_num_task(para_env%mepos), 2))
     945              : 
     946         9435 :             kl_list_proc = 0
     947              : 
     948              :             elements_kl_proc = 0
     949         4750 :             DO counter_proc = 1, total_num_RS_task
     950         4707 :                proc_num = MOD(counter_proc, para_env%num_pe)
     951         4750 :                IF (proc_num == para_env%mepos) THEN
     952         4653 :                   elements_kl_proc = elements_kl_proc + 1
     953         4653 :                   kl_list_proc(elements_kl_proc, 1) = task_counter_RS_temp(counter_proc, 1)
     954         4653 :                   kl_list_proc(elements_kl_proc, 2) = task_counter_RS_temp(counter_proc, 2)
     955              :                END IF
     956              :             END DO
     957              : 
     958           43 :             DEALLOCATE (task_counter_RS_temp)
     959           43 :             DEALLOCATE (cost_RS_temp)
     960              :          END IF
     961              :       END DO ! case_index
     962              : 
     963           43 :       size_parameter_send(1) = 1
     964           43 :       size_parameter_send(2) = 1
     965           43 :       size_parameter_send(3) = 0
     966           43 :       size_parameter_send(4) = 0
     967           43 :       size_parameter_send(5) = elements_ij_proc
     968              : 
     969           43 :       IF (mp2_env%direct_canonical%big_send) THEN
     970          172 :          ALLOCATE (zero_mat_big(dimen, 2, ij_elem_max))
     971              : 
     972              :       END IF
     973              : 
     974           43 :       DO iiB = my_num_call_sec_transf + 1, max_num_call_sec_transf
     975           43 :          DO index_proc_shift = 0, para_env%num_pe - 1
     976              : 
     977            0 :             proc_send = MODULO(para_env%mepos + index_proc_shift, para_env%num_pe)
     978            0 :             proc_receive = MODULO(para_env%mepos - index_proc_shift, para_env%num_pe)
     979              : 
     980            0 :             case_send_receive = (proc_send /= para_env%mepos)
     981              : 
     982            0 :             IF (case_send_receive) THEN
     983              :                ! the processor starts to send (and receive?)
     984              : 
     985            0 :                CALL para_env%sendrecv(size_parameter_send, proc_send, size_parameter_rec, proc_receive)
     986              : 
     987            0 :                Rsize_rec = size_parameter_rec(1)
     988            0 :                Ssize_rec = size_parameter_rec(2)
     989            0 :                R_offset_rec = size_parameter_rec(3)
     990            0 :                S_offset_rec = size_parameter_rec(4)
     991            0 :                elements_ij_proc_rec = size_parameter_rec(5)
     992            0 :                IF (.NOT. mp2_env%direct_canonical%big_send) THEN
     993            0 :                   ALLOCATE (BIb_RS_mat_rec(dimen, Rsize_rec + Ssize_rec))
     994              :                ELSE
     995            0 :                   ALLOCATE (BIb_RS_mat_rec_big(dimen, Rsize_rec + Ssize_rec, ij_elem_max))
     996              :                END IF
     997              :             ELSE
     998            0 :                elements_ij_proc_rec = elements_ij_proc
     999              :             END IF
    1000              : 
    1001            0 :             IF (.NOT. mp2_env%direct_canonical%big_send) THEN
    1002            0 :                index_ij_send = 0
    1003            0 :                index_ij_rec = 0
    1004            0 :                DO index_proc_ij = proc_send + 1, multiple, para_env%num_pe
    1005              : 
    1006            0 :                   zero_mat = 0.D+00
    1007              : 
    1008            0 :                   IF (case_send_receive) THEN
    1009              : 
    1010            0 :                      CALL para_env%sendrecv(zero_mat, proc_send, BIb_RS_mat_rec, proc_receive)
    1011              : 
    1012            0 :                      index_ij_rec = index_ij_rec + 1
    1013            0 :                      IF (index_ij_rec <= elements_ij_proc .AND. elements_ij_proc > 0) THEN
    1014              : 
    1015              :                         BIb(1:dimen, R_offset_rec + 1:R_offset_rec + Rsize_rec, index_ij_rec) = &
    1016              :                            BIb(1:dimen, R_offset_rec + 1:R_offset_rec + Rsize_rec, index_ij_rec) + &
    1017            0 :                            BIb_RS_mat_rec(1:dimen, 1:Rsize_rec)
    1018              : 
    1019              :                         BIb(1:dimen, S_offset_rec + 1:S_offset_rec + Ssize_rec, index_ij_rec) = &
    1020              :                            BIb(1:dimen, S_offset_rec + 1:S_offset_rec + Ssize_rec, index_ij_rec) + &
    1021            0 :                            BIb_RS_mat_rec(1:dimen, Rsize_rec + 1:Rsize_rec + Ssize_rec)
    1022              : 
    1023              :                      END IF
    1024              :                   END IF
    1025              : 
    1026              :                END DO
    1027              :             ELSE
    1028            0 :                zero_mat_big = 0.D+00
    1029              : 
    1030            0 :                IF (case_send_receive) THEN
    1031              : 
    1032            0 :                   CALL para_env%sendrecv(zero_mat_big, proc_send, BIb_RS_mat_rec_big, proc_receive)
    1033              : 
    1034              :                   BIb(1:dimen, R_offset_rec + 1:R_offset_rec + Rsize_rec, 1:elements_ij_proc) = &
    1035              :                      BIb(1:dimen, R_offset_rec + 1:R_offset_rec + Rsize_rec, 1:elements_ij_proc) + &
    1036            0 :                      BIb_RS_mat_rec_big(1:dimen, 1:Rsize_rec, 1:elements_ij_proc)
    1037              : 
    1038              :                   BIb(1:dimen, S_offset_rec + 1:S_offset_rec + Ssize_rec, 1:elements_ij_proc) = &
    1039              :                      BIb(1:dimen, S_offset_rec + 1:S_offset_rec + Ssize_rec, 1:elements_ij_proc) + &
    1040            0 :                      BIb_RS_mat_rec_big(1:dimen, Rsize_rec + 1:Rsize_rec + Ssize_rec, 1:elements_ij_proc)
    1041              : 
    1042              :                END IF
    1043              :             END IF
    1044              : 
    1045            0 :             IF (case_send_receive) THEN
    1046            0 :                IF (.NOT. mp2_env%direct_canonical%big_send) THEN
    1047            0 :                   DEALLOCATE (BIb_RS_mat_rec)
    1048              :                ELSE
    1049            0 :                   DEALLOCATE (BIb_RS_mat_rec_big)
    1050              :                END IF
    1051              :             END IF
    1052              : 
    1053              :          END DO
    1054              :       END DO
    1055              : 
    1056           43 :       IF (mp2_env%direct_canonical%big_send) THEN
    1057           43 :          DEALLOCATE (zero_mat_big)
    1058              :       END IF
    1059              : 
    1060           43 :       logger => cp_get_default_logger()
    1061              : 
    1062           43 :       DEALLOCATE (primitive_integrals)
    1063              : 
    1064           43 :       IF (.NOT. alpha_beta_case) THEN
    1065              :          CALL transform_virtual_orbitals_and_accumulate(dimen, occupied, dimen - occupied, i_batch_start, &
    1066              :                                                         j_batch_start, BIb, C, Auto, elements_ij_proc, ij_list_proc, &
    1067           33 :                                                         nspins, Emp2, Emp2_Cou, Emp2_ex)
    1068              :       ELSE
    1069              :          CALL transform_virtual_orbitals_and_accumulate_ABcase( &
    1070              :             dimen, occupied, occupied_beta, dimen - occupied, dimen - occupied_beta, &
    1071              :             i_batch_start, j_batch_start, &
    1072              :             BIb, C, C_beta, Auto, Auto_beta, &
    1073           10 :             elements_ij_proc, ij_list_proc, Emp2, Emp2_Cou)
    1074           10 :          DEALLOCATE (C_beta_T)
    1075              :       END IF
    1076              : 
    1077           43 :       IF (copy_integrals) THEN
    1078           18 :          IF (.NOT. alpha_beta_case) THEN
    1079           72 :             ALLOCATE (Integ_MP2(dimen - occupied, dimen - occupied, occupied, occupied))
    1080        11976 :             Integ_MP2 = 0.0_dp
    1081           72 :             DO i = 1, elements_ij_proc
    1082           60 :                iiB = ij_list_proc(i, 1)
    1083           60 :                jjB = ij_list_proc(i, 2)
    1084         5976 :                Integ_MP2(:, :, iiB + i_batch_start, jjB + j_batch_start) = BIb(1:dimen - occupied, 1:dimen - occupied, i)
    1085              :             END DO
    1086              :          ELSE
    1087           36 :             ALLOCATE (Integ_MP2(dimen - occupied, dimen - occupied_beta, occupied, occupied_beta))
    1088         5346 :             Integ_MP2 = 0.0_dp
    1089           30 :             DO i = 1, elements_ij_proc
    1090           24 :                iiB = ij_list_proc(i, 1)
    1091           24 :                jjB = ij_list_proc(i, 2)
    1092         2670 :                Integ_MP2(:, :, iiB + i_batch_start, jjB + j_batch_start) = BIb(1:dimen - occupied, 1:dimen - occupied_beta, i)
    1093              :             END DO
    1094              :          END IF
    1095              :       END IF
    1096           43 :       DEALLOCATE (BIb)
    1097              : 
    1098           43 :       DEALLOCATE (set_list_ij, set_list_kl)
    1099              : 
    1100         1665 :       DO i = 1, max_pgf**2
    1101         1622 :          DEALLOCATE (pgf_list_ij(i)%image_list)
    1102         1665 :          DEALLOCATE (pgf_list_kl(i)%image_list)
    1103              :       END DO
    1104              : 
    1105           43 :       DEALLOCATE (pgf_list_ij)
    1106           43 :       DEALLOCATE (pgf_list_kl)
    1107           43 :       DEALLOCATE (pgf_product_list)
    1108              : 
    1109           43 :       DEALLOCATE (max_contraction, kind_of)
    1110              : 
    1111           43 :       DEALLOCATE (ee_work, ee_work2, ee_buffer1, ee_buffer2, ee_primitives_tmp)
    1112              : 
    1113           43 :       DEALLOCATE (nimages)
    1114              : 
    1115           43 :       IF (mp2_env%potential_parameter%potential_type == do_potential_TShPSC) THEN
    1116            2 :          init_TShPSC_lmax = -1
    1117            2 :          CALL free_C0()
    1118              :       END IF
    1119              : 
    1120           43 :       CALL timestop(handle)
    1121              : 
    1122          139 :    END SUBROUTINE mp2_canonical_direct_single_batch
    1123              : 
    1124              : ! **************************************************************************************************
    1125              : !> \brief ...
    1126              : !> \param dimen ...
    1127              : !> \param latom ...
    1128              : !> \param katom ...
    1129              : !> \param jatom ...
    1130              : !> \param iatom ...
    1131              : !> \param lset ...
    1132              : !> \param kset ...
    1133              : !> \param jset ...
    1134              : !> \param iset ...
    1135              : !> \param Ssize ...
    1136              : !> \param Rsize ...
    1137              : !> \param Nsize ...
    1138              : !> \param Msize ...
    1139              : !> \param i_batch_start ...
    1140              : !> \param Ni_occupied ...
    1141              : !> \param MNRS ...
    1142              : !> \param C_T ...
    1143              : !> \param mp2_biel ...
    1144              : !> \param BI1 ...
    1145              : ! **************************************************************************************************
    1146      1010077 :    SUBROUTINE transform_occupied_orbitals_first(dimen, latom, katom, jatom, iatom, &
    1147              :                                                 lset, kset, jset, iset, &
    1148              :                                                 Ssize, Rsize, Nsize, Msize, &
    1149              :                                                 i_batch_start, Ni_occupied, &
    1150      1010077 :                                                 MNRS, C_T, mp2_biel, BI1)
    1151              : 
    1152              :       INTEGER, INTENT(IN)                                :: dimen, latom, katom, jatom, iatom, lset, &
    1153              :                                                             kset, jset, iset, Ssize, Rsize, Nsize, &
    1154              :                                                             Msize, i_batch_start, Ni_occupied
    1155              :       REAL(KIND=dp), &
    1156              :          DIMENSION(Msize, Nsize, Rsize, Ssize), &
    1157              :          INTENT(IN)                                      :: MNRS
    1158              :       REAL(KIND=dp), DIMENSION(dimen, dimen), INTENT(IN) :: C_T
    1159              :       TYPE(mp2_biel_type), INTENT(IN)                    :: mp2_biel
    1160              :       REAL(KIND=dp), &
    1161              :          DIMENSION(dimen, Ni_occupied, Rsize, Ssize), &
    1162              :          INTENT(INOUT)                                   :: BI1
    1163              : 
    1164              :       INTEGER                                            :: i, i_global, m, M_global, M_offset, &
    1165              :                                                             M_start, n, N_global, N_offset, r, &
    1166              :                                                             R_offset, R_start, s, S_offset
    1167              :       REAL(KIND=dp)                                      :: MNRS_element
    1168              : 
    1169      1010077 :       N_offset = mp2_biel%index_table(jatom, jset) - 1
    1170      1010077 :       M_offset = mp2_biel%index_table(iatom, iset) - 1
    1171      1010077 :       S_offset = mp2_biel%index_table(latom, lset) - 1
    1172      1010077 :       R_offset = mp2_biel%index_table(katom, kset) - 1
    1173              : 
    1174      3549406 :       DO S = 1, Ssize
    1175      2539329 :          R_start = 1
    1176      2539329 :          IF (katom == latom .AND. kset == lset) R_start = S
    1177      9924964 :          DO R = R_start, Rsize
    1178              : 
    1179              :             ! fast i don't know why
    1180     25956088 :             DO N = 1, Nsize
    1181     17041201 :                N_global = N + N_offset
    1182     17041201 :                M_start = 1
    1183     17041201 :                IF (iatom == jatom .AND. iset == jset) THEN
    1184      1686183 :                   M = N
    1185      1686183 :                   M_global = M + M_offset
    1186      1686183 :                   MNRS_element = MNRS(M, N, R, S)
    1187      7428033 :                   DO i = 1, Ni_occupied
    1188      5741850 :                      i_global = i + i_batch_start
    1189      7428033 :                      BI1(N_global, i, R, S) = BI1(N_global, i, R, S) + C_T(i_global, M_global)*MNRS_element
    1190              :                   END DO
    1191      1686183 :                   M_start = N + 1
    1192              :                END IF
    1193              : 
    1194     71485095 :                DO M = M_start, Msize
    1195     48068336 :                   M_global = M + M_offset
    1196     48068336 :                   MNRS_element = MNRS(M, N, R, S)
    1197    247797157 :                   DO i = 1, Ni_occupied
    1198    182687620 :                      i_global = i + i_batch_start
    1199    182687620 :                      BI1(N_global, i, R, S) = BI1(N_global, i, R, S) + C_T(i_global, M_global)*MNRS_element
    1200    230755956 :                      BI1(M_global, i, R, S) = BI1(M_global, i, R, S) + C_T(i_global, N_global)*MNRS_element
    1201              :                   END DO
    1202              :                END DO
    1203              :             END DO
    1204              : 
    1205              :          END DO
    1206              :       END DO
    1207              : 
    1208      1010077 :    END SUBROUTINE transform_occupied_orbitals_first
    1209              : 
    1210              : ! **************************************************************************************************
    1211              : !> \brief ...
    1212              : !> \param dimen ...
    1213              : !> \param latom ...
    1214              : !> \param katom ...
    1215              : !> \param lset ...
    1216              : !> \param kset ...
    1217              : !> \param Ssize ...
    1218              : !> \param Rsize ...
    1219              : !> \param Ni_occupied ...
    1220              : !> \param Nj_occupied ...
    1221              : !> \param j_batch_start ...
    1222              : !> \param BI1 ...
    1223              : !> \param C_T ...
    1224              : !> \param mp2_biel ...
    1225              : !> \param para_env ...
    1226              : !> \param elements_ij_proc ...
    1227              : !> \param multiple ...
    1228              : !> \param BIb ...
    1229              : ! **************************************************************************************************
    1230            0 :    SUBROUTINE transform_occupied_orbitals_second(dimen, latom, katom, lset, kset, &
    1231              :                                                  Ssize, Rsize, Ni_occupied, Nj_occupied, j_batch_start, &
    1232            0 :                                                  BI1, C_T, mp2_biel, para_env, &
    1233              :                                                  elements_ij_proc, &
    1234            0 :                                                  multiple, BIb)
    1235              : 
    1236              :       INTEGER, INTENT(IN)                                :: dimen, latom, katom, lset, kset, Ssize, &
    1237              :                                                             Rsize, Ni_occupied, Nj_occupied, &
    1238              :                                                             j_batch_start
    1239              :       REAL(KIND=dp), &
    1240              :          DIMENSION(dimen, Ni_occupied, Rsize, Ssize), &
    1241              :          INTENT(IN)                                      :: BI1
    1242              :       REAL(KIND=dp), DIMENSION(dimen, dimen), INTENT(IN) :: C_T
    1243              :       TYPE(mp2_biel_type), INTENT(IN)                    :: mp2_biel
    1244              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
    1245              :       INTEGER, INTENT(IN)                                :: elements_ij_proc, multiple
    1246              :       REAL(KIND=dp), &
    1247              :          DIMENSION(dimen, dimen, elements_ij_proc), &
    1248              :          INTENT(INOUT)                                   :: BIb
    1249              : 
    1250              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'transform_occupied_orbitals_second'
    1251              : 
    1252              :       INTEGER :: elements_ij_proc_rec, handle, i, index_ij_rec, index_ij_send, index_proc_ij, &
    1253              :          index_proc_shift, j, n, proc_receive, proc_send, r, R_global, R_offset, R_offset_rec, &
    1254              :          R_start, Rsize_rec, s, S_global, S_offset, S_offset_rec, Ssize_rec
    1255              :       INTEGER, DIMENSION(5)                              :: size_parameter_rec, size_parameter_send
    1256              :       LOGICAL                                            :: case_send_receive
    1257              :       REAL(KIND=dp)                                      :: C_T_R, C_T_S
    1258            0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: BIb_RS_mat_rec
    1259            0 :       REAL(KIND=dp), DIMENSION(dimen, Rsize+Ssize)       :: BIb_RS_mat
    1260              : 
    1261            0 :       CALL timeset(routineN, handle)
    1262              : 
    1263            0 :       S_offset = mp2_biel%index_table(latom, lset) - 1
    1264            0 :       R_offset = mp2_biel%index_table(katom, kset) - 1
    1265              : 
    1266            0 :       size_parameter_send(1) = Rsize
    1267            0 :       size_parameter_send(2) = Ssize
    1268            0 :       size_parameter_send(3) = R_offset
    1269            0 :       size_parameter_send(4) = S_offset
    1270            0 :       size_parameter_send(5) = elements_ij_proc
    1271              : 
    1272            0 :       DO index_proc_shift = 0, para_env%num_pe - 1
    1273              : 
    1274            0 :          proc_send = MODULO(para_env%mepos + index_proc_shift, para_env%num_pe)
    1275            0 :          proc_receive = MODULO(para_env%mepos - index_proc_shift, para_env%num_pe)
    1276              : 
    1277            0 :          case_send_receive = (proc_send /= para_env%mepos)
    1278              : 
    1279            0 :          IF (case_send_receive) THEN
    1280              :             ! the processor starts to send (and receive?)
    1281              : 
    1282            0 :             CALL para_env%sendrecv(size_parameter_send, proc_send, size_parameter_rec, proc_receive)
    1283              : 
    1284            0 :             Rsize_rec = size_parameter_rec(1)
    1285            0 :             Ssize_rec = size_parameter_rec(2)
    1286            0 :             R_offset_rec = size_parameter_rec(3)
    1287            0 :             S_offset_rec = size_parameter_rec(4)
    1288            0 :             elements_ij_proc_rec = size_parameter_rec(5)
    1289            0 :             ALLOCATE (BIb_RS_mat_rec(dimen, Rsize_rec + Ssize_rec))
    1290              : 
    1291              :          ELSE
    1292            0 :             elements_ij_proc_rec = elements_ij_proc
    1293              :          END IF
    1294              : 
    1295            0 :          index_ij_send = 0
    1296            0 :          index_ij_rec = 0
    1297            0 :          DO index_proc_ij = proc_send + 1, multiple, para_env%num_pe
    1298              : 
    1299            0 :             BIb_RS_mat = zero
    1300            0 :             IF (index_proc_ij <= Ni_occupied*Nj_occupied) THEN
    1301              : 
    1302            0 :                index_ij_send = index_ij_send + 1
    1303              : 
    1304            0 :                i = (index_proc_ij - 1)/Nj_occupied + 1
    1305            0 :                j = index_proc_ij - (i - 1)*Nj_occupied + j_batch_start
    1306              : 
    1307            0 :                DO S = 1, Ssize
    1308            0 :                   S_global = S + S_offset
    1309            0 :                   R_start = 1
    1310            0 :                   IF (katom == latom .AND. kset == lset) R_start = S
    1311            0 :                   DO R = R_start, Rsize
    1312            0 :                      R_global = R + R_offset
    1313              : 
    1314            0 :                      IF (R_global /= S_global) THEN
    1315            0 :                         C_T_R = C_T(j, R_global)
    1316            0 :                         C_T_S = C_T(j, S_global)
    1317            0 :                         DO N = 1, dimen
    1318            0 :                            BIb_RS_mat(N, R) = BIb_RS_mat(N, R) + C_T_S*BI1(N, i, R, S)
    1319              :                         END DO
    1320            0 :                         DO N = 1, dimen
    1321            0 :                            BIb_RS_mat(N, Rsize + S) = BIb_RS_mat(N, Rsize + S) + C_T_R*BI1(N, i, R, S)
    1322              :                         END DO
    1323              :                      ELSE
    1324            0 :                         C_T_S = C_T(j, S_global)
    1325            0 :                         DO N = 1, dimen
    1326            0 :                            BIb_RS_mat(N, R) = BIb_RS_mat(N, R) + C_T_S*BI1(N, i, R, S)
    1327              :                         END DO
    1328              :                      END IF
    1329              : 
    1330              :                   END DO
    1331              :                END DO
    1332              : 
    1333              :             END IF
    1334              : 
    1335            0 :             IF (case_send_receive) THEN
    1336              : 
    1337            0 :                CALL para_env%sendrecv(BIb_RS_mat, proc_send, BIb_RS_mat_rec, proc_receive)
    1338              : 
    1339            0 :                index_ij_rec = index_ij_rec + 1
    1340            0 :                IF (index_ij_rec <= elements_ij_proc .AND. elements_ij_proc > 0) THEN
    1341              : 
    1342              :                   BIb(1:dimen, R_offset_rec + 1:R_offset_rec + Rsize_rec, index_ij_rec) = &
    1343              :                      BIb(1:dimen, R_offset_rec + 1:R_offset_rec + Rsize_rec, index_ij_rec) + &
    1344            0 :                      BIb_RS_mat_rec(1:dimen, 1:Rsize_rec)
    1345              : 
    1346              :                   BIb(1:dimen, S_offset_rec + 1:S_offset_rec + Ssize_rec, index_ij_rec) = &
    1347              :                      BIb(1:dimen, S_offset_rec + 1:S_offset_rec + Ssize_rec, index_ij_rec) + &
    1348            0 :                      BIb_RS_mat_rec(1:dimen, Rsize_rec + 1:Rsize_rec + Ssize_rec)
    1349              : 
    1350              :                END IF
    1351              :             ELSE
    1352              :                ! the processor is the sender and receiver itself
    1353            0 :                IF (index_ij_send <= elements_ij_proc .AND. elements_ij_proc > 0) THEN
    1354              : 
    1355              :                   BIb(1:dimen, R_offset + 1:R_offset + Rsize, index_ij_send) = &
    1356            0 :                      BIb(1:dimen, R_offset + 1:R_offset + Rsize, index_ij_send) + BIb_RS_mat(1:dimen, 1:Rsize)
    1357              : 
    1358              :                   BIb(1:dimen, S_offset + 1:S_offset + Ssize, index_ij_send) = &
    1359            0 :                      BIb(1:dimen, S_offset + 1:S_offset + Ssize, index_ij_send) + BIb_RS_mat(1:dimen, Rsize + 1:Rsize + Ssize)
    1360              : 
    1361              :                END IF
    1362              :             END IF
    1363              : 
    1364              :          END DO ! loop over the ij of the processor
    1365              : 
    1366            0 :          IF (case_send_receive) THEN
    1367            0 :             DEALLOCATE (BIb_RS_mat_rec)
    1368              :          END IF
    1369              : 
    1370              :       END DO ! loop over the processor starting from itself
    1371              : 
    1372            0 :       CALL timestop(handle)
    1373              : 
    1374            0 :    END SUBROUTINE transform_occupied_orbitals_second
    1375              : 
    1376              : ! **************************************************************************************************
    1377              : !> \brief ...
    1378              : !> \param dimen ...
    1379              : !> \param latom ...
    1380              : !> \param katom ...
    1381              : !> \param lset ...
    1382              : !> \param kset ...
    1383              : !> \param Ssize ...
    1384              : !> \param Rsize ...
    1385              : !> \param Ni_occupied ...
    1386              : !> \param Nj_occupied ...
    1387              : !> \param j_batch_start ...
    1388              : !> \param ij_elem_max ...
    1389              : !> \param BI1 ...
    1390              : !> \param C_T ...
    1391              : !> \param mp2_biel ...
    1392              : !> \param para_env ...
    1393              : !> \param elements_ij_proc ...
    1394              : !> \param BIb ...
    1395              : ! **************************************************************************************************
    1396         4653 :    SUBROUTINE transform_occupied_orbitals_second_big(dimen, latom, katom, lset, kset, &
    1397              :                                                      Ssize, Rsize, Ni_occupied, Nj_occupied, j_batch_start, &
    1398         4653 :                                                      ij_elem_max, BI1, C_T, mp2_biel, para_env, &
    1399         4653 :                                                      elements_ij_proc, BIb)
    1400              : 
    1401              :       INTEGER, INTENT(IN)                                :: dimen, latom, katom, lset, kset, Ssize, &
    1402              :                                                             Rsize, Ni_occupied, Nj_occupied, &
    1403              :                                                             j_batch_start, ij_elem_max
    1404              :       REAL(KIND=dp), &
    1405              :          DIMENSION(dimen, Ni_occupied, Rsize, Ssize), &
    1406              :          INTENT(IN)                                      :: BI1
    1407              :       REAL(KIND=dp), DIMENSION(dimen, dimen), INTENT(IN) :: C_T
    1408              :       TYPE(mp2_biel_type), INTENT(IN)                    :: mp2_biel
    1409              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
    1410              :       INTEGER, INTENT(IN)                                :: elements_ij_proc
    1411              :       REAL(KIND=dp), &
    1412              :          DIMENSION(dimen, dimen, elements_ij_proc), &
    1413              :          INTENT(INOUT)                                   :: BIb
    1414              : 
    1415              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'transform_occupied_orbitals_second_big'
    1416              : 
    1417              :       INTEGER :: elements_ij_proc_rec, handle, i, index_ij_rec, index_ij_send, index_proc_ij, &
    1418              :          index_proc_shift, j, n, proc_receive, proc_send, r, R_global, R_offset, R_offset_rec, &
    1419              :          R_start, Rsize_rec, s, S_global, S_offset, S_offset_rec, Ssize_rec
    1420              :       INTEGER, DIMENSION(5)                              :: size_parameter_rec, size_parameter_send
    1421              :       LOGICAL                                            :: case_send_receive
    1422              :       REAL(KIND=dp)                                      :: C_T_R, C_T_S
    1423         4653 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: BIb_RS_mat_rec
    1424              :       REAL(KIND=dp), &
    1425         9306 :          DIMENSION(dimen, Rsize+Ssize, ij_elem_max)      :: BIb_RS_mat
    1426              : 
    1427         4653 :       CALL timeset(routineN, handle)
    1428              : 
    1429         4653 :       S_offset = mp2_biel%index_table(latom, lset) - 1
    1430         4653 :       R_offset = mp2_biel%index_table(katom, kset) - 1
    1431              : 
    1432         4653 :       size_parameter_send(1) = Rsize
    1433         4653 :       size_parameter_send(2) = Ssize
    1434         4653 :       size_parameter_send(3) = R_offset
    1435         4653 :       size_parameter_send(4) = S_offset
    1436         4653 :       size_parameter_send(5) = elements_ij_proc
    1437              : 
    1438         9360 :       DO index_proc_shift = 0, para_env%num_pe - 1
    1439              : 
    1440         4707 :          proc_send = MODULO(para_env%mepos + index_proc_shift, para_env%num_pe)
    1441         4707 :          proc_receive = MODULO(para_env%mepos - index_proc_shift, para_env%num_pe)
    1442              : 
    1443         4707 :          case_send_receive = (proc_send /= para_env%mepos)
    1444              : 
    1445         4707 :          IF (case_send_receive) THEN
    1446              :             ! the processor starts to send (and receive?)
    1447              : 
    1448           54 :             CALL para_env%sendrecv(size_parameter_send, proc_send, size_parameter_rec, proc_receive)
    1449              : 
    1450           54 :             Rsize_rec = size_parameter_rec(1)
    1451           54 :             Ssize_rec = size_parameter_rec(2)
    1452           54 :             R_offset_rec = size_parameter_rec(3)
    1453           54 :             S_offset_rec = size_parameter_rec(4)
    1454           54 :             elements_ij_proc_rec = size_parameter_rec(5)
    1455          270 :             ALLOCATE (BIb_RS_mat_rec(dimen, Rsize_rec + Ssize_rec, ij_elem_max))
    1456              :          ELSE
    1457         4707 :             elements_ij_proc_rec = elements_ij_proc
    1458              :          END IF
    1459              : 
    1460         4707 :          index_ij_send = 0
    1461         4707 :          index_ij_rec = 0
    1462     30728964 :          BIb_RS_mat = zero
    1463              : 
    1464        81312 :          DO index_proc_ij = proc_send + 1, Ni_occupied*Nj_occupied, para_env%num_pe
    1465              : 
    1466        76605 :             index_ij_send = index_ij_send + 1
    1467              : 
    1468        76605 :             i = (index_proc_ij - 1)/Nj_occupied + 1
    1469        76605 :             j = index_proc_ij - (i - 1)*Nj_occupied + j_batch_start
    1470              : 
    1471       290238 :             DO S = 1, Ssize
    1472       208926 :                S_global = S + S_offset
    1473       208926 :                R_start = 1
    1474       208926 :                IF (katom == latom .AND. kset == lset) R_start = S
    1475       907160 :                DO R = R_start, Rsize
    1476       621629 :                   R_global = R + R_offset
    1477              : 
    1478       830555 :                   IF (R_global /= S_global) THEN
    1479       601864 :                      C_T_R = C_T(j, R_global)
    1480       601864 :                      C_T_S = C_T(j, S_global)
    1481     45769383 :                      DO N = 1, dimen
    1482     45769383 :                         BIb_RS_mat(N, R, index_ij_send) = BIb_RS_mat(N, R, index_ij_send) + C_T_S*BI1(N, i, R, S)
    1483              :                      END DO
    1484     45769383 :                      DO N = 1, dimen
    1485     45769383 :                         BIb_RS_mat(N, Rsize + S, index_ij_send) = BIb_RS_mat(N, Rsize + S, index_ij_send) + C_T_R*BI1(N, i, R, S)
    1486              :                      END DO
    1487              :                   ELSE
    1488        19765 :                      C_T_S = C_T(j, S_global)
    1489      1295658 :                      DO N = 1, dimen
    1490      1295658 :                         BIb_RS_mat(N, R, index_ij_send) = BIb_RS_mat(N, R, index_ij_send) + C_T_S*BI1(N, i, R, S)
    1491              :                      END DO
    1492              :                   END IF
    1493              : 
    1494              :                END DO
    1495              :             END DO
    1496              : 
    1497              :          END DO
    1498              : 
    1499         9360 :          IF (case_send_receive) THEN
    1500              : 
    1501           54 :             CALL para_env%sendrecv(BIb_RS_mat, proc_send, BIb_RS_mat_rec, proc_receive)
    1502              : 
    1503              :             BIb(1:dimen, R_offset_rec + 1:R_offset_rec + Rsize_rec, 1:elements_ij_proc) = &
    1504              :                BIb(1:dimen, R_offset_rec + 1:R_offset_rec + Rsize_rec, 1:elements_ij_proc) + &
    1505        16182 :                BIb_RS_mat_rec(1:dimen, 1:Rsize_rec, 1:elements_ij_proc)
    1506              : 
    1507              :             BIb(1:dimen, S_offset_rec + 1:S_offset_rec + Ssize_rec, 1:elements_ij_proc) = &
    1508              :                BIb(1:dimen, S_offset_rec + 1:S_offset_rec + Ssize_rec, 1:elements_ij_proc) + &
    1509        15006 :                BIb_RS_mat_rec(1:dimen, Rsize_rec + 1:Rsize_rec + Ssize_rec, 1:elements_ij_proc)
    1510              : 
    1511           54 :             DEALLOCATE (BIb_RS_mat_rec)
    1512              :          ELSE
    1513              :             ! the processor is the sender and receiver itself
    1514              :             BIb(1:dimen, R_offset + 1:R_offset + Rsize, 1:elements_ij_proc) = &
    1515              :                BIb(1:dimen, R_offset + 1:R_offset + Rsize, 1:elements_ij_proc) + &
    1516     16293273 :                BIb_RS_mat(1:dimen, 1:Rsize, 1:elements_ij_proc)
    1517              : 
    1518              :             BIb(1:dimen, S_offset + 1:S_offset + Ssize, 1:elements_ij_proc) = &
    1519              :                BIb(1:dimen, S_offset + 1:S_offset + Ssize, 1:elements_ij_proc) + &
    1520     14485815 :                BIb_RS_mat(1:dimen, Rsize + 1:Rsize + Ssize, 1:elements_ij_proc)
    1521              : 
    1522              :          END IF
    1523              : 
    1524              :       END DO ! loop over the processor starting from itself
    1525              : 
    1526         4653 :       CALL timestop(handle)
    1527              : 
    1528         4653 :    END SUBROUTINE transform_occupied_orbitals_second_big
    1529              : 
    1530              : ! **************************************************************************************************
    1531              : !> \brief ...
    1532              : !> \param dimen ...
    1533              : !> \param occupied ...
    1534              : !> \param virtual ...
    1535              : !> \param i_batch_start ...
    1536              : !> \param j_batch_start ...
    1537              : !> \param BIb ...
    1538              : !> \param C ...
    1539              : !> \param Auto ...
    1540              : !> \param elements_ij_proc ...
    1541              : !> \param ij_list_proc ...
    1542              : !> \param nspins ...
    1543              : !> \param Emp2 ...
    1544              : !> \param Emp2_Cou ...
    1545              : !> \param Emp2_ex ...
    1546              : ! **************************************************************************************************
    1547           33 :    SUBROUTINE transform_virtual_orbitals_and_accumulate(dimen, occupied, virtual, i_batch_start, &
    1548           33 :                                                         j_batch_start, BIb, C, Auto, elements_ij_proc, &
    1549           33 :                                                         ij_list_proc, nspins, Emp2, Emp2_Cou, Emp2_ex)
    1550              : 
    1551              :       INTEGER, INTENT(IN)                                :: dimen, occupied, virtual, i_batch_start, &
    1552              :                                                             j_batch_start
    1553              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
    1554              :          INTENT(INOUT)                                   :: BIb
    1555              :       REAL(KIND=dp), DIMENSION(dimen, dimen), INTENT(IN) :: C
    1556              :       REAL(KIND=dp), DIMENSION(dimen), INTENT(IN)        :: Auto
    1557              :       INTEGER, INTENT(IN)                                :: elements_ij_proc
    1558              :       INTEGER, DIMENSION(elements_ij_proc, 2), &
    1559              :          INTENT(IN)                                      :: ij_list_proc
    1560              :       INTEGER, INTENT(IN)                                :: nspins
    1561              :       REAL(KIND=dp), INTENT(INOUT)                       :: Emp2, Emp2_Cou, Emp2_ex
    1562              : 
    1563              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'transform_virtual_orbitals_and_accumulate'
    1564              :       REAL(KIND=dp), PARAMETER                           :: zero = 0.0_dp
    1565              : 
    1566              :       INTEGER                                            :: a, a_global, b, b_global, handle, i, &
    1567              :                                                             i_global, index_ij, j, j_global
    1568              :       REAL(KIND=dp)                                      :: iajb, ibja, parz, two
    1569           33 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: BIa
    1570              : 
    1571           33 :       CALL timeset(routineN, handle)
    1572              : 
    1573          132 :       ALLOCATE (BIa(dimen, virtual))
    1574              : 
    1575        48871 :       BIa = zero
    1576          389 :       DO index_ij = 1, elements_ij_proc
    1577              : 
    1578              :          CALL DGEMM('T', 'N', dimen, virtual, dimen, 1.0_dp, Bib(1, 1, index_ij), &
    1579          356 :                     dimen, C(1, occupied + 1), dimen, 0.0_dp, Bia(1, 1), dimen)
    1580      1100036 :          Bib(1:dimen, 1:virtual, index_ij) = Bia(1:dimen, 1:virtual)
    1581              : 
    1582              :       END DO
    1583              : 
    1584           33 :       DEALLOCATE (BIa)
    1585          132 :       ALLOCATE (BIa(virtual, virtual))
    1586              : 
    1587        43719 :       BIa = zero
    1588          389 :       DO index_ij = 1, elements_ij_proc
    1589              : 
    1590              :          CALL DGEMM('T', 'N', virtual, virtual, dimen, 1.0_dp, Bib(1, 1, index_ij), dimen, C(1, occupied + 1), dimen, 0.0_dp, &
    1591          356 :                     BIa(1, 1), virtual)
    1592       980141 :          BIb(1:virtual, 1:virtual, index_ij) = BIa(1:virtual, 1:virtual)
    1593              : 
    1594              :       END DO
    1595              : 
    1596           33 :       two = 2.0_dp/nspins
    1597          389 :       DO index_ij = 1, elements_ij_proc
    1598          356 :          i = ij_list_proc(index_ij, 1)
    1599          356 :          j = ij_list_proc(index_ij, 2)
    1600          356 :          i_global = i + i_batch_start
    1601          356 :          j_global = j + j_batch_start
    1602        16600 :          DO a = 1, virtual
    1603        16211 :             a_global = a + occupied
    1604       980108 :             DO b = 1, virtual
    1605       963541 :                b_global = b + occupied
    1606       963541 :                iajb = BIb(a, b, index_ij)
    1607       963541 :                ibja = BIb(b, a, index_ij)
    1608       963541 :                parz = iajb/(Auto(i_global) + Auto(j_global) - Auto(a_global) - Auto(b_global))
    1609              :                ! parz=parz*(two*iajb-ibja)   !Full
    1610              :                ! parz=parz*(iajb)            !Coulomb
    1611              :                ! parz=parz*(ibja)            !Coulomb
    1612              :                ! Emp2=Emp2+parz/nspins
    1613       963541 :                Emp2_Cou = Emp2_Cou + parz*two*(iajb)/nspins
    1614       963541 :                Emp2_ex = Emp2_ex - parz*(ibja)/nspins
    1615       979752 :                Emp2 = Emp2 + parz*(two*iajb - ibja)/nspins
    1616              :             END DO
    1617              :          END DO
    1618              :       END DO
    1619              : 
    1620           33 :       DEALLOCATE (BIa)
    1621              : 
    1622           33 :       CALL timestop(handle)
    1623              : 
    1624           33 :    END SUBROUTINE transform_virtual_orbitals_and_accumulate
    1625              : 
    1626              : ! **************************************************************************************************
    1627              : !> \brief ...
    1628              : !> \param dimen ...
    1629              : !> \param occ_i ...
    1630              : !> \param occ_j ...
    1631              : !> \param virt_i ...
    1632              : !> \param virt_j ...
    1633              : !> \param i_batch_start ...
    1634              : !> \param j_batch_start ...
    1635              : !> \param BIb ...
    1636              : !> \param C_i ...
    1637              : !> \param C_j ...
    1638              : !> \param Auto_i ...
    1639              : !> \param Auto_j ...
    1640              : !> \param elements_ij_proc ...
    1641              : !> \param ij_list_proc ...
    1642              : !> \param Emp2 ...
    1643              : !> \param Emp2_Cou ...
    1644              : ! **************************************************************************************************
    1645           10 :    SUBROUTINE transform_virtual_orbitals_and_accumulate_ABcase(dimen, occ_i, occ_j, virt_i, virt_j, i_batch_start, &
    1646           10 :                                                                j_batch_start, BIb, C_i, C_j, Auto_i, Auto_j, elements_ij_proc, &
    1647           10 :                                                                ij_list_proc, Emp2, Emp2_Cou)
    1648              : 
    1649              :       INTEGER, INTENT(IN)                                :: dimen, occ_i, occ_j, virt_i, virt_j, &
    1650              :                                                             i_batch_start, j_batch_start
    1651              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :), &
    1652              :          INTENT(INOUT)                                   :: BIb
    1653              :       REAL(KIND=dp), DIMENSION(dimen, dimen), INTENT(IN) :: C_i, C_j
    1654              :       REAL(KIND=dp), DIMENSION(dimen), INTENT(IN)        :: Auto_i, Auto_j
    1655              :       INTEGER, INTENT(IN)                                :: elements_ij_proc
    1656              :       INTEGER, DIMENSION(elements_ij_proc, 2), &
    1657              :          INTENT(IN)                                      :: ij_list_proc
    1658              :       REAL(KIND=dp), INTENT(INOUT)                       :: Emp2, Emp2_Cou
    1659              : 
    1660              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'transform_virtual_orbitals_and_accumulate_ABcase'
    1661              :       REAL(KIND=dp), PARAMETER                           :: two = 2.D+00, zero = 0.D+00
    1662              : 
    1663              :       INTEGER                                            :: a, a_global, b, b_global, handle, i, &
    1664              :                                                             i_global, index_ij, j, j_global, n, s
    1665              :       REAL(KIND=dp)                                      :: iajb, parz
    1666           10 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: BIa
    1667              : 
    1668           10 :       CALL timeset(routineN, handle)
    1669              : 
    1670           40 :       ALLOCATE (BIa(dimen, virt_i))
    1671              : 
    1672           56 :       DO index_ij = 1, elements_ij_proc
    1673              : 
    1674         1236 :          DO a = 1, virt_i
    1675         1190 :             a_global = a + occ_i
    1676        51930 :             DO S = 1, dimen
    1677              :                parz = zero
    1678      2449752 :                DO N = 1, dimen
    1679      2449752 :                   parz = parz + C_i(N, a_global)*BIb(N, S, index_ij)
    1680              :                END DO
    1681        51884 :                BIa(S, a) = parz
    1682              :             END DO
    1683              :          END DO
    1684              : 
    1685        51940 :          BIb(1:dimen, 1:virt_i, index_ij) = BIa
    1686              : 
    1687              :       END DO
    1688              : 
    1689           10 :       DEALLOCATE (BIa)
    1690           40 :       ALLOCATE (BIa(virt_i, virt_j))
    1691              : 
    1692           56 :       DO index_ij = 1, elements_ij_proc
    1693              : 
    1694         1236 :          DO a = 1, virt_i
    1695        47824 :             DO b = 1, virt_j
    1696        46588 :                b_global = b + occ_j
    1697        46588 :                parz = zero
    1698      2257144 :                DO S = 1, dimen
    1699      2257144 :                   parz = parz + C_j(S, b_global)*BIb(S, a, index_ij)
    1700              :                END DO
    1701        47778 :                BIa(a, b) = parz
    1702              :             END DO
    1703              :          END DO
    1704              : 
    1705        47904 :          BIb(1:virt_i, 1:virt_j, index_ij) = BIa
    1706              : 
    1707              :       END DO
    1708              : 
    1709           56 :       DO index_ij = 1, elements_ij_proc
    1710           46 :          i = ij_list_proc(index_ij, 1)
    1711           46 :          j = ij_list_proc(index_ij, 2)
    1712           46 :          i_global = i + i_batch_start
    1713           46 :          j_global = j + j_batch_start
    1714         1246 :          DO a = 1, virt_i
    1715         1190 :             a_global = a + occ_i
    1716        47824 :             DO b = 1, virt_j
    1717        46588 :                b_global = b + occ_j
    1718        46588 :                iajb = BIb(a, b, index_ij)
    1719        46588 :                parz = iajb*iajb/(Auto_i(i_global) + Auto_j(j_global) - Auto_i(a_global) - Auto_j(b_global))
    1720        46588 :                Emp2_Cou = Emp2_Cou + parz/two
    1721        47778 :                Emp2 = Emp2 + parz/two
    1722              :             END DO
    1723              :          END DO
    1724              :       END DO
    1725              : 
    1726           10 :       DEALLOCATE (BIa)
    1727              : 
    1728           10 :       CALL timestop(handle)
    1729              : 
    1730           10 :    END SUBROUTINE transform_virtual_orbitals_and_accumulate_ABcase
    1731              : 
    1732              : END MODULE mp2_direct_method
        

Generated by: LCOV version 2.0-1