LCOV - code coverage report
Current view: top level - src - qs_scf_lanczos.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:42dac4a) Lines: 57.9 % 437 253
Test Date: 2025-07-25 12:55:17 Functions: 66.7 % 3 2

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief module that contains the algorithms to perform an itrative
      10              : !>         diagonalization by the block-Lanczos approach
      11              : !> \par History
      12              : !>      05.2009 created [MI]
      13              : !> \author fawzi
      14              : ! **************************************************************************************************
      15              : MODULE qs_scf_lanczos
      16              : 
      17              :    USE cp_fm_basic_linalg,              ONLY: cp_fm_column_scale,&
      18              :                                               cp_fm_qr_factorization,&
      19              :                                               cp_fm_scale_and_add,&
      20              :                                               cp_fm_transpose,&
      21              :                                               cp_fm_triangular_multiply
      22              :    USE cp_fm_cholesky,                  ONLY: cp_fm_cholesky_decompose
      23              :    USE cp_fm_diag,                      ONLY: choose_eigv_solver
      24              :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      25              :                                               cp_fm_struct_release,&
      26              :                                               cp_fm_struct_type
      27              :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      28              :                                               cp_fm_get_submatrix,&
      29              :                                               cp_fm_release,&
      30              :                                               cp_fm_set_all,&
      31              :                                               cp_fm_set_submatrix,&
      32              :                                               cp_fm_to_fm,&
      33              :                                               cp_fm_type,&
      34              :                                               cp_fm_vectorsnorm
      35              :    USE cp_log_handling,                 ONLY: cp_to_string
      36              :    USE kinds,                           ONLY: dp
      37              :    USE parallel_gemm_api,               ONLY: parallel_gemm
      38              :    USE qs_mo_types,                     ONLY: get_mo_set,&
      39              :                                               mo_set_type
      40              :    USE qs_scf_types,                    ONLY: krylov_space_type
      41              :    USE scf_control_types,               ONLY: scf_control_type
      42              : #include "./base/base_uses.f90"
      43              : 
      44              :    IMPLICIT NONE
      45              :    PRIVATE
      46              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_scf_lanczos'
      47              : 
      48              :    PUBLIC :: krylov_space_allocate, lanczos_refinement, lanczos_refinement_2v
      49              : 
      50              : CONTAINS
      51              : 
      52              : ! **************************************************************************************************
      53              : 
      54              : ! **************************************************************************************************
      55              : !> \brief  allocates matrices and vectros used in the construction of
      56              : !>        the krylov space and for the lanczos refinement
      57              : !> \param krylov_space ...
      58              : !> \param scf_control ...
      59              : !> \param mos ...
      60              : !> \param
      61              : !> \par History
      62              : !>      05.2009 created [MI]
      63              : ! **************************************************************************************************
      64              : 
      65            8 :    SUBROUTINE krylov_space_allocate(krylov_space, scf_control, mos)
      66              : 
      67              :       TYPE(krylov_space_type), INTENT(INOUT)             :: krylov_space
      68              :       TYPE(scf_control_type), POINTER                    :: scf_control
      69              :       TYPE(mo_set_type), DIMENSION(:), INTENT(IN)        :: mos
      70              : 
      71              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'krylov_space_allocate'
      72              : 
      73              :       INTEGER                                            :: handle, ik, ispin, max_nmo, nao, nblock, &
      74              :                                                             ndim, nk, nmo, nspin
      75              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
      76              :       TYPE(cp_fm_type), POINTER                          :: mo_coeff
      77              : 
      78            8 :       CALL timeset(routineN, handle)
      79              : 
      80            8 :       IF (.NOT. ASSOCIATED(krylov_space%mo_conv)) THEN
      81            4 :          NULLIFY (fm_struct_tmp, mo_coeff)
      82              : 
      83            4 :          krylov_space%nkrylov = scf_control%diagonalization%nkrylov
      84            4 :          krylov_space%nblock = scf_control%diagonalization%nblock_krylov
      85              : 
      86            4 :          nk = krylov_space%nkrylov
      87            4 :          nblock = krylov_space%nblock
      88            4 :          nspin = SIZE(mos, 1)
      89              : 
      90           16 :          ALLOCATE (krylov_space%mo_conv(nspin))
      91           16 :          ALLOCATE (krylov_space%mo_refine(nspin))
      92           16 :          ALLOCATE (krylov_space%chc_mat(nspin))
      93           16 :          ALLOCATE (krylov_space%c_vec(nspin))
      94            4 :          max_nmo = 0
      95            8 :          DO ispin = 1, nspin
      96            4 :             CALL get_mo_set(mos(ispin), mo_coeff=mo_coeff, nao=nao, nmo=nmo)
      97            4 :             CALL cp_fm_create(krylov_space%mo_conv(ispin), mo_coeff%matrix_struct)
      98            4 :             CALL cp_fm_create(krylov_space%mo_refine(ispin), mo_coeff%matrix_struct)
      99            4 :             NULLIFY (fm_struct_tmp)
     100              :             CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmo, ncol_global=nmo, &
     101              :                                      para_env=mo_coeff%matrix_struct%para_env, &
     102            4 :                                      context=mo_coeff%matrix_struct%context)
     103            4 :             CALL cp_fm_create(krylov_space%chc_mat(ispin), fm_struct_tmp, "chc")
     104            4 :             CALL cp_fm_create(krylov_space%c_vec(ispin), fm_struct_tmp, "vec")
     105            4 :             CALL cp_fm_struct_release(fm_struct_tmp)
     106           12 :             max_nmo = MAX(max_nmo, nmo)
     107              :          END DO
     108              : 
     109              :          !the use of max_nmo might not be ok, in this case allocate nspin matrices
     110           12 :          ALLOCATE (krylov_space%c_eval(max_nmo))
     111              : 
     112           44 :          ALLOCATE (krylov_space%v_mat(nk))
     113              : 
     114            4 :          NULLIFY (fm_struct_tmp)
     115              :          CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nblock, &
     116              :                                   para_env=mo_coeff%matrix_struct%para_env, &
     117            4 :                                   context=mo_coeff%matrix_struct%context)
     118           36 :          DO ik = 1, nk
     119              :             CALL cp_fm_create(krylov_space%v_mat(ik), matrix_struct=fm_struct_tmp, &
     120           36 :                               name="v_mat_"//TRIM(ADJUSTL(cp_to_string(ik))))
     121              :          END DO
     122            4 :          ALLOCATE (krylov_space%tmp_mat)
     123              :          CALL cp_fm_create(krylov_space%tmp_mat, matrix_struct=fm_struct_tmp, &
     124            4 :                            name="tmp_mat")
     125            4 :          CALL cp_fm_struct_release(fm_struct_tmp)
     126              : 
     127              :          ! NOTE: the following matrices are small and could be defined
     128              : !           as standard array rather than istributed fm
     129            4 :          NULLIFY (fm_struct_tmp)
     130              :          CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nblock, ncol_global=nblock, &
     131              :                                   para_env=mo_coeff%matrix_struct%para_env, &
     132            4 :                                   context=mo_coeff%matrix_struct%context)
     133            4 :          ALLOCATE (krylov_space%block1_mat)
     134              :          CALL cp_fm_create(krylov_space%block1_mat, matrix_struct=fm_struct_tmp, &
     135            4 :                            name="a_mat_"//TRIM(ADJUSTL(cp_to_string(ik))))
     136            4 :          ALLOCATE (krylov_space%block2_mat)
     137              :          CALL cp_fm_create(krylov_space%block2_mat, matrix_struct=fm_struct_tmp, &
     138            4 :                            name="b_mat_"//TRIM(ADJUSTL(cp_to_string(ik))))
     139            4 :          ALLOCATE (krylov_space%block3_mat)
     140              :          CALL cp_fm_create(krylov_space%block3_mat, matrix_struct=fm_struct_tmp, &
     141            4 :                            name="b2_mat_"//TRIM(ADJUSTL(cp_to_string(ik))))
     142            4 :          CALL cp_fm_struct_release(fm_struct_tmp)
     143              : 
     144            4 :          ndim = nblock*nk
     145            4 :          NULLIFY (fm_struct_tmp)
     146              :          CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ndim, ncol_global=ndim, &
     147              :                                   para_env=mo_coeff%matrix_struct%para_env, &
     148            4 :                                   context=mo_coeff%matrix_struct%context)
     149            4 :          ALLOCATE (krylov_space%block4_mat)
     150              :          CALL cp_fm_create(krylov_space%block4_mat, matrix_struct=fm_struct_tmp, &
     151            4 :                            name="t_mat")
     152            4 :          ALLOCATE (krylov_space%block5_mat)
     153              :          CALL cp_fm_create(krylov_space%block5_mat, matrix_struct=fm_struct_tmp, &
     154            4 :                            name="t_vec")
     155            4 :          CALL cp_fm_struct_release(fm_struct_tmp)
     156           12 :          ALLOCATE (krylov_space%t_eval(ndim))
     157              : 
     158              :       ELSE
     159              :          !Nothing should be done
     160              :       END IF
     161              : 
     162            8 :       CALL timestop(handle)
     163              : 
     164            8 :    END SUBROUTINE krylov_space_allocate
     165              : 
     166              : ! **************************************************************************************************
     167              : !> \brief lanczos refinement by blocks of not-converged MOs
     168              : !> \param krylov_space ...
     169              : !> \param ks ...
     170              : !> \param c0 ...
     171              : !> \param c1 ...
     172              : !> \param eval ...
     173              : !> \param nao ...
     174              : !> \param eps_iter ...
     175              : !> \param ispin ...
     176              : !> \param check_moconv_only ...
     177              : !> \param
     178              : !> \par History
     179              : !>      05.2009 created [MI]
     180              : ! **************************************************************************************************
     181              : 
     182            0 :    SUBROUTINE lanczos_refinement(krylov_space, ks, c0, c1, eval, nao, &
     183              :                                  eps_iter, ispin, check_moconv_only)
     184              : 
     185              :       TYPE(krylov_space_type), POINTER                   :: krylov_space
     186              :       TYPE(cp_fm_type), INTENT(IN)                       :: ks, c0, c1
     187              :       REAL(dp), DIMENSION(:), POINTER                    :: eval
     188              :       INTEGER, INTENT(IN)                                :: nao
     189              :       REAL(dp), INTENT(IN)                               :: eps_iter
     190              :       INTEGER, INTENT(IN)                                :: ispin
     191              :       LOGICAL, INTENT(IN), OPTIONAL                      :: check_moconv_only
     192              : 
     193              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'lanczos_refinement'
     194              :       REAL(KIND=dp), PARAMETER                           :: rmone = -1.0_dp, rone = 1.0_dp, &
     195              :                                                             rzero = 0.0_dp
     196              : 
     197              :       INTEGER :: hand1, hand2, hand3, hand4, hand5, handle, ib, ik, imo, imo_low, imo_up, it, jt, &
     198              :          nblock, ndim, nmo, nmo_converged, nmo_nc, nmob, num_blocks
     199            0 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: itaken
     200              :       LOGICAL                                            :: my_check_moconv_only
     201              :       REAL(dp)                                           :: max_norm, min_norm, vmax
     202            0 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: q_mat, tblock, tvblock
     203              :       REAL(dp), DIMENSION(:), POINTER                    :: c_res, t_eval
     204              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
     205              :       TYPE(cp_fm_type)                                   :: c2_tmp, c3_tmp, c_tmp, hc
     206            0 :       TYPE(cp_fm_type), DIMENSION(:), POINTER            :: v_mat
     207              :       TYPE(cp_fm_type), POINTER                          :: a_mat, b2_mat, b_mat, chc, evec, t_mat, &
     208              :                                                             t_vec
     209              : 
     210            0 :       CALL timeset(routineN, handle)
     211              : 
     212            0 :       NULLIFY (fm_struct_tmp)
     213            0 :       NULLIFY (chc, evec)
     214              :       NULLIFY (c_res, t_eval)
     215            0 :       NULLIFY (t_mat, t_vec)
     216            0 :       NULLIFY (a_mat, b_mat, b2_mat, v_mat)
     217              : 
     218            0 :       nmo = SIZE(eval, 1)
     219            0 :       my_check_moconv_only = .FALSE.
     220            0 :       IF (PRESENT(check_moconv_only)) my_check_moconv_only = check_moconv_only
     221              : 
     222            0 :       chc => krylov_space%chc_mat(ispin)
     223            0 :       evec => krylov_space%c_vec(ispin)
     224            0 :       c_res => krylov_space%c_eval
     225            0 :       t_eval => krylov_space%t_eval
     226              : 
     227              :       NULLIFY (fm_struct_tmp)
     228              :       CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmo, &
     229              :                                para_env=c0%matrix_struct%para_env, &
     230            0 :                                context=c0%matrix_struct%context)
     231              :       CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
     232            0 :                         name="c_tmp")
     233              :       CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
     234            0 :                         name="hc")
     235            0 :       CALL cp_fm_struct_release(fm_struct_tmp)
     236              : 
     237              :       !Compute (C^t)HC
     238            0 :       CALL parallel_gemm('N', 'N', nao, nmo, nao, rone, ks, c0, rzero, hc)
     239            0 :       CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, hc, rzero, chc)
     240              : 
     241              :       !Diagonalize  (C^t)HC
     242            0 :       CALL timeset(routineN//"diag_chc", hand1)
     243            0 :       CALL choose_eigv_solver(chc, evec, eval)
     244            0 :       CALL timestop(hand1)
     245              : 
     246              :       !Rotate the C vectors
     247            0 :       CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, c0, evec, rzero, c1)
     248              : 
     249              :       !Check for converged states
     250            0 :       CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, hc, evec, rzero, c_tmp)
     251            0 :       CALL cp_fm_to_fm(c1, c0, nmo, 1, 1)
     252            0 :       CALL cp_fm_column_scale(c1, eval)
     253            0 :       CALL cp_fm_scale_and_add(1.0_dp, c_tmp, rmone, c1)
     254            0 :       CALL cp_fm_vectorsnorm(c_tmp, c_res)
     255              : 
     256            0 :       nmo_converged = 0
     257            0 :       nmo_nc = 0
     258            0 :       max_norm = 0.0_dp
     259            0 :       min_norm = 1.e10_dp
     260            0 :       CALL cp_fm_set_all(c1, rzero)
     261            0 :       DO imo = 1, nmo
     262            0 :          max_norm = MAX(max_norm, c_res(imo))
     263            0 :          min_norm = MIN(min_norm, c_res(imo))
     264              :       END DO
     265            0 :       DO imo = 1, nmo
     266            0 :          IF (c_res(imo) <= eps_iter) THEN
     267            0 :             nmo_converged = nmo_converged + 1
     268              :          ELSE
     269            0 :             nmo_nc = nmo - nmo_converged
     270            0 :             EXIT
     271              :          END IF
     272              :       END DO
     273              : 
     274            0 :       nblock = krylov_space%nblock
     275            0 :       num_blocks = nmo_nc/nblock
     276              : 
     277            0 :       krylov_space%nmo_nc = nmo_nc
     278            0 :       krylov_space%nmo_conv = nmo_converged
     279            0 :       krylov_space%max_res_norm = max_norm
     280            0 :       krylov_space%min_res_norm = min_norm
     281              : 
     282            0 :       IF (my_check_moconv_only) THEN
     283            0 :          CALL cp_fm_release(c_tmp)
     284            0 :          CALL cp_fm_release(hc)
     285            0 :          CALL timestop(handle)
     286            0 :          RETURN
     287            0 :       ELSE IF (krylov_space%nmo_nc > 0) THEN
     288              : 
     289            0 :          CALL cp_fm_to_fm(c0, c1, nmo_nc, nmo_converged + 1, 1)
     290              : 
     291            0 :          nblock = krylov_space%nblock
     292            0 :          IF (MODULO(nmo_nc, nblock) > 0.0_dp) THEN
     293            0 :             num_blocks = nmo_nc/nblock + 1
     294              :          ELSE
     295            0 :             num_blocks = nmo_nc/nblock
     296              :          END IF
     297              : 
     298            0 :          DO ib = 1, num_blocks
     299              : 
     300            0 :             imo_low = (ib - 1)*nblock + 1
     301            0 :             imo_up = MIN(ib*nblock, nmo_nc)
     302            0 :             nmob = imo_up - imo_low + 1
     303            0 :             ndim = krylov_space%nkrylov*nmob
     304              : 
     305            0 :             NULLIFY (fm_struct_tmp)
     306              :             CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=ndim, &
     307              :                                      para_env=c0%matrix_struct%para_env, &
     308            0 :                                      context=c0%matrix_struct%context)
     309              :             CALL cp_fm_create(c2_tmp, matrix_struct=fm_struct_tmp, &
     310            0 :                               name="c2_tmp")
     311            0 :             CALL cp_fm_struct_release(fm_struct_tmp)
     312            0 :             NULLIFY (fm_struct_tmp)
     313              :             CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=ndim, &
     314              :                                      para_env=c0%matrix_struct%para_env, &
     315            0 :                                      context=c0%matrix_struct%context)
     316              :             CALL cp_fm_create(c3_tmp, matrix_struct=fm_struct_tmp, &
     317            0 :                               name="c3_tmp")
     318            0 :             CALL cp_fm_struct_release(fm_struct_tmp)
     319              : 
     320              :             ! Create local matrix of right size
     321            0 :             IF (nmob /= nblock) THEN
     322            0 :                NULLIFY (a_mat, b_mat, b2_mat, t_mat, t_vec, v_mat)
     323            0 :                ALLOCATE (a_mat, b_mat, b2_mat, t_mat, t_vec)
     324            0 :                NULLIFY (fm_struct_tmp)
     325              :                CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=nmob, &
     326              :                                         para_env=chc%matrix_struct%para_env, &
     327            0 :                                         context=chc%matrix_struct%context)
     328              :                CALL cp_fm_create(a_mat, matrix_struct=fm_struct_tmp, &
     329            0 :                                  name="a_mat")
     330              :                CALL cp_fm_create(b_mat, matrix_struct=fm_struct_tmp, &
     331            0 :                                  name="b_mat")
     332              :                CALL cp_fm_create(b2_mat, matrix_struct=fm_struct_tmp, &
     333            0 :                                  name="b2_mat")
     334            0 :                CALL cp_fm_struct_release(fm_struct_tmp)
     335            0 :                NULLIFY (fm_struct_tmp)
     336              :                CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ndim, ncol_global=ndim, &
     337              :                                         para_env=chc%matrix_struct%para_env, &
     338            0 :                                         context=chc%matrix_struct%context)
     339              :                CALL cp_fm_create(t_mat, matrix_struct=fm_struct_tmp, &
     340            0 :                                  name="t_mat")
     341              :                CALL cp_fm_create(t_vec, matrix_struct=fm_struct_tmp, &
     342            0 :                                  name="t_vec")
     343            0 :                CALL cp_fm_struct_release(fm_struct_tmp)
     344            0 :                NULLIFY (fm_struct_tmp)
     345              :                CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
     346              :                                         para_env=c0%matrix_struct%para_env, &
     347            0 :                                         context=c0%matrix_struct%context)
     348            0 :                ALLOCATE (v_mat(krylov_space%nkrylov))
     349            0 :                DO ik = 1, krylov_space%nkrylov
     350              :                   CALL cp_fm_create(v_mat(ik), matrix_struct=fm_struct_tmp, &
     351            0 :                                     name="v_mat")
     352              :                END DO
     353            0 :                CALL cp_fm_struct_release(fm_struct_tmp)
     354              :             ELSE
     355            0 :                a_mat => krylov_space%block1_mat
     356            0 :                b_mat => krylov_space%block2_mat
     357            0 :                b2_mat => krylov_space%block3_mat
     358            0 :                t_mat => krylov_space%block4_mat
     359            0 :                t_vec => krylov_space%block5_mat
     360            0 :                v_mat => krylov_space%v_mat
     361              :             END IF
     362              : 
     363            0 :             ALLOCATE (tblock(nmob, nmob))
     364            0 :             ALLOCATE (tvblock(nmob, ndim))
     365              : 
     366            0 :             CALL timeset(routineN//"_kry_loop", hand2)
     367            0 :             CALL cp_fm_set_all(b_mat, rzero)
     368            0 :             CALL cp_fm_set_all(t_mat, rzero)
     369            0 :             CALL cp_fm_to_fm(c1, v_mat(1), nmob, imo_low, 1)
     370              : 
     371              :             !Compute A =(V^t)HV
     372            0 :             CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(1), rzero, hc)
     373              :             CALL parallel_gemm('T', 'N', nmob, nmob, nao, rone, v_mat(1), hc, &
     374            0 :                                rzero, a_mat)
     375              : 
     376              :             !Compute the residual matrix R for next
     377              :             !factorisation
     378              :             CALL parallel_gemm('N', 'N', nao, nmob, nmob, rone, v_mat(1), a_mat, &
     379            0 :                                rzero, c_tmp)
     380            0 :             CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
     381              : 
     382              :             ! Build the block tridiagonal matrix
     383            0 :             CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
     384            0 :             CALL cp_fm_set_submatrix(t_mat, tblock, 1, 1, nmob, nmob)
     385              : 
     386            0 :             DO ik = 2, krylov_space%nkrylov
     387              : 
     388              :                ! Call lapack for QR factorization
     389            0 :                CALL cp_fm_set_all(b_mat, rzero)
     390            0 :                CALL cp_fm_to_fm(c_tmp, v_mat(ik), nmob, 1, 1)
     391            0 :                CALL cp_fm_qr_factorization(c_tmp, b_mat, nao, nmob, 1, 1)
     392              : 
     393              :                CALL cp_fm_triangular_multiply(b_mat, v_mat(ik), side="R", invert_tr=.TRUE., &
     394            0 :                                               n_rows=nao, n_cols=nmob)
     395              : 
     396              :                !Compute A =(V^t)HV
     397            0 :                CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(ik), rzero, hc)
     398            0 :                CALL parallel_gemm('T', 'N', nmob, nmob, nao, rone, v_mat(ik), hc, rzero, a_mat)
     399              : 
     400              :                !Compute the !residual matrix R !for next !factorisation
     401              :                CALL parallel_gemm('N', 'N', nao, nmob, nmob, rone, v_mat(ik), a_mat, &
     402            0 :                                   rzero, c_tmp)
     403            0 :                CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
     404            0 :                CALL cp_fm_to_fm(v_mat(ik - 1), hc, nmob, 1, 1)
     405              :                CALL cp_fm_triangular_multiply(b_mat, hc, side='R', transpose_tr=.TRUE., &
     406            0 :                                               n_rows=nao, n_cols=nmob, alpha=rmone)
     407            0 :                CALL cp_fm_scale_and_add(rone, c_tmp, rone, hc)
     408              : 
     409              :                ! Build the block tridiagonal matrix
     410            0 :                it = (ik - 2)*nmob + 1
     411            0 :                jt = (ik - 1)*nmob + 1
     412              : 
     413            0 :                CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
     414            0 :                CALL cp_fm_set_submatrix(t_mat, tblock, jt, jt, nmob, nmob)
     415            0 :                CALL cp_fm_transpose(b_mat, a_mat)
     416            0 :                CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
     417            0 :                CALL cp_fm_set_submatrix(t_mat, tblock, it, jt, nmob, nmob)
     418              : 
     419              :             END DO ! ik
     420            0 :             CALL timestop(hand2)
     421              : 
     422            0 :             DEALLOCATE (tblock)
     423              : 
     424            0 :             CALL timeset(routineN//"_diag_tri", hand3)
     425              : 
     426            0 :             CALL choose_eigv_solver(t_mat, t_vec, t_eval)
     427              :             ! Diagonalize the block-tridiagonal matrix
     428            0 :             CALL timestop(hand3)
     429              : 
     430            0 :             CALL timeset(routineN//"_build_cnew", hand4)
     431              : !        !Compute the refined vectors
     432            0 :             CALL cp_fm_set_all(c2_tmp, rzero)
     433            0 :             DO ik = 1, krylov_space%nkrylov
     434            0 :                jt = (ik - 1)*nmob
     435              :                CALL parallel_gemm('N', 'N', nao, ndim, nmob, rone, v_mat(ik), t_vec, rone, c2_tmp, &
     436            0 :                                   b_first_row=(jt + 1))
     437              :             END DO
     438            0 :             DEALLOCATE (tvblock)
     439              : 
     440            0 :             CALL cp_fm_set_all(c3_tmp, rzero)
     441            0 :             CALL parallel_gemm('T', 'N', nmob, ndim, nao, rone, v_mat(1), c2_tmp, rzero, c3_tmp)
     442              : 
     443              :             !Try to avoid linear dependencies
     444            0 :             ALLOCATE (q_mat(nmob, ndim))
     445              :             !get max
     446            0 :             CALL cp_fm_get_submatrix(c3_tmp, q_mat, 1, 1, nmob, ndim)
     447              : 
     448            0 :             ALLOCATE (itaken(ndim))
     449            0 :             itaken = 0
     450            0 :             DO it = 1, nmob
     451            0 :                vmax = 0.0_dp
     452              :                !select index ik
     453            0 :                DO jt = 1, ndim
     454            0 :                   IF (itaken(jt) == 0 .AND. ABS(q_mat(it, jt)) > vmax) THEN
     455            0 :                      vmax = ABS(q_mat(it, jt))
     456            0 :                      ik = jt
     457              :                   END IF
     458              :                END DO
     459            0 :                itaken(ik) = 1
     460              : 
     461            0 :                CALL cp_fm_to_fm(c2_tmp, v_mat(1), 1, ik, it)
     462              :             END DO
     463            0 :             DEALLOCATE (itaken)
     464            0 :             DEALLOCATE (q_mat)
     465              : 
     466              :             !Copy in the converged set to enlarge the converged subspace
     467            0 :             CALL cp_fm_to_fm(v_mat(1), c0, nmob, 1, (nmo_converged + imo_low))
     468            0 :             CALL timestop(hand4)
     469              : 
     470            0 :             IF (nmob < nblock) THEN
     471            0 :                CALL cp_fm_release(a_mat)
     472            0 :                CALL cp_fm_release(b_mat)
     473            0 :                CALL cp_fm_release(b2_mat)
     474            0 :                CALL cp_fm_release(t_mat)
     475            0 :                CALL cp_fm_release(t_vec)
     476            0 :                DEALLOCATE (a_mat, b_mat, b2_mat, t_mat, t_vec)
     477            0 :                CALL cp_fm_release(v_mat)
     478              :             END IF
     479            0 :             CALL cp_fm_release(c2_tmp)
     480            0 :             CALL cp_fm_release(c3_tmp)
     481              :          END DO ! ib
     482              : 
     483            0 :          CALL timeset(routineN//"_ortho", hand5)
     484            0 :          CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, c0, rzero, chc)
     485              : 
     486            0 :          CALL cp_fm_cholesky_decompose(chc, nmo)
     487            0 :          CALL cp_fm_triangular_multiply(chc, c0, 'R', invert_tr=.TRUE.)
     488            0 :          CALL timestop(hand5)
     489              : 
     490            0 :          CALL cp_fm_release(c_tmp)
     491            0 :          CALL cp_fm_release(hc)
     492              :       ELSE
     493            0 :          CALL cp_fm_release(c_tmp)
     494            0 :          CALL cp_fm_release(hc)
     495            0 :          CALL timestop(handle)
     496            0 :          RETURN
     497              :       END IF
     498              : 
     499            0 :       CALL timestop(handle)
     500              : 
     501            0 :    END SUBROUTINE lanczos_refinement
     502              : 
     503              : !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
     504              : 
     505              : ! **************************************************************************************************
     506              : !> \brief ...
     507              : !> \param krylov_space ...
     508              : !> \param ks ...
     509              : !> \param c0 ...
     510              : !> \param c1 ...
     511              : !> \param eval ...
     512              : !> \param nao ...
     513              : !> \param eps_iter ...
     514              : !> \param ispin ...
     515              : !> \param check_moconv_only ...
     516              : ! **************************************************************************************************
     517          592 :    SUBROUTINE lanczos_refinement_2v(krylov_space, ks, c0, c1, eval, nao, &
     518              :                                     eps_iter, ispin, check_moconv_only)
     519              : 
     520              :       TYPE(krylov_space_type), POINTER                   :: krylov_space
     521              :       TYPE(cp_fm_type), INTENT(IN)                       :: ks, c0, c1
     522              :       REAL(dp), DIMENSION(:), POINTER                    :: eval
     523              :       INTEGER, INTENT(IN)                                :: nao
     524              :       REAL(dp), INTENT(IN)                               :: eps_iter
     525              :       INTEGER, INTENT(IN)                                :: ispin
     526              :       LOGICAL, INTENT(IN), OPTIONAL                      :: check_moconv_only
     527              : 
     528              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'lanczos_refinement_2v'
     529              :       REAL(KIND=dp), PARAMETER                           :: rmone = -1.0_dp, rone = 1.0_dp, &
     530              :                                                             rzero = 0.0_dp
     531              : 
     532              :       INTEGER :: hand1, hand2, hand3, hand4, hand5, hand6, handle, i, ia, ib, ik, imo, imo_low, &
     533              :          imo_up, info, it, j, jt, liwork, lwork, nblock, ndim, nmo, nmo_converged, nmo_nc, nmob, &
     534              :          num_blocks
     535          592 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: itaken
     536          592 :       INTEGER, DIMENSION(:), POINTER                     :: iwork
     537              :       LOGICAL                                            :: my_check_moconv_only
     538              :       REAL(dp)                                           :: max_norm, min_norm, vmax
     539          592 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: a_block, b_block, q_mat, t_mat
     540          592 :       REAL(dp), DIMENSION(:), POINTER                    :: c_res, t_eval
     541          592 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: work
     542          592 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a_loc, b_loc
     543              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
     544              :       TYPE(cp_fm_type)                                   :: b_mat, c2_tmp, c_tmp, hc, v_tmp
     545          592 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: v_mat
     546              :       TYPE(cp_fm_type), POINTER                          :: chc, evec
     547              : 
     548          592 :       CALL timeset(routineN, handle)
     549              : 
     550          592 :       NULLIFY (fm_struct_tmp)
     551          592 :       NULLIFY (chc, evec)
     552          592 :       NULLIFY (c_res, t_eval)
     553          592 :       NULLIFY (b_loc, a_loc)
     554              : 
     555          592 :       nmo = SIZE(eval, 1)
     556          592 :       my_check_moconv_only = .FALSE.
     557          592 :       IF (PRESENT(check_moconv_only)) my_check_moconv_only = check_moconv_only
     558              : 
     559          592 :       chc => krylov_space%chc_mat(ispin)
     560          592 :       evec => krylov_space%c_vec(ispin)
     561          592 :       c_res => krylov_space%c_eval
     562          592 :       t_eval => krylov_space%t_eval
     563              : 
     564              :       NULLIFY (fm_struct_tmp)
     565              :       CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmo, &
     566              :                                para_env=c0%matrix_struct%para_env, &
     567          592 :                                context=c0%matrix_struct%context)
     568              :       CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
     569          592 :                         name="c_tmp")
     570              :       CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
     571          592 :                         name="hc")
     572          592 :       CALL cp_fm_struct_release(fm_struct_tmp)
     573              : 
     574              :       !Compute (C^t)HC
     575          592 :       CALL parallel_gemm('N', 'N', nao, nmo, nao, rone, ks, c0, rzero, hc)
     576          592 :       CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, hc, rzero, chc)
     577              : 
     578              :       !Diagonalize  (C^t)HC
     579          592 :       CALL timeset(routineN//"diag_chc", hand1)
     580          592 :       CALL choose_eigv_solver(chc, evec, eval)
     581              : 
     582          592 :       CALL timestop(hand1)
     583              : 
     584          592 :       CALL timeset(routineN//"check_conv", hand6)
     585              :       !Rotate the C vectors
     586          592 :       CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, c0, evec, rzero, c1)
     587              : 
     588              :       !Check for converged states
     589          592 :       CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, hc, evec, rzero, c_tmp)
     590          592 :       CALL cp_fm_to_fm(c1, c0, nmo, 1, 1)
     591          592 :       CALL cp_fm_column_scale(c1, eval)
     592          592 :       CALL cp_fm_scale_and_add(1.0_dp, c_tmp, rmone, c1)
     593          592 :       CALL cp_fm_vectorsnorm(c_tmp, c_res)
     594              : 
     595          592 :       nmo_converged = 0
     596          592 :       nmo_nc = 0
     597          592 :       max_norm = 0.0_dp
     598          592 :       min_norm = 1.e10_dp
     599          592 :       CALL cp_fm_set_all(c1, rzero)
     600        21904 :       DO imo = 1, nmo
     601        21312 :          max_norm = MAX(max_norm, c_res(imo))
     602        21904 :          min_norm = MIN(min_norm, c_res(imo))
     603              :       END DO
     604        19144 :       DO imo = 1, nmo
     605        19144 :          IF (c_res(imo) <= eps_iter) THEN
     606        18552 :             nmo_converged = nmo_converged + 1
     607              :          ELSE
     608          582 :             nmo_nc = nmo - nmo_converged
     609          582 :             EXIT
     610              :          END IF
     611              :       END DO
     612          592 :       CALL timestop(hand6)
     613              : 
     614          592 :       CALL cp_fm_release(c_tmp)
     615          592 :       CALL cp_fm_release(hc)
     616              : 
     617          592 :       krylov_space%nmo_nc = nmo_nc
     618          592 :       krylov_space%nmo_conv = nmo_converged
     619          592 :       krylov_space%max_res_norm = max_norm
     620          592 :       krylov_space%min_res_norm = min_norm
     621              : 
     622          592 :       IF (my_check_moconv_only) THEN
     623              :          ! Do nothing
     624          592 :       ELSE IF (krylov_space%nmo_nc > 0) THEN
     625              : 
     626          582 :          CALL cp_fm_to_fm(c0, c1, nmo_nc, nmo_converged + 1, 1)
     627              : 
     628          582 :          nblock = krylov_space%nblock
     629          582 :          IF (MODULO(nmo_nc, nblock) > 0.0_dp) THEN
     630            0 :             num_blocks = nmo_nc/nblock + 1
     631              :          ELSE
     632          582 :             num_blocks = nmo_nc/nblock
     633              :          END IF
     634              : 
     635         3342 :          DO ib = 1, num_blocks
     636              : 
     637         2760 :             imo_low = (ib - 1)*nblock + 1
     638         2760 :             imo_up = MIN(ib*nblock, nmo_nc)
     639         2760 :             nmob = imo_up - imo_low + 1
     640         2760 :             ndim = krylov_space%nkrylov*nmob
     641              : 
     642              :             ! Allocation
     643         2760 :             CALL timeset(routineN//"alloc", hand6)
     644        11040 :             ALLOCATE (a_block(nmob, nmob))
     645         8280 :             ALLOCATE (b_block(nmob, nmob))
     646        11040 :             ALLOCATE (t_mat(ndim, ndim))
     647              : 
     648         2760 :             NULLIFY (fm_struct_tmp)
     649              :             ! by forcing ncol_block=nmo, the needed part of the matrix is distributed on a subset of processes
     650              :             ! this is due to the use of two-dimensional grids of processes
     651              :             ! nrow_global is distributed over num_pe(1)
     652              :             ! a local_data array is anyway allocated for the processes non included
     653              :             ! this should have a minimum size
     654              :             ! with ncol_local=1.
     655              :             CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
     656              :                                      ncol_block=nmob, &
     657              :                                      para_env=c0%matrix_struct%para_env, &
     658              :                                      context=c0%matrix_struct%context, &
     659         2760 :                                      force_block=.TRUE.)
     660              :             CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
     661         2760 :                               name="c_tmp")
     662         2760 :             CALL cp_fm_set_all(c_tmp, rzero)
     663              :             CALL cp_fm_create(v_tmp, matrix_struct=fm_struct_tmp, &
     664         2760 :                               name="v_tmp")
     665         2760 :             CALL cp_fm_set_all(v_tmp, rzero)
     666         2760 :             CALL cp_fm_struct_release(fm_struct_tmp)
     667         2760 :             NULLIFY (fm_struct_tmp)
     668        30360 :             ALLOCATE (v_mat(krylov_space%nkrylov))
     669              :             CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
     670              :                                      ncol_block=nmob, &
     671              :                                      para_env=c0%matrix_struct%para_env, &
     672              :                                      context=c0%matrix_struct%context, &
     673         2760 :                                      force_block=.TRUE.)
     674        24840 :             DO ik = 1, krylov_space%nkrylov
     675              :                CALL cp_fm_create(v_mat(ik), matrix_struct=fm_struct_tmp, &
     676        24840 :                                  name="v_mat")
     677              :             END DO
     678              :             CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
     679         2760 :                               name="hc")
     680         2760 :             CALL cp_fm_struct_release(fm_struct_tmp)
     681         2760 :             NULLIFY (fm_struct_tmp)
     682              :             CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=ndim, &
     683              :                                      ncol_block=ndim, &
     684              :                                      para_env=c0%matrix_struct%para_env, &
     685              :                                      context=c0%matrix_struct%context, &
     686         2760 :                                      force_block=.TRUE.)
     687              :             CALL cp_fm_create(c2_tmp, matrix_struct=fm_struct_tmp, &
     688         2760 :                               name="c2_tmp")
     689         2760 :             CALL cp_fm_struct_release(fm_struct_tmp)
     690              : 
     691         2760 :             NULLIFY (fm_struct_tmp)
     692              :             CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=nmob, &
     693              :                                      para_env=c0%matrix_struct%para_env, &
     694         2760 :                                      context=c0%matrix_struct%context)
     695              :             CALL cp_fm_create(b_mat, matrix_struct=fm_struct_tmp, &
     696         2760 :                               name="b_mat")
     697         2760 :             CALL cp_fm_struct_release(fm_struct_tmp)
     698         2760 :             CALL timestop(hand6)
     699              :             !End allocation
     700              : 
     701         2760 :             CALL cp_fm_set_all(b_mat, rzero)
     702         2760 :             CALL cp_fm_to_fm(c1, v_mat(1), nmob, imo_low, 1)
     703              : 
     704              :             ! Here starts the construction of krylov space
     705         2760 :             CALL timeset(routineN//"_kry_loop", hand2)
     706              :             !Compute A =(V^t)HV
     707         2760 :             CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(1), rzero, hc)
     708              : 
     709         8280 :             a_block = 0.0_dp
     710         2760 :             a_loc => v_mat(1)%local_data
     711         2760 :             b_loc => hc%local_data
     712              : 
     713         2760 :             IF (SIZE(hc%local_data, 2) == nmob) THEN
     714              :                ! this is a work around to avoid problems due to the two dimensional grid of processes
     715              :                CALL dgemm('T', 'N', nmob, nmob, SIZE(hc%local_data, 1), 1.0_dp, a_loc(1, 1), &
     716         2760 :                           SIZE(hc%local_data, 1), b_loc(1, 1), SIZE(hc%local_data, 1), 0.0_dp, a_block(1, 1), nmob)
     717              :             END IF
     718         2760 :             CALL hc%matrix_struct%para_env%sum(a_block)
     719              : 
     720              :             !Compute the residual matrix R for next
     721              :             !factorisation
     722       149040 :             c_tmp%local_data = 0.0_dp
     723         2760 :             IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
     724         2760 :                b_loc => c_tmp%local_data
     725              :                CALL dgemm('N', 'N', SIZE(c_tmp%local_data, 1), nmob, nmob, 1.0_dp, a_loc(1, 1), &
     726              :                           SIZE(c_tmp%local_data, 1), a_block(1, 1), nmob, 0.0_dp, &
     727         2760 :                           b_loc(1, 1), SIZE(c_tmp%local_data, 1))
     728              :             END IF
     729         2760 :             CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
     730              : 
     731              :             ! Build the block tridiagonal matrix
     732       201480 :             t_mat = 0.0_dp
     733         5520 :             DO i = 1, nmob
     734         8280 :                t_mat(1:nmob, i) = a_block(1:nmob, i)
     735              :             END DO
     736              : 
     737        22080 :             DO ik = 2, krylov_space%nkrylov
     738              :                ! Call lapack for QR factorization
     739        19320 :                CALL cp_fm_set_all(b_mat, rzero)
     740        19320 :                CALL cp_fm_to_fm(c_tmp, v_mat(ik), nmob, 1, 1)
     741        19320 :                CALL cp_fm_qr_factorization(c_tmp, b_mat, nao, nmob, 1, 1)
     742              : 
     743              :                CALL cp_fm_triangular_multiply(b_mat, v_mat(ik), side="R", invert_tr=.TRUE., &
     744        19320 :                                               n_rows=nao, n_cols=nmob)
     745              : 
     746        19320 :                CALL cp_fm_get_submatrix(b_mat, b_block, 1, 1, nmob, nmob)
     747              : 
     748              :                !Compute A =(V^t)HV
     749        19320 :                CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(ik), rzero, hc)
     750              : 
     751        57960 :                a_block = 0.0_dp
     752        19320 :                IF (SIZE(hc%local_data, 2) == nmob) THEN
     753        19320 :                   a_loc => v_mat(ik)%local_data
     754        19320 :                   b_loc => hc%local_data
     755              :                   CALL dgemm('T', 'N', nmob, nmob, SIZE(hc%local_data, 1), 1.0_dp, a_loc(1, 1), &
     756        19320 :                              SIZE(hc%local_data, 1), b_loc(1, 1), SIZE(hc%local_data, 1), 0.0_dp, a_block(1, 1), nmob)
     757              :                END IF
     758        19320 :                CALL hc%matrix_struct%para_env%sum(a_block)
     759              : 
     760              :                !Compute the residual matrix R for next
     761              :                !factorisation
     762      1043280 :                c_tmp%local_data = 0.0_dp
     763        19320 :                IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
     764        19320 :                   a_loc => v_mat(ik)%local_data
     765        19320 :                   b_loc => c_tmp%local_data
     766              :                   CALL dgemm('N', 'N', SIZE(c_tmp%local_data, 1), nmob, nmob, 1.0_dp, a_loc(1, 1), &
     767              :                              SIZE(c_tmp%local_data, 1), a_block(1, 1), nmob, 0.0_dp, &
     768        19320 :                              b_loc(1, 1), SIZE(c_tmp%local_data, 1))
     769              :                END IF
     770        19320 :                CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
     771              : 
     772        19320 :                IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
     773        19320 :                   a_loc => v_mat(ik - 1)%local_data
     774        38640 :                   DO j = 1, nmob
     775        57960 :                      DO i = 1, j
     776      1043280 :                         DO ia = 1, SIZE(c_tmp%local_data, 1)
     777      1023960 :                            b_loc(ia, i) = b_loc(ia, i) - a_loc(ia, j)*b_block(i, j)
     778              :                         END DO
     779              :                      END DO
     780              :                   END DO
     781              :                END IF
     782              : 
     783              :                ! Build the block tridiagonal matrix
     784        19320 :                it = (ik - 2)*nmob
     785        19320 :                jt = (ik - 1)*nmob
     786        41400 :                DO j = 1, nmob
     787        38640 :                   t_mat(jt + 1:jt + nmob, jt + j) = a_block(1:nmob, j)
     788        57960 :                   DO i = 1, nmob
     789        19320 :                      t_mat(it + i, jt + j) = b_block(j, i)
     790        38640 :                      t_mat(jt + j, it + i) = b_block(j, i)
     791              :                   END DO
     792              :                END DO
     793              :             END DO ! ik
     794         2760 :             CALL timestop(hand2)
     795              : 
     796         2760 :             CALL timeset(routineN//"_diag_tri", hand3)
     797         2760 :             lwork = 1 + 6*ndim + 2*ndim**2 + 5000
     798         2760 :             liwork = 5*ndim + 3
     799         8280 :             ALLOCATE (work(lwork))
     800         8280 :             ALLOCATE (iwork(liwork))
     801              : 
     802              :             ! Diagonalize the block-tridiagonal matrix
     803              :             CALL dsyevd('V', 'U', ndim, t_mat(1, 1), ndim, t_eval(1), &
     804         2760 :                         work(1), lwork, iwork(1), liwork, info)
     805         2760 :             DEALLOCATE (work)
     806         2760 :             DEALLOCATE (iwork)
     807         2760 :             CALL timestop(hand3)
     808              : 
     809         2760 :             CALL timeset(routineN//"_build_cnew", hand4)
     810              : !        !Compute the refined vectors
     811              : 
     812      1173000 :             c2_tmp%local_data = 0.0_dp
     813        11040 :             ALLOCATE (q_mat(nmob, ndim))
     814        46920 :             q_mat = 0.0_dp
     815         2760 :             b_loc => c2_tmp%local_data
     816        24840 :             DO ik = 1, krylov_space%nkrylov
     817        22080 :                CALL cp_fm_to_fm(v_mat(ik), v_tmp, nmob, 1, 1)
     818        24840 :                IF (SIZE(c2_tmp%local_data, 2) == ndim) THEN
     819              : !            a_loc => v_mat(ik)%local_data
     820        22080 :                   a_loc => v_tmp%local_data
     821        22080 :                   it = (ik - 1)*nmob
     822              :                   CALL dgemm('N', 'N', SIZE(c2_tmp%local_data, 1), ndim, nmob, 1.0_dp, a_loc(1, 1), &
     823              :                              SIZE(c2_tmp%local_data, 1), t_mat(it + 1, 1), ndim, 1.0_dp, &
     824        22080 :                              b_loc(1, 1), SIZE(c2_tmp%local_data, 1))
     825              :                END IF
     826              :             END DO !ik
     827              : 
     828              :             !Try to avoid linear dependencies
     829         2760 :             CALL cp_fm_to_fm(v_mat(1), v_tmp, nmob, 1, 1)
     830         2760 :             IF (SIZE(c2_tmp%local_data, 2) == ndim) THEN
     831              : !          a_loc => v_mat(1)%local_data
     832         2760 :                a_loc => v_tmp%local_data
     833         2760 :                b_loc => c2_tmp%local_data
     834              :                CALL dgemm('T', 'N', nmob, ndim, SIZE(v_tmp%local_data, 1), 1.0_dp, a_loc(1, 1), &
     835              :                           SIZE(v_tmp%local_data, 1), b_loc(1, 1), SIZE(v_tmp%local_data, 1), &
     836         2760 :                           0.0_dp, q_mat(1, 1), nmob)
     837              :             END IF
     838         2760 :             CALL hc%matrix_struct%para_env%sum(q_mat)
     839              : 
     840         8280 :             ALLOCATE (itaken(ndim))
     841        24840 :             itaken = 0
     842         5520 :             DO it = 1, nmob
     843              :                vmax = 0.0_dp
     844              :                !select index ik
     845        24840 :                DO jt = 1, ndim
     846        24840 :                   IF (itaken(jt) == 0 .AND. ABS(q_mat(it, jt)) > vmax) THEN
     847         3024 :                      vmax = ABS(q_mat(it, jt))
     848         3024 :                      ik = jt
     849              :                   END IF
     850              :                END DO
     851         2760 :                itaken(ik) = 1
     852              : 
     853         5520 :                CALL cp_fm_to_fm(c2_tmp, v_mat(1), 1, ik, it)
     854              :             END DO
     855         2760 :             DEALLOCATE (itaken)
     856         2760 :             DEALLOCATE (q_mat)
     857              : 
     858              :             !Copy in the converged set to enlarge the converged subspace
     859         2760 :             CALL cp_fm_to_fm(v_mat(1), c0, nmob, 1, (nmo_converged + imo_low))
     860         2760 :             CALL timestop(hand4)
     861              : 
     862         2760 :             CALL cp_fm_release(c2_tmp)
     863         2760 :             CALL cp_fm_release(c_tmp)
     864         2760 :             CALL cp_fm_release(hc)
     865         2760 :             CALL cp_fm_release(v_tmp)
     866         2760 :             CALL cp_fm_release(b_mat)
     867              : 
     868         2760 :             DEALLOCATE (t_mat)
     869         2760 :             DEALLOCATE (a_block)
     870         2760 :             DEALLOCATE (b_block)
     871              : 
     872        28182 :             CALL cp_fm_release(v_mat)
     873              : 
     874              :          END DO ! ib
     875              : 
     876          582 :          CALL timeset(routineN//"_ortho", hand5)
     877          582 :          CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, c0, rzero, chc)
     878              : 
     879          582 :          CALL cp_fm_cholesky_decompose(chc, nmo)
     880          582 :          CALL cp_fm_triangular_multiply(chc, c0, 'R', invert_tr=.TRUE.)
     881          582 :          CALL timestop(hand5)
     882              :       ELSE
     883              :          ! Do nothing
     884              :       END IF
     885              : 
     886          592 :       CALL timestop(handle)
     887         1184 :    END SUBROUTINE lanczos_refinement_2v
     888              : 
     889              : END MODULE qs_scf_lanczos
        

Generated by: LCOV version 2.0-1