LCOV - code coverage report
Current view: top level - src - qs_scf_lanczos.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:b279b6b) Lines: 253 437 57.9 %
Date: 2024-04-24 07:13:09 Functions: 2 3 66.7 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief module that contains the algorithms to perform an itrative
      10             : !>         diagonalization by the block-Lanczos approach
      11             : !> \par History
      12             : !>      05.2009 created [MI]
      13             : !> \author fawzi
      14             : ! **************************************************************************************************
      15             : MODULE qs_scf_lanczos
      16             : 
      17             :    USE cp_fm_basic_linalg,              ONLY: cp_fm_column_scale,&
      18             :                                               cp_fm_qr_factorization,&
      19             :                                               cp_fm_scale_and_add,&
      20             :                                               cp_fm_transpose,&
      21             :                                               cp_fm_triangular_multiply
      22             :    USE cp_fm_cholesky,                  ONLY: cp_fm_cholesky_decompose
      23             :    USE cp_fm_diag,                      ONLY: choose_eigv_solver
      24             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      25             :                                               cp_fm_struct_release,&
      26             :                                               cp_fm_struct_type
      27             :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      28             :                                               cp_fm_get_submatrix,&
      29             :                                               cp_fm_release,&
      30             :                                               cp_fm_set_all,&
      31             :                                               cp_fm_set_submatrix,&
      32             :                                               cp_fm_to_fm,&
      33             :                                               cp_fm_type,&
      34             :                                               cp_fm_vectorsnorm
      35             :    USE cp_log_handling,                 ONLY: cp_to_string
      36             :    USE kinds,                           ONLY: dp
      37             :    USE parallel_gemm_api,               ONLY: parallel_gemm
      38             :    USE qs_mo_types,                     ONLY: get_mo_set,&
      39             :                                               mo_set_type
      40             :    USE qs_scf_types,                    ONLY: krylov_space_type
      41             :    USE scf_control_types,               ONLY: scf_control_type
      42             : #include "./base/base_uses.f90"
      43             : 
      44             :    IMPLICIT NONE
      45             :    PRIVATE
      46             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_scf_lanczos'
      47             : 
      48             :    PUBLIC :: krylov_space_allocate, lanczos_refinement, lanczos_refinement_2v
      49             : 
      50             : CONTAINS
      51             : 
      52             : ! **************************************************************************************************
      53             : 
      54             : ! **************************************************************************************************
      55             : !> \brief  allocates matrices and vectros used in the construction of
      56             : !>        the krylov space and for the lanczos refinement
      57             : !> \param krylov_space ...
      58             : !> \param scf_control ...
      59             : !> \param mos ...
      60             : !> \param
      61             : !> \par History
      62             : !>      05.2009 created [MI]
      63             : ! **************************************************************************************************
      64             : 
      65           8 :    SUBROUTINE krylov_space_allocate(krylov_space, scf_control, mos)
      66             : 
      67             :       TYPE(krylov_space_type), INTENT(INOUT)             :: krylov_space
      68             :       TYPE(scf_control_type), POINTER                    :: scf_control
      69             :       TYPE(mo_set_type), DIMENSION(:), INTENT(IN)        :: mos
      70             : 
      71             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'krylov_space_allocate'
      72             : 
      73             :       INTEGER                                            :: handle, ik, ispin, max_nmo, nao, nblock, &
      74             :                                                             ndim, nk, nmo, nspin
      75             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
      76             :       TYPE(cp_fm_type), POINTER                          :: mo_coeff
      77             : 
      78           8 :       CALL timeset(routineN, handle)
      79             : 
      80           8 :       IF (.NOT. ASSOCIATED(krylov_space%mo_conv)) THEN
      81           4 :          NULLIFY (fm_struct_tmp, mo_coeff)
      82             : 
      83           4 :          krylov_space%nkrylov = scf_control%diagonalization%nkrylov
      84           4 :          krylov_space%nblock = scf_control%diagonalization%nblock_krylov
      85             : 
      86           4 :          nk = krylov_space%nkrylov
      87           4 :          nblock = krylov_space%nblock
      88           4 :          nspin = SIZE(mos, 1)
      89             : 
      90          16 :          ALLOCATE (krylov_space%mo_conv(nspin))
      91          16 :          ALLOCATE (krylov_space%mo_refine(nspin))
      92          16 :          ALLOCATE (krylov_space%chc_mat(nspin))
      93          16 :          ALLOCATE (krylov_space%c_vec(nspin))
      94           4 :          max_nmo = 0
      95           8 :          DO ispin = 1, nspin
      96           4 :             CALL get_mo_set(mos(ispin), mo_coeff=mo_coeff, nao=nao, nmo=nmo)
      97           4 :             CALL cp_fm_create(krylov_space%mo_conv(ispin), mo_coeff%matrix_struct)
      98           4 :             CALL cp_fm_create(krylov_space%mo_refine(ispin), mo_coeff%matrix_struct)
      99           4 :             NULLIFY (fm_struct_tmp)
     100             :             CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmo, ncol_global=nmo, &
     101             :                                      para_env=mo_coeff%matrix_struct%para_env, &
     102           4 :                                      context=mo_coeff%matrix_struct%context)
     103           4 :             CALL cp_fm_create(krylov_space%chc_mat(ispin), fm_struct_tmp, "chc")
     104           4 :             CALL cp_fm_create(krylov_space%c_vec(ispin), fm_struct_tmp, "vec")
     105           4 :             CALL cp_fm_struct_release(fm_struct_tmp)
     106          12 :             max_nmo = MAX(max_nmo, nmo)
     107             :          END DO
     108             : 
     109             :          !the use of max_nmo might not be ok, in this case allocate nspin matrices
     110          12 :          ALLOCATE (krylov_space%c_eval(max_nmo))
     111             : 
     112          44 :          ALLOCATE (krylov_space%v_mat(nk))
     113             : 
     114           4 :          NULLIFY (fm_struct_tmp)
     115             :          CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nblock, &
     116             :                                   para_env=mo_coeff%matrix_struct%para_env, &
     117           4 :                                   context=mo_coeff%matrix_struct%context)
     118          36 :          DO ik = 1, nk
     119             :             CALL cp_fm_create(krylov_space%v_mat(ik), matrix_struct=fm_struct_tmp, &
     120          36 :                               name="v_mat_"//TRIM(ADJUSTL(cp_to_string(ik))))
     121             :          END DO
     122           4 :          ALLOCATE (krylov_space%tmp_mat)
     123             :          CALL cp_fm_create(krylov_space%tmp_mat, matrix_struct=fm_struct_tmp, &
     124           4 :                            name="tmp_mat")
     125           4 :          CALL cp_fm_struct_release(fm_struct_tmp)
     126             : 
     127             :          ! NOTE: the following matrices are small and could be defined
     128             : !           as standard array rather than istributed fm
     129           4 :          NULLIFY (fm_struct_tmp)
     130             :          CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nblock, ncol_global=nblock, &
     131             :                                   para_env=mo_coeff%matrix_struct%para_env, &
     132           4 :                                   context=mo_coeff%matrix_struct%context)
     133           4 :          ALLOCATE (krylov_space%block1_mat)
     134             :          CALL cp_fm_create(krylov_space%block1_mat, matrix_struct=fm_struct_tmp, &
     135           4 :                            name="a_mat_"//TRIM(ADJUSTL(cp_to_string(ik))))
     136           4 :          ALLOCATE (krylov_space%block2_mat)
     137             :          CALL cp_fm_create(krylov_space%block2_mat, matrix_struct=fm_struct_tmp, &
     138           4 :                            name="b_mat_"//TRIM(ADJUSTL(cp_to_string(ik))))
     139           4 :          ALLOCATE (krylov_space%block3_mat)
     140             :          CALL cp_fm_create(krylov_space%block3_mat, matrix_struct=fm_struct_tmp, &
     141           4 :                            name="b2_mat_"//TRIM(ADJUSTL(cp_to_string(ik))))
     142           4 :          CALL cp_fm_struct_release(fm_struct_tmp)
     143             : 
     144           4 :          ndim = nblock*nk
     145           4 :          NULLIFY (fm_struct_tmp)
     146             :          CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ndim, ncol_global=ndim, &
     147             :                                   para_env=mo_coeff%matrix_struct%para_env, &
     148           4 :                                   context=mo_coeff%matrix_struct%context)
     149           4 :          ALLOCATE (krylov_space%block4_mat)
     150             :          CALL cp_fm_create(krylov_space%block4_mat, matrix_struct=fm_struct_tmp, &
     151           4 :                            name="t_mat")
     152           4 :          ALLOCATE (krylov_space%block5_mat)
     153             :          CALL cp_fm_create(krylov_space%block5_mat, matrix_struct=fm_struct_tmp, &
     154           4 :                            name="t_vec")
     155           4 :          CALL cp_fm_struct_release(fm_struct_tmp)
     156          12 :          ALLOCATE (krylov_space%t_eval(ndim))
     157             : 
     158             :       ELSE
     159             :          !Nothing should be done
     160             :       END IF
     161             : 
     162           8 :       CALL timestop(handle)
     163             : 
     164           8 :    END SUBROUTINE krylov_space_allocate
     165             : 
     166             : ! **************************************************************************************************
     167             : !> \brief lanczos refinement by blocks of not-converged MOs
     168             : !> \param krylov_space ...
     169             : !> \param ks ...
     170             : !> \param c0 ...
     171             : !> \param c1 ...
     172             : !> \param eval ...
     173             : !> \param nao ...
     174             : !> \param eps_iter ...
     175             : !> \param ispin ...
     176             : !> \param check_moconv_only ...
     177             : !> \param
     178             : !> \par History
     179             : !>      05.2009 created [MI]
     180             : ! **************************************************************************************************
     181             : 
     182           0 :    SUBROUTINE lanczos_refinement(krylov_space, ks, c0, c1, eval, nao, &
     183             :                                  eps_iter, ispin, check_moconv_only)
     184             : 
     185             :       TYPE(krylov_space_type), POINTER                   :: krylov_space
     186             :       TYPE(cp_fm_type), INTENT(IN)                       :: ks, c0, c1
     187             :       REAL(dp), DIMENSION(:), POINTER                    :: eval
     188             :       INTEGER, INTENT(IN)                                :: nao
     189             :       REAL(dp), INTENT(IN)                               :: eps_iter
     190             :       INTEGER, INTENT(IN)                                :: ispin
     191             :       LOGICAL, INTENT(IN), OPTIONAL                      :: check_moconv_only
     192             : 
     193             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'lanczos_refinement'
     194             :       REAL(KIND=dp), PARAMETER                           :: rmone = -1.0_dp, rone = 1.0_dp, &
     195             :                                                             rzero = 0.0_dp
     196             : 
     197             :       INTEGER :: hand1, hand2, hand3, hand4, hand5, handle, ib, ik, imo, imo_low, imo_up, it, jt, &
     198             :          nblock, ndim, nmo, nmo_converged, nmo_nc, nmob, num_blocks
     199           0 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: itaken
     200             :       LOGICAL                                            :: my_check_moconv_only
     201             :       REAL(dp)                                           :: max_norm, min_norm, vmax
     202           0 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: q_mat, tblock, tvblock
     203             :       REAL(dp), DIMENSION(:), POINTER                    :: c_res, t_eval
     204             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
     205             :       TYPE(cp_fm_type)                                   :: c2_tmp, c3_tmp, c_tmp, hc
     206           0 :       TYPE(cp_fm_type), DIMENSION(:), POINTER            :: v_mat
     207             :       TYPE(cp_fm_type), POINTER                          :: a_mat, b2_mat, b_mat, chc, evec, t_mat, &
     208             :                                                             t_vec
     209             : 
     210           0 :       CALL timeset(routineN, handle)
     211             : 
     212           0 :       NULLIFY (fm_struct_tmp)
     213           0 :       NULLIFY (chc, evec)
     214             :       NULLIFY (c_res, t_eval)
     215           0 :       NULLIFY (t_mat, t_vec)
     216           0 :       NULLIFY (a_mat, b_mat, b2_mat, v_mat)
     217             : 
     218           0 :       nmo = SIZE(eval, 1)
     219           0 :       my_check_moconv_only = .FALSE.
     220           0 :       IF (PRESENT(check_moconv_only)) my_check_moconv_only = check_moconv_only
     221             : 
     222           0 :       chc => krylov_space%chc_mat(ispin)
     223           0 :       evec => krylov_space%c_vec(ispin)
     224           0 :       c_res => krylov_space%c_eval
     225           0 :       t_eval => krylov_space%t_eval
     226             : 
     227             :       NULLIFY (fm_struct_tmp)
     228             :       CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmo, &
     229             :                                para_env=c0%matrix_struct%para_env, &
     230           0 :                                context=c0%matrix_struct%context)
     231             :       CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
     232           0 :                         name="c_tmp")
     233             :       CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
     234           0 :                         name="hc")
     235           0 :       CALL cp_fm_struct_release(fm_struct_tmp)
     236             : 
     237             :       !Compute (C^t)HC
     238           0 :       CALL parallel_gemm('N', 'N', nao, nmo, nao, rone, ks, c0, rzero, hc)
     239           0 :       CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, hc, rzero, chc)
     240             : 
     241             :       !Diagonalize  (C^t)HC
     242           0 :       CALL timeset(routineN//"diag_chc", hand1)
     243           0 :       CALL choose_eigv_solver(chc, evec, eval)
     244           0 :       CALL timestop(hand1)
     245             : 
     246             :       !Rotate the C vectors
     247           0 :       CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, c0, evec, rzero, c1)
     248             : 
     249             :       !Check for converged states
     250           0 :       CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, hc, evec, rzero, c_tmp)
     251           0 :       CALL cp_fm_to_fm(c1, c0, nmo, 1, 1)
     252           0 :       CALL cp_fm_column_scale(c1, eval)
     253           0 :       CALL cp_fm_scale_and_add(1.0_dp, c_tmp, rmone, c1)
     254           0 :       CALL cp_fm_vectorsnorm(c_tmp, c_res)
     255             : 
     256           0 :       nmo_converged = 0
     257           0 :       nmo_nc = 0
     258           0 :       max_norm = 0.0_dp
     259           0 :       min_norm = 1.e10_dp
     260           0 :       CALL cp_fm_set_all(c1, rzero)
     261           0 :       DO imo = 1, nmo
     262           0 :          max_norm = MAX(max_norm, c_res(imo))
     263           0 :          min_norm = MIN(min_norm, c_res(imo))
     264             :       END DO
     265           0 :       DO imo = 1, nmo
     266           0 :          IF (c_res(imo) <= eps_iter) THEN
     267           0 :             nmo_converged = nmo_converged + 1
     268             :          ELSE
     269           0 :             nmo_nc = nmo - nmo_converged
     270           0 :             EXIT
     271             :          END IF
     272             :       END DO
     273             : 
     274           0 :       nblock = krylov_space%nblock
     275           0 :       num_blocks = nmo_nc/nblock
     276             : 
     277           0 :       krylov_space%nmo_nc = nmo_nc
     278           0 :       krylov_space%nmo_conv = nmo_converged
     279           0 :       krylov_space%max_res_norm = max_norm
     280           0 :       krylov_space%min_res_norm = min_norm
     281             : 
     282           0 :       IF (my_check_moconv_only) THEN
     283           0 :          CALL cp_fm_release(c_tmp)
     284           0 :          CALL cp_fm_release(hc)
     285           0 :          CALL timestop(handle)
     286           0 :          RETURN
     287           0 :       ELSE IF (krylov_space%nmo_nc > 0) THEN
     288             : 
     289           0 :          CALL cp_fm_to_fm(c0, c1, nmo_nc, nmo_converged + 1, 1)
     290             : 
     291           0 :          nblock = krylov_space%nblock
     292           0 :          IF (MODULO(nmo_nc, nblock) > 0.0_dp) THEN
     293           0 :             num_blocks = nmo_nc/nblock + 1
     294             :          ELSE
     295           0 :             num_blocks = nmo_nc/nblock
     296             :          END IF
     297             : 
     298           0 :          DO ib = 1, num_blocks
     299             : 
     300           0 :             imo_low = (ib - 1)*nblock + 1
     301           0 :             imo_up = MIN(ib*nblock, nmo_nc)
     302           0 :             nmob = imo_up - imo_low + 1
     303           0 :             ndim = krylov_space%nkrylov*nmob
     304             : 
     305           0 :             NULLIFY (fm_struct_tmp)
     306             :             CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=ndim, &
     307             :                                      para_env=c0%matrix_struct%para_env, &
     308           0 :                                      context=c0%matrix_struct%context)
     309             :             CALL cp_fm_create(c2_tmp, matrix_struct=fm_struct_tmp, &
     310           0 :                               name="c2_tmp")
     311           0 :             CALL cp_fm_struct_release(fm_struct_tmp)
     312           0 :             NULLIFY (fm_struct_tmp)
     313             :             CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=ndim, &
     314             :                                      para_env=c0%matrix_struct%para_env, &
     315           0 :                                      context=c0%matrix_struct%context)
     316             :             CALL cp_fm_create(c3_tmp, matrix_struct=fm_struct_tmp, &
     317           0 :                               name="c3_tmp")
     318           0 :             CALL cp_fm_struct_release(fm_struct_tmp)
     319             : 
     320             :             ! Create local matrix of right size
     321           0 :             IF (nmob /= nblock) THEN
     322           0 :                NULLIFY (a_mat, b_mat, b2_mat, t_mat, t_vec, v_mat)
     323           0 :                ALLOCATE (a_mat, b_mat, b2_mat, t_mat, t_vec)
     324           0 :                NULLIFY (fm_struct_tmp)
     325             :                CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=nmob, &
     326             :                                         para_env=chc%matrix_struct%para_env, &
     327           0 :                                         context=chc%matrix_struct%context)
     328             :                CALL cp_fm_create(a_mat, matrix_struct=fm_struct_tmp, &
     329           0 :                                  name="a_mat")
     330             :                CALL cp_fm_create(b_mat, matrix_struct=fm_struct_tmp, &
     331           0 :                                  name="b_mat")
     332             :                CALL cp_fm_create(b2_mat, matrix_struct=fm_struct_tmp, &
     333           0 :                                  name="b2_mat")
     334           0 :                CALL cp_fm_struct_release(fm_struct_tmp)
     335           0 :                NULLIFY (fm_struct_tmp)
     336             :                CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ndim, ncol_global=ndim, &
     337             :                                         para_env=chc%matrix_struct%para_env, &
     338           0 :                                         context=chc%matrix_struct%context)
     339             :                CALL cp_fm_create(t_mat, matrix_struct=fm_struct_tmp, &
     340           0 :                                  name="t_mat")
     341             :                CALL cp_fm_create(t_vec, matrix_struct=fm_struct_tmp, &
     342           0 :                                  name="t_vec")
     343           0 :                CALL cp_fm_struct_release(fm_struct_tmp)
     344           0 :                NULLIFY (fm_struct_tmp)
     345             :                CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
     346             :                                         para_env=c0%matrix_struct%para_env, &
     347           0 :                                         context=c0%matrix_struct%context)
     348           0 :                ALLOCATE (v_mat(krylov_space%nkrylov))
     349           0 :                DO ik = 1, krylov_space%nkrylov
     350             :                   CALL cp_fm_create(v_mat(ik), matrix_struct=fm_struct_tmp, &
     351           0 :                                     name="v_mat")
     352             :                END DO
     353           0 :                CALL cp_fm_struct_release(fm_struct_tmp)
     354             :             ELSE
     355           0 :                a_mat => krylov_space%block1_mat
     356           0 :                b_mat => krylov_space%block2_mat
     357           0 :                b2_mat => krylov_space%block3_mat
     358           0 :                t_mat => krylov_space%block4_mat
     359           0 :                t_vec => krylov_space%block5_mat
     360           0 :                v_mat => krylov_space%v_mat
     361             :             END IF
     362             : 
     363           0 :             ALLOCATE (tblock(nmob, nmob))
     364           0 :             ALLOCATE (tvblock(nmob, ndim))
     365             : 
     366           0 :             CALL timeset(routineN//"_kry_loop", hand2)
     367           0 :             CALL cp_fm_set_all(b_mat, rzero)
     368           0 :             CALL cp_fm_set_all(t_mat, rzero)
     369           0 :             CALL cp_fm_to_fm(c1, v_mat(1), nmob, imo_low, 1)
     370             : 
     371             :             !Compute A =(V^t)HV
     372           0 :             CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(1), rzero, hc)
     373             :             CALL parallel_gemm('T', 'N', nmob, nmob, nao, rone, v_mat(1), hc, &
     374           0 :                                rzero, a_mat)
     375             : 
     376             :             !Compute the residual matrix R for next
     377             :             !factorisation
     378             :             CALL parallel_gemm('N', 'N', nao, nmob, nmob, rone, v_mat(1), a_mat, &
     379           0 :                                rzero, c_tmp)
     380           0 :             CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
     381             : 
     382             :             ! Build the block tridiagonal matrix
     383           0 :             CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
     384           0 :             CALL cp_fm_set_submatrix(t_mat, tblock, 1, 1, nmob, nmob)
     385             : 
     386           0 :             DO ik = 2, krylov_space%nkrylov
     387             : 
     388             :                ! Call lapack for QR factorization
     389           0 :                CALL cp_fm_set_all(b_mat, rzero)
     390           0 :                CALL cp_fm_to_fm(c_tmp, v_mat(ik), nmob, 1, 1)
     391           0 :                CALL cp_fm_qr_factorization(c_tmp, b_mat, nao, nmob, 1, 1)
     392             : 
     393             :                CALL cp_fm_triangular_multiply(b_mat, v_mat(ik), side="R", invert_tr=.TRUE., &
     394           0 :                                               n_rows=nao, n_cols=nmob)
     395             : 
     396             :                !Compute A =(V^t)HV
     397           0 :                CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(ik), rzero, hc)
     398           0 :                CALL parallel_gemm('T', 'N', nmob, nmob, nao, rone, v_mat(ik), hc, rzero, a_mat)
     399             : 
     400             :                !Compute the !residual matrix R !for next !factorisation
     401             :                CALL parallel_gemm('N', 'N', nao, nmob, nmob, rone, v_mat(ik), a_mat, &
     402           0 :                                   rzero, c_tmp)
     403           0 :                CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
     404           0 :                CALL cp_fm_to_fm(v_mat(ik - 1), hc, nmob, 1, 1)
     405             :                CALL cp_fm_triangular_multiply(b_mat, hc, side='R', transpose_tr=.TRUE., &
     406           0 :                                               n_rows=nao, n_cols=nmob, alpha=rmone)
     407           0 :                CALL cp_fm_scale_and_add(rone, c_tmp, rone, hc)
     408             : 
     409             :                ! Build the block tridiagonal matrix
     410           0 :                it = (ik - 2)*nmob + 1
     411           0 :                jt = (ik - 1)*nmob + 1
     412             : 
     413           0 :                CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
     414           0 :                CALL cp_fm_set_submatrix(t_mat, tblock, jt, jt, nmob, nmob)
     415           0 :                CALL cp_fm_transpose(b_mat, a_mat)
     416           0 :                CALL cp_fm_get_submatrix(a_mat, tblock, 1, 1, nmob, nmob)
     417           0 :                CALL cp_fm_set_submatrix(t_mat, tblock, it, jt, nmob, nmob)
     418             : 
     419             :             END DO ! ik
     420           0 :             CALL timestop(hand2)
     421             : 
     422           0 :             DEALLOCATE (tblock)
     423             : 
     424           0 :             CALL timeset(routineN//"_diag_tri", hand3)
     425             : 
     426           0 :             CALL choose_eigv_solver(t_mat, t_vec, t_eval)
     427             :             ! Diagonalize the block-tridiagonal matrix
     428           0 :             CALL timestop(hand3)
     429             : 
     430           0 :             CALL timeset(routineN//"_build_cnew", hand4)
     431             : !        !Compute the refined vectors
     432           0 :             CALL cp_fm_set_all(c2_tmp, rzero)
     433           0 :             DO ik = 1, krylov_space%nkrylov
     434           0 :                jt = (ik - 1)*nmob
     435             :                CALL parallel_gemm('N', 'N', nao, ndim, nmob, rone, v_mat(ik), t_vec, rone, c2_tmp, &
     436           0 :                                   b_first_row=(jt + 1))
     437             :             END DO
     438           0 :             DEALLOCATE (tvblock)
     439             : 
     440           0 :             CALL cp_fm_set_all(c3_tmp, rzero)
     441           0 :             CALL parallel_gemm('T', 'N', nmob, ndim, nao, rone, v_mat(1), c2_tmp, rzero, c3_tmp)
     442             : 
     443             :             !Try to avoid linear dependencies
     444           0 :             ALLOCATE (q_mat(nmob, ndim))
     445             :             !get max
     446           0 :             CALL cp_fm_get_submatrix(c3_tmp, q_mat, 1, 1, nmob, ndim)
     447             : 
     448           0 :             ALLOCATE (itaken(ndim))
     449           0 :             itaken = 0
     450           0 :             DO it = 1, nmob
     451           0 :                vmax = 0.0_dp
     452             :                !select index ik
     453           0 :                DO jt = 1, ndim
     454           0 :                   IF (itaken(jt) == 0 .AND. ABS(q_mat(it, jt)) > vmax) THEN
     455           0 :                      vmax = ABS(q_mat(it, jt))
     456           0 :                      ik = jt
     457             :                   END IF
     458             :                END DO
     459           0 :                itaken(ik) = 1
     460             : 
     461           0 :                CALL cp_fm_to_fm(c2_tmp, v_mat(1), 1, ik, it)
     462             :             END DO
     463           0 :             DEALLOCATE (itaken)
     464           0 :             DEALLOCATE (q_mat)
     465             : 
     466             :             !Copy in the converged set to enlarge the converged subspace
     467           0 :             CALL cp_fm_to_fm(v_mat(1), c0, nmob, 1, (nmo_converged + imo_low))
     468           0 :             CALL timestop(hand4)
     469             : 
     470           0 :             IF (nmob < nblock) THEN
     471           0 :                CALL cp_fm_release(a_mat)
     472           0 :                CALL cp_fm_release(b_mat)
     473           0 :                CALL cp_fm_release(b2_mat)
     474           0 :                CALL cp_fm_release(t_mat)
     475           0 :                CALL cp_fm_release(t_vec)
     476           0 :                DEALLOCATE (a_mat, b_mat, b2_mat, t_mat, t_vec)
     477           0 :                CALL cp_fm_release(v_mat)
     478             :             END IF
     479           0 :             CALL cp_fm_release(c2_tmp)
     480           0 :             CALL cp_fm_release(c3_tmp)
     481             :          END DO ! ib
     482             : 
     483           0 :          CALL timeset(routineN//"_ortho", hand5)
     484           0 :          CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, c0, rzero, chc)
     485             : 
     486           0 :          CALL cp_fm_cholesky_decompose(chc, nmo)
     487           0 :          CALL cp_fm_triangular_multiply(chc, c0, 'R', invert_tr=.TRUE.)
     488           0 :          CALL timestop(hand5)
     489             : 
     490           0 :          CALL cp_fm_release(c_tmp)
     491           0 :          CALL cp_fm_release(hc)
     492             :       ELSE
     493           0 :          CALL cp_fm_release(c_tmp)
     494           0 :          CALL cp_fm_release(hc)
     495           0 :          CALL timestop(handle)
     496           0 :          RETURN
     497             :       END IF
     498             : 
     499           0 :       CALL timestop(handle)
     500             : 
     501           0 :    END SUBROUTINE lanczos_refinement
     502             : 
     503             : !+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
     504             : 
     505             : ! **************************************************************************************************
     506             : !> \brief ...
     507             : !> \param krylov_space ...
     508             : !> \param ks ...
     509             : !> \param c0 ...
     510             : !> \param c1 ...
     511             : !> \param eval ...
     512             : !> \param nao ...
     513             : !> \param eps_iter ...
     514             : !> \param ispin ...
     515             : !> \param check_moconv_only ...
     516             : ! **************************************************************************************************
     517         592 :    SUBROUTINE lanczos_refinement_2v(krylov_space, ks, c0, c1, eval, nao, &
     518             :                                     eps_iter, ispin, check_moconv_only)
     519             : 
     520             :       TYPE(krylov_space_type), POINTER                   :: krylov_space
     521             :       TYPE(cp_fm_type), INTENT(IN)                       :: ks, c0, c1
     522             :       REAL(dp), DIMENSION(:), POINTER                    :: eval
     523             :       INTEGER, INTENT(IN)                                :: nao
     524             :       REAL(dp), INTENT(IN)                               :: eps_iter
     525             :       INTEGER, INTENT(IN)                                :: ispin
     526             :       LOGICAL, INTENT(IN), OPTIONAL                      :: check_moconv_only
     527             : 
     528             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'lanczos_refinement_2v'
     529             :       REAL(KIND=dp), PARAMETER                           :: rmone = -1.0_dp, rone = 1.0_dp, &
     530             :                                                             rzero = 0.0_dp
     531             : 
     532             :       INTEGER :: hand1, hand2, hand3, hand4, hand5, hand6, handle, i, ia, ib, ik, imo, imo_low, &
     533             :          imo_up, info, it, j, jt, liwork, lwork, nblock, ndim, nmo, nmo_converged, nmo_nc, nmob, &
     534             :          num_blocks
     535         592 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: itaken
     536         592 :       INTEGER, DIMENSION(:), POINTER                     :: iwork
     537             :       LOGICAL                                            :: my_check_moconv_only
     538             :       REAL(dp)                                           :: max_norm, min_norm, vmax
     539         592 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: a_block, b_block, q_mat, t_mat
     540         592 :       REAL(dp), DIMENSION(:), POINTER                    :: c_res, t_eval
     541         592 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: work
     542         592 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: a_loc, b_loc
     543             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
     544             :       TYPE(cp_fm_type)                                   :: b_mat, c2_tmp, c_tmp, hc, v_tmp
     545         592 :       TYPE(cp_fm_type), ALLOCATABLE, DIMENSION(:)        :: v_mat
     546             :       TYPE(cp_fm_type), POINTER                          :: chc, evec
     547             : 
     548         592 :       CALL timeset(routineN, handle)
     549             : 
     550         592 :       NULLIFY (fm_struct_tmp)
     551         592 :       NULLIFY (chc, evec)
     552         592 :       NULLIFY (c_res, t_eval)
     553         592 :       NULLIFY (b_loc, a_loc)
     554             : 
     555         592 :       nmo = SIZE(eval, 1)
     556         592 :       my_check_moconv_only = .FALSE.
     557         592 :       IF (PRESENT(check_moconv_only)) my_check_moconv_only = check_moconv_only
     558             : 
     559         592 :       chc => krylov_space%chc_mat(ispin)
     560         592 :       evec => krylov_space%c_vec(ispin)
     561         592 :       c_res => krylov_space%c_eval
     562         592 :       t_eval => krylov_space%t_eval
     563             : 
     564             :       NULLIFY (fm_struct_tmp)
     565             :       CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmo, &
     566             :                                para_env=c0%matrix_struct%para_env, &
     567         592 :                                context=c0%matrix_struct%context)
     568             :       CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
     569         592 :                         name="c_tmp")
     570             :       CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
     571         592 :                         name="hc")
     572         592 :       CALL cp_fm_struct_release(fm_struct_tmp)
     573             : 
     574             :       !Compute (C^t)HC
     575         592 :       CALL parallel_gemm('N', 'N', nao, nmo, nao, rone, ks, c0, rzero, hc)
     576         592 :       CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, hc, rzero, chc)
     577             : 
     578             :       !Diagonalize  (C^t)HC
     579         592 :       CALL timeset(routineN//"diag_chc", hand1)
     580         592 :       CALL choose_eigv_solver(chc, evec, eval)
     581             : 
     582         592 :       CALL timestop(hand1)
     583             : 
     584         592 :       CALL timeset(routineN//"check_conv", hand6)
     585             :       !Rotate the C vectors
     586         592 :       CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, c0, evec, rzero, c1)
     587             : 
     588             :       !Check for converged states
     589         592 :       CALL parallel_gemm('N', 'N', nao, nmo, nmo, rone, hc, evec, rzero, c_tmp)
     590         592 :       CALL cp_fm_to_fm(c1, c0, nmo, 1, 1)
     591         592 :       CALL cp_fm_column_scale(c1, eval)
     592         592 :       CALL cp_fm_scale_and_add(1.0_dp, c_tmp, rmone, c1)
     593         592 :       CALL cp_fm_vectorsnorm(c_tmp, c_res)
     594             : 
     595         592 :       nmo_converged = 0
     596         592 :       nmo_nc = 0
     597         592 :       max_norm = 0.0_dp
     598         592 :       min_norm = 1.e10_dp
     599         592 :       CALL cp_fm_set_all(c1, rzero)
     600       21904 :       DO imo = 1, nmo
     601       21312 :          max_norm = MAX(max_norm, c_res(imo))
     602       21904 :          min_norm = MIN(min_norm, c_res(imo))
     603             :       END DO
     604       19144 :       DO imo = 1, nmo
     605       19144 :          IF (c_res(imo) <= eps_iter) THEN
     606       18552 :             nmo_converged = nmo_converged + 1
     607             :          ELSE
     608         582 :             nmo_nc = nmo - nmo_converged
     609         582 :             EXIT
     610             :          END IF
     611             :       END DO
     612         592 :       CALL timestop(hand6)
     613             : 
     614         592 :       CALL cp_fm_release(c_tmp)
     615         592 :       CALL cp_fm_release(hc)
     616             : 
     617         592 :       krylov_space%nmo_nc = nmo_nc
     618         592 :       krylov_space%nmo_conv = nmo_converged
     619         592 :       krylov_space%max_res_norm = max_norm
     620         592 :       krylov_space%min_res_norm = min_norm
     621             : 
     622         592 :       IF (my_check_moconv_only) THEN
     623             :          ! Do nothing
     624         592 :       ELSE IF (krylov_space%nmo_nc > 0) THEN
     625             : 
     626         582 :          CALL cp_fm_to_fm(c0, c1, nmo_nc, nmo_converged + 1, 1)
     627             : 
     628         582 :          nblock = krylov_space%nblock
     629         582 :          IF (MODULO(nmo_nc, nblock) > 0.0_dp) THEN
     630           0 :             num_blocks = nmo_nc/nblock + 1
     631             :          ELSE
     632         582 :             num_blocks = nmo_nc/nblock
     633             :          END IF
     634             : 
     635        3342 :          DO ib = 1, num_blocks
     636             : 
     637        2760 :             imo_low = (ib - 1)*nblock + 1
     638        2760 :             imo_up = MIN(ib*nblock, nmo_nc)
     639        2760 :             nmob = imo_up - imo_low + 1
     640        2760 :             ndim = krylov_space%nkrylov*nmob
     641             : 
     642             :             ! Allocation
     643        2760 :             CALL timeset(routineN//"alloc", hand6)
     644       11040 :             ALLOCATE (a_block(nmob, nmob))
     645       11040 :             ALLOCATE (b_block(nmob, nmob))
     646       11040 :             ALLOCATE (t_mat(ndim, ndim))
     647             : 
     648        2760 :             NULLIFY (fm_struct_tmp)
     649             :             ! by forcing ncol_block=nmo, the needed part of the matrix is distributed on a subset of processes
     650             :             ! this is due to the use of two-dimensional grids of processes
     651             :             ! nrow_global is distributed over num_pe(1)
     652             :             ! a local_data array is anyway allocated for the processes non included
     653             :             ! this should have a minimum size
     654             :             ! with ncol_local=1.
     655             :             CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
     656             :                                      ncol_block=nmob, &
     657             :                                      para_env=c0%matrix_struct%para_env, &
     658             :                                      context=c0%matrix_struct%context, &
     659        2760 :                                      force_block=.TRUE.)
     660             :             CALL cp_fm_create(c_tmp, matrix_struct=fm_struct_tmp, &
     661        2760 :                               name="c_tmp")
     662        2760 :             CALL cp_fm_set_all(c_tmp, rzero)
     663             :             CALL cp_fm_create(v_tmp, matrix_struct=fm_struct_tmp, &
     664        2760 :                               name="v_tmp")
     665        2760 :             CALL cp_fm_set_all(v_tmp, rzero)
     666        2760 :             CALL cp_fm_struct_release(fm_struct_tmp)
     667        2760 :             NULLIFY (fm_struct_tmp)
     668       30360 :             ALLOCATE (v_mat(krylov_space%nkrylov))
     669             :             CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=nmob, &
     670             :                                      ncol_block=nmob, &
     671             :                                      para_env=c0%matrix_struct%para_env, &
     672             :                                      context=c0%matrix_struct%context, &
     673        2760 :                                      force_block=.TRUE.)
     674       24840 :             DO ik = 1, krylov_space%nkrylov
     675             :                CALL cp_fm_create(v_mat(ik), matrix_struct=fm_struct_tmp, &
     676       24840 :                                  name="v_mat")
     677             :             END DO
     678             :             CALL cp_fm_create(hc, matrix_struct=fm_struct_tmp, &
     679        2760 :                               name="hc")
     680        2760 :             CALL cp_fm_struct_release(fm_struct_tmp)
     681        2760 :             NULLIFY (fm_struct_tmp)
     682             :             CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nao, ncol_global=ndim, &
     683             :                                      ncol_block=ndim, &
     684             :                                      para_env=c0%matrix_struct%para_env, &
     685             :                                      context=c0%matrix_struct%context, &
     686        2760 :                                      force_block=.TRUE.)
     687             :             CALL cp_fm_create(c2_tmp, matrix_struct=fm_struct_tmp, &
     688        2760 :                               name="c2_tmp")
     689        2760 :             CALL cp_fm_struct_release(fm_struct_tmp)
     690             : 
     691        2760 :             NULLIFY (fm_struct_tmp)
     692             :             CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=nmob, ncol_global=nmob, &
     693             :                                      para_env=c0%matrix_struct%para_env, &
     694        2760 :                                      context=c0%matrix_struct%context)
     695             :             CALL cp_fm_create(b_mat, matrix_struct=fm_struct_tmp, &
     696        2760 :                               name="b_mat")
     697        2760 :             CALL cp_fm_struct_release(fm_struct_tmp)
     698        2760 :             CALL timestop(hand6)
     699             :             !End allocation
     700             : 
     701        2760 :             CALL cp_fm_set_all(b_mat, rzero)
     702        2760 :             CALL cp_fm_to_fm(c1, v_mat(1), nmob, imo_low, 1)
     703             : 
     704             :             ! Here starts the construction of krylov space
     705        2760 :             CALL timeset(routineN//"_kry_loop", hand2)
     706             :             !Compute A =(V^t)HV
     707        2760 :             CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(1), rzero, hc)
     708             : 
     709        8280 :             a_block = 0.0_dp
     710        2760 :             a_loc => v_mat(1)%local_data
     711        2760 :             b_loc => hc%local_data
     712             : 
     713        2760 :             IF (SIZE(hc%local_data, 2) == nmob) THEN
     714             :                ! this is a work around to avoid problems due to the two dimensional grid of processes
     715             :                CALL dgemm('T', 'N', nmob, nmob, SIZE(hc%local_data, 1), 1.0_dp, a_loc(1, 1), &
     716        2760 :                           SIZE(hc%local_data, 1), b_loc(1, 1), SIZE(hc%local_data, 1), 0.0_dp, a_block(1, 1), nmob)
     717             :             END IF
     718        2760 :             CALL hc%matrix_struct%para_env%sum(a_block)
     719             : 
     720             :             !Compute the residual matrix R for next
     721             :             !factorisation
     722      149040 :             c_tmp%local_data = 0.0_dp
     723        2760 :             IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
     724        2760 :                b_loc => c_tmp%local_data
     725             :                CALL dgemm('N', 'N', SIZE(c_tmp%local_data, 1), nmob, nmob, 1.0_dp, a_loc(1, 1), &
     726             :                           SIZE(c_tmp%local_data, 1), a_block(1, 1), nmob, 0.0_dp, &
     727        2760 :                           b_loc(1, 1), SIZE(c_tmp%local_data, 1))
     728             :             END IF
     729        2760 :             CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
     730             : 
     731             :             ! Build the block tridiagonal matrix
     732      201480 :             t_mat = 0.0_dp
     733        5520 :             DO i = 1, nmob
     734        8280 :                t_mat(1:nmob, i) = a_block(1:nmob, i)
     735             :             END DO
     736             : 
     737       22080 :             DO ik = 2, krylov_space%nkrylov
     738             :                ! Call lapack for QR factorization
     739       19320 :                CALL cp_fm_set_all(b_mat, rzero)
     740       19320 :                CALL cp_fm_to_fm(c_tmp, v_mat(ik), nmob, 1, 1)
     741       19320 :                CALL cp_fm_qr_factorization(c_tmp, b_mat, nao, nmob, 1, 1)
     742             : 
     743             :                CALL cp_fm_triangular_multiply(b_mat, v_mat(ik), side="R", invert_tr=.TRUE., &
     744       19320 :                                               n_rows=nao, n_cols=nmob)
     745             : 
     746       19320 :                CALL cp_fm_get_submatrix(b_mat, b_block, 1, 1, nmob, nmob)
     747             : 
     748             :                !Compute A =(V^t)HV
     749       19320 :                CALL parallel_gemm('N', 'N', nao, nmob, nao, rone, ks, v_mat(ik), rzero, hc)
     750             : 
     751       57960 :                a_block = 0.0_dp
     752       19320 :                IF (SIZE(hc%local_data, 2) == nmob) THEN
     753       19320 :                   a_loc => v_mat(ik)%local_data
     754       19320 :                   b_loc => hc%local_data
     755             :                   CALL dgemm('T', 'N', nmob, nmob, SIZE(hc%local_data, 1), 1.0_dp, a_loc(1, 1), &
     756       19320 :                              SIZE(hc%local_data, 1), b_loc(1, 1), SIZE(hc%local_data, 1), 0.0_dp, a_block(1, 1), nmob)
     757             :                END IF
     758       19320 :                CALL hc%matrix_struct%para_env%sum(a_block)
     759             : 
     760             :                !Compute the residual matrix R for next
     761             :                !factorisation
     762     1043280 :                c_tmp%local_data = 0.0_dp
     763       19320 :                IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
     764       19320 :                   a_loc => v_mat(ik)%local_data
     765       19320 :                   b_loc => c_tmp%local_data
     766             :                   CALL dgemm('N', 'N', SIZE(c_tmp%local_data, 1), nmob, nmob, 1.0_dp, a_loc(1, 1), &
     767             :                              SIZE(c_tmp%local_data, 1), a_block(1, 1), nmob, 0.0_dp, &
     768       19320 :                              b_loc(1, 1), SIZE(c_tmp%local_data, 1))
     769             :                END IF
     770       19320 :                CALL cp_fm_scale_and_add(rmone, c_tmp, rone, hc)
     771             : 
     772       19320 :                IF (SIZE(c_tmp%local_data, 2) == nmob) THEN
     773       19320 :                   a_loc => v_mat(ik - 1)%local_data
     774       38640 :                   DO j = 1, nmob
     775       57960 :                      DO i = 1, j
     776     1043280 :                         DO ia = 1, SIZE(c_tmp%local_data, 1)
     777     1023960 :                            b_loc(ia, i) = b_loc(ia, i) - a_loc(ia, j)*b_block(i, j)
     778             :                         END DO
     779             :                      END DO
     780             :                   END DO
     781             :                END IF
     782             : 
     783             :                ! Build the block tridiagonal matrix
     784       19320 :                it = (ik - 2)*nmob
     785       19320 :                jt = (ik - 1)*nmob
     786       41400 :                DO j = 1, nmob
     787       38640 :                   t_mat(jt + 1:jt + nmob, jt + j) = a_block(1:nmob, j)
     788       57960 :                   DO i = 1, nmob
     789       19320 :                      t_mat(it + i, jt + j) = b_block(j, i)
     790       38640 :                      t_mat(jt + j, it + i) = b_block(j, i)
     791             :                   END DO
     792             :                END DO
     793             :             END DO ! ik
     794        2760 :             CALL timestop(hand2)
     795             : 
     796        2760 :             CALL timeset(routineN//"_diag_tri", hand3)
     797        2760 :             lwork = 1 + 6*ndim + 2*ndim**2 + 5000
     798        2760 :             liwork = 5*ndim + 3
     799        8280 :             ALLOCATE (work(lwork))
     800        8280 :             ALLOCATE (iwork(liwork))
     801             : 
     802             :             ! Diagonalize the block-tridiagonal matrix
     803             :             CALL dsyevd('V', 'U', ndim, t_mat(1, 1), ndim, t_eval(1), &
     804        2760 :                         work(1), lwork, iwork(1), liwork, info)
     805        2760 :             DEALLOCATE (work)
     806        2760 :             DEALLOCATE (iwork)
     807        2760 :             CALL timestop(hand3)
     808             : 
     809        2760 :             CALL timeset(routineN//"_build_cnew", hand4)
     810             : !        !Compute the refined vectors
     811             : 
     812     1173000 :             c2_tmp%local_data = 0.0_dp
     813       11040 :             ALLOCATE (q_mat(nmob, ndim))
     814       46920 :             q_mat = 0.0_dp
     815        2760 :             b_loc => c2_tmp%local_data
     816       24840 :             DO ik = 1, krylov_space%nkrylov
     817       22080 :                CALL cp_fm_to_fm(v_mat(ik), v_tmp, nmob, 1, 1)
     818       24840 :                IF (SIZE(c2_tmp%local_data, 2) == ndim) THEN
     819             : !            a_loc => v_mat(ik)%local_data
     820       22080 :                   a_loc => v_tmp%local_data
     821       22080 :                   it = (ik - 1)*nmob
     822             :                   CALL dgemm('N', 'N', SIZE(c2_tmp%local_data, 1), ndim, nmob, 1.0_dp, a_loc(1, 1), &
     823             :                              SIZE(c2_tmp%local_data, 1), t_mat(it + 1, 1), ndim, 1.0_dp, &
     824       22080 :                              b_loc(1, 1), SIZE(c2_tmp%local_data, 1))
     825             :                END IF
     826             :             END DO !ik
     827             : 
     828             :             !Try to avoid linear dependencies
     829        2760 :             CALL cp_fm_to_fm(v_mat(1), v_tmp, nmob, 1, 1)
     830        2760 :             IF (SIZE(c2_tmp%local_data, 2) == ndim) THEN
     831             : !          a_loc => v_mat(1)%local_data
     832        2760 :                a_loc => v_tmp%local_data
     833        2760 :                b_loc => c2_tmp%local_data
     834             :                CALL dgemm('T', 'N', nmob, ndim, SIZE(v_tmp%local_data, 1), 1.0_dp, a_loc(1, 1), &
     835             :                           SIZE(v_tmp%local_data, 1), b_loc(1, 1), SIZE(v_tmp%local_data, 1), &
     836        2760 :                           0.0_dp, q_mat(1, 1), nmob)
     837             :             END IF
     838        2760 :             CALL hc%matrix_struct%para_env%sum(q_mat)
     839             : 
     840        8280 :             ALLOCATE (itaken(ndim))
     841       24840 :             itaken = 0
     842        5520 :             DO it = 1, nmob
     843             :                vmax = 0.0_dp
     844             :                !select index ik
     845       24840 :                DO jt = 1, ndim
     846       24840 :                   IF (itaken(jt) == 0 .AND. ABS(q_mat(it, jt)) > vmax) THEN
     847        3030 :                      vmax = ABS(q_mat(it, jt))
     848        3030 :                      ik = jt
     849             :                   END IF
     850             :                END DO
     851        2760 :                itaken(ik) = 1
     852             : 
     853        5520 :                CALL cp_fm_to_fm(c2_tmp, v_mat(1), 1, ik, it)
     854             :             END DO
     855        2760 :             DEALLOCATE (itaken)
     856        2760 :             DEALLOCATE (q_mat)
     857             : 
     858             :             !Copy in the converged set to enlarge the converged subspace
     859        2760 :             CALL cp_fm_to_fm(v_mat(1), c0, nmob, 1, (nmo_converged + imo_low))
     860        2760 :             CALL timestop(hand4)
     861             : 
     862        2760 :             CALL cp_fm_release(c2_tmp)
     863        2760 :             CALL cp_fm_release(c_tmp)
     864        2760 :             CALL cp_fm_release(hc)
     865        2760 :             CALL cp_fm_release(v_tmp)
     866        2760 :             CALL cp_fm_release(b_mat)
     867             : 
     868        2760 :             DEALLOCATE (t_mat)
     869        2760 :             DEALLOCATE (a_block)
     870        2760 :             DEALLOCATE (b_block)
     871             : 
     872       14382 :             CALL cp_fm_release(v_mat)
     873             : 
     874             :          END DO ! ib
     875             : 
     876         582 :          CALL timeset(routineN//"_ortho", hand5)
     877         582 :          CALL parallel_gemm('T', 'N', nmo, nmo, nao, rone, c0, c0, rzero, chc)
     878             : 
     879         582 :          CALL cp_fm_cholesky_decompose(chc, nmo)
     880         582 :          CALL cp_fm_triangular_multiply(chc, c0, 'R', invert_tr=.TRUE.)
     881         582 :          CALL timestop(hand5)
     882             :       ELSE
     883             :          ! Do nothing
     884             :       END IF
     885             : 
     886         592 :       CALL timestop(handle)
     887        1184 :    END SUBROUTINE lanczos_refinement_2v
     888             : 
     889             : END MODULE qs_scf_lanczos

Generated by: LCOV version 1.15