LCOV - CP2K Regtests (git:c9ed8e0) - src/dm_ls_scf

LCOV - code coverage report

Current view:	top level - src - dm_ls_scf_methods.F (source / functions)		Hit	Total	Coverage
Test:	CP2K Regtests (git:c9ed8e0)	Lines:	502	540	93.0 %
Date:	2025-06-28 08:00:56	Functions:	11	11	100.0 %

          Line data    Source code

       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief lower level routines for linear scaling SCF
      10             : !> \par History
      11             : !>       2010.10 created [Joost VandeVondele]
      12             : !> \author Joost VandeVondele
      13             : ! **************************************************************************************************
      14             : MODULE dm_ls_scf_methods
      15             :    USE arnoldi_api,                     ONLY: arnoldi_extremal
      16             :    USE cp_dbcsr_api,                    ONLY: &
      17             :         dbcsr_add, dbcsr_copy, dbcsr_create, dbcsr_desymmetrize, dbcsr_filter, dbcsr_finalize, &
      18             :         dbcsr_get_occupation, dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, &
      19             :         dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, &
      20             :         dbcsr_put_block, dbcsr_release, dbcsr_scale, dbcsr_set, dbcsr_type, dbcsr_type_no_symmetry
      21             :    USE cp_dbcsr_contrib,                ONLY: dbcsr_add_on_diag,&
      22             :                                               dbcsr_dot,&
      23             :                                               dbcsr_frobenius_norm,&
      24             :                                               dbcsr_trace
      25             :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      26             :                                               cp_logger_get_default_unit_nr,&
      27             :                                               cp_logger_type
      28             :    USE dm_ls_scf_qs,                    ONLY: matrix_qs_to_ls
      29             :    USE dm_ls_scf_types,                 ONLY: ls_cluster_atomic,&
      30             :                                               ls_mstruct_type,&
      31             :                                               ls_scf_env_type
      32             :    USE input_constants,                 ONLY: &
      33             :         ls_cluster_atomic, ls_s_preconditioner_atomic, ls_s_preconditioner_molecular, &
      34             :         ls_s_preconditioner_none, ls_s_sqrt_ns, ls_s_sqrt_proot, ls_scf_sign_ns, &
      35             :         ls_scf_sign_proot, ls_scf_sign_submatrix, ls_scf_submatrix_sign_direct_muadj, &
      36             :         ls_scf_submatrix_sign_direct_muadj_lowmem, ls_scf_submatrix_sign_ns
      37             :    USE iterate_matrix,                  ONLY: invert_Hotelling,&
      38             :                                               matrix_sign_Newton_Schulz,&
      39             :                                               matrix_sign_proot,&
      40             :                                               matrix_sign_submatrix,&
      41             :                                               matrix_sign_submatrix_mu_adjust,&
      42             :                                               matrix_sqrt_Newton_Schulz,&
      43             :                                               matrix_sqrt_proot
      44             :    USE kinds,                           ONLY: dp,&
      45             :                                               int_8
      46             :    USE machine,                         ONLY: m_flush,&
      47             :                                               m_walltime
      48             :    USE mathlib,                         ONLY: abnormal_value
      49             : #include "./base/base_uses.f90"
      50             : 
      51             :    IMPLICIT NONE
      52             : 
      53             :    PRIVATE
      54             : 
      55             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'dm_ls_scf_methods'
      56             : 
      57             :    PUBLIC :: ls_scf_init_matrix_S
      58             :    PUBLIC :: density_matrix_sign, density_matrix_sign_fixed_mu
      59             :    PUBLIC :: apply_matrix_preconditioner, compute_matrix_preconditioner
      60             :    PUBLIC :: density_matrix_trs4, density_matrix_tc2, compute_homo_lumo
      61             : 
      62             : CONTAINS
      63             : 
      64             : ! **************************************************************************************************
      65             : !> \brief initialize S matrix related properties (sqrt, inverse...)
      66             : !>        Might be factored-out since this seems common code with the other SCF.
      67             : !> \param matrix_s ...
      68             : !> \param ls_scf_env ...
      69             : !> \par History
      70             : !>       2010.10 created [Joost VandeVondele]
      71             : !> \author Joost VandeVondele
      72             : ! **************************************************************************************************
      73       12714 :    SUBROUTINE ls_scf_init_matrix_S(matrix_s, ls_scf_env)
      74             :       TYPE(dbcsr_type)                                   :: matrix_s
      75             :       TYPE(ls_scf_env_type)                              :: ls_scf_env
      76             : 
      77             :       CHARACTER(len=*), PARAMETER :: routineN = 'ls_scf_init_matrix_S'
      78             : 
      79             :       INTEGER                                            :: handle, unit_nr
      80             :       REAL(KIND=dp)                                      :: frob_matrix, frob_matrix_base
      81             :       TYPE(cp_logger_type), POINTER                      :: logger
      82             :       TYPE(dbcsr_type)                                   :: matrix_tmp1, matrix_tmp2
      83             : 
      84       12714 :       CALL timeset(routineN, handle)
      85             : 
      86             :       ! get a useful output_unit
      87       12714 :       logger => cp_get_default_logger()
      88       12714 :       IF (logger%para_env%is_source()) THEN
      89        6357 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
      90             :       ELSE
      91             :          unit_nr = -1
      92             :       END IF
      93             : 
      94             :       ! make our own copy of S
      95       12714 :       IF (ls_scf_env%has_unit_metric) THEN
      96          14 :          CALL dbcsr_set(ls_scf_env%matrix_s, 0.0_dp)
      97          14 :          CALL dbcsr_add_on_diag(ls_scf_env%matrix_s, 1.0_dp)
      98             :       ELSE
      99       12700 :          CALL matrix_qs_to_ls(ls_scf_env%matrix_s, matrix_s, ls_scf_env%ls_mstruct, covariant=.TRUE.)
     100             :       END IF
     101             : 
     102       12714 :       CALL dbcsr_filter(ls_scf_env%matrix_s, ls_scf_env%eps_filter)
     103             : 
     104             :       ! needs a preconditioner for S
     105       12714 :       IF (ls_scf_env%has_s_preconditioner) THEN
     106             :          CALL dbcsr_create(ls_scf_env%matrix_bs_sqrt, template=ls_scf_env%matrix_s, &
     107         356 :                            matrix_type=dbcsr_type_no_symmetry)
     108             :          CALL dbcsr_create(ls_scf_env%matrix_bs_sqrt_inv, template=ls_scf_env%matrix_s, &
     109         356 :                            matrix_type=dbcsr_type_no_symmetry)
     110             :          CALL compute_matrix_preconditioner(ls_scf_env%matrix_s, &
     111             :                                             ls_scf_env%s_preconditioner_type, ls_scf_env%ls_mstruct, &
     112             :                                             ls_scf_env%matrix_bs_sqrt, ls_scf_env%matrix_bs_sqrt_inv, &
     113             :                                             ls_scf_env%eps_filter, ls_scf_env%s_sqrt_order, &
     114         356 :                                             ls_scf_env%eps_lanczos, ls_scf_env%max_iter_lanczos)
     115             :       END IF
     116             : 
     117             :       ! precondition S
     118       12714 :       IF (ls_scf_env%has_s_preconditioner) THEN
     119             :          CALL apply_matrix_preconditioner(ls_scf_env%matrix_s, "forward", &
     120         356 :                                           ls_scf_env%matrix_bs_sqrt, ls_scf_env%matrix_bs_sqrt_inv)
     121             :       END IF
     122             : 
     123             :       ! compute sqrt(S) and inv(sqrt(S))
     124       12714 :       IF (ls_scf_env%use_s_sqrt) THEN
     125             : 
     126             :          CALL dbcsr_create(ls_scf_env%matrix_s_sqrt, template=ls_scf_env%matrix_s, &
     127       12710 :                            matrix_type=dbcsr_type_no_symmetry)
     128             :          CALL dbcsr_create(ls_scf_env%matrix_s_sqrt_inv, template=ls_scf_env%matrix_s, &
     129       12710 :                            matrix_type=dbcsr_type_no_symmetry)
     130             : 
     131       12718 :          SELECT CASE (ls_scf_env%s_sqrt_method)
     132             :          CASE (ls_s_sqrt_proot)
     133             :             CALL matrix_sqrt_proot(ls_scf_env%matrix_s_sqrt, ls_scf_env%matrix_s_sqrt_inv, &
     134             :                                    ls_scf_env%matrix_s, ls_scf_env%eps_filter, &
     135             :                                    ls_scf_env%s_sqrt_order, &
     136             :                                    ls_scf_env%eps_lanczos, ls_scf_env%max_iter_lanczos, &
     137           8 :                                    symmetrize=.TRUE.)
     138             :          CASE (ls_s_sqrt_ns)
     139             :             CALL matrix_sqrt_Newton_Schulz(ls_scf_env%matrix_s_sqrt, ls_scf_env%matrix_s_sqrt_inv, &
     140             :                                            ls_scf_env%matrix_s, ls_scf_env%eps_filter, &
     141             :                                            ls_scf_env%s_sqrt_order, &
     142             :                                            ls_scf_env%eps_lanczos, ls_scf_env%max_iter_lanczos, &
     143       12702 :                                            iounit=-1)
     144             :          CASE DEFAULT
     145       12710 :             CPABORT("Unknown sqrt method.")
     146             :          END SELECT
     147             : 
     148       12710 :          IF (ls_scf_env%check_s_inv) THEN
     149             :             CALL dbcsr_create(matrix_tmp1, template=ls_scf_env%matrix_s, &
     150           0 :                               matrix_type=dbcsr_type_no_symmetry)
     151             :             CALL dbcsr_create(matrix_tmp2, template=ls_scf_env%matrix_s, &
     152           0 :                               matrix_type=dbcsr_type_no_symmetry)
     153             : 
     154             :             CALL dbcsr_multiply("N", "N", 1.0_dp, ls_scf_env%matrix_s_sqrt_inv, ls_scf_env%matrix_s, &
     155           0 :                                 0.0_dp, matrix_tmp1, filter_eps=ls_scf_env%eps_filter)
     156             : 
     157             :             CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, ls_scf_env%matrix_s_sqrt_inv, &
     158           0 :                                 0.0_dp, matrix_tmp2, filter_eps=ls_scf_env%eps_filter)
     159             : 
     160           0 :             frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
     161           0 :             CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
     162           0 :             frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
     163           0 :             IF (unit_nr > 0) THEN
     164           0 :                WRITE (unit_nr, *) "Error for (inv(sqrt(S))*S*inv(sqrt(S))-I)", frob_matrix/frob_matrix_base
     165             :             END IF
     166             : 
     167           0 :             CALL dbcsr_release(matrix_tmp1)
     168           0 :             CALL dbcsr_release(matrix_tmp2)
     169             :          END IF
     170             :       END IF
     171             : 
     172             :       ! compute the inverse of S
     173       12714 :       IF (ls_scf_env%needs_s_inv) THEN
     174             :          CALL dbcsr_create(ls_scf_env%matrix_s_inv, template=ls_scf_env%matrix_s, &
     175       12684 :                            matrix_type=dbcsr_type_no_symmetry)
     176       12684 :          IF (.NOT. ls_scf_env%use_s_sqrt) THEN
     177           2 :             CALL invert_Hotelling(ls_scf_env%matrix_s_inv, ls_scf_env%matrix_s, ls_scf_env%eps_filter)
     178             :          ELSE
     179             :             CALL dbcsr_multiply("N", "N", 1.0_dp, ls_scf_env%matrix_s_sqrt_inv, ls_scf_env%matrix_s_sqrt_inv, &
     180       12682 :                                 0.0_dp, ls_scf_env%matrix_s_inv, filter_eps=ls_scf_env%eps_filter)
     181             :          END IF
     182       12684 :          IF (ls_scf_env%check_s_inv) THEN
     183             :             CALL dbcsr_create(matrix_tmp1, template=ls_scf_env%matrix_s, &
     184           0 :                               matrix_type=dbcsr_type_no_symmetry)
     185             :             CALL dbcsr_multiply("N", "N", 1.0_dp, ls_scf_env%matrix_s_inv, ls_scf_env%matrix_s, &
     186           0 :                                 0.0_dp, matrix_tmp1, filter_eps=ls_scf_env%eps_filter)
     187           0 :             frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
     188           0 :             CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
     189           0 :             frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
     190           0 :             IF (unit_nr > 0) THEN
     191           0 :                WRITE (unit_nr, *) "Error for (inv(S)*S-I)", frob_matrix/frob_matrix_base
     192             :             END IF
     193           0 :             CALL dbcsr_release(matrix_tmp1)
     194             :          END IF
     195             :       END IF
     196             : 
     197       12714 :       CALL timestop(handle)
     198       12714 :    END SUBROUTINE ls_scf_init_matrix_s
     199             : 
     200             : ! **************************************************************************************************
     201             : !> \brief compute for a block positive definite matrix s (bs)
     202             : !>        the sqrt(bs) and inv(sqrt(bs))
     203             : !> \param matrix_s ...
     204             : !> \param preconditioner_type ...
     205             : !> \param ls_mstruct ...
     206             : !> \param matrix_bs_sqrt ...
     207             : !> \param matrix_bs_sqrt_inv ...
     208             : !> \param threshold ...
     209             : !> \param order ...
     210             : !> \param eps_lanczos ...
     211             : !> \param max_iter_lanczos ...
     212             : !> \par History
     213             : !>       2010.10 created [Joost VandeVondele]
     214             : !> \author Joost VandeVondele
     215             : ! **************************************************************************************************
     216         356 :    SUBROUTINE compute_matrix_preconditioner(matrix_s, preconditioner_type, ls_mstruct, &
     217             :                                             matrix_bs_sqrt, matrix_bs_sqrt_inv, threshold, order, &
     218             :                                             eps_lanczos, max_iter_lanczos)
     219             : 
     220             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_s
     221             :       INTEGER                                            :: preconditioner_type
     222             :       TYPE(ls_mstruct_type)                              :: ls_mstruct
     223             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_bs_sqrt, matrix_bs_sqrt_inv
     224             :       REAL(KIND=dp)                                      :: threshold
     225             :       INTEGER                                            :: order
     226             :       REAL(KIND=dp)                                      :: eps_lanczos
     227             :       INTEGER                                            :: max_iter_lanczos
     228             : 
     229             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'compute_matrix_preconditioner'
     230             : 
     231             :       INTEGER                                            :: handle, iblock_col, iblock_row
     232             :       LOGICAL                                            :: block_needed
     233         356 :       REAL(dp), DIMENSION(:, :), POINTER                 :: block_dp
     234             :       TYPE(dbcsr_iterator_type)                          :: iter
     235             :       TYPE(dbcsr_type)                                   :: matrix_bs
     236             : 
     237         356 :       CALL timeset(routineN, handle)
     238             : 
     239             :       ! first generate a block diagonal copy of s
     240         356 :       CALL dbcsr_create(matrix_bs, template=matrix_s)
     241             : 
     242         712 :       SELECT CASE (preconditioner_type)
     243             :       CASE (ls_s_preconditioner_none)
     244             :       CASE (ls_s_preconditioner_atomic, ls_s_preconditioner_molecular)
     245         356 :          CALL dbcsr_iterator_start(iter, matrix_s)
     246       28007 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
     247       27651 :             CALL dbcsr_iterator_next_block(iter, iblock_row, iblock_col, block_dp)
     248             : 
     249             :             ! do we need the block ?
     250             :             ! this depends on the preconditioner, but also the matrix clustering method employed
     251             :             ! for a clustered matrix, right now, we assume that atomic and molecular preconditioners
     252             :             ! are actually the same, and only require that the diagonal blocks (clustered) are present
     253             : 
     254       27651 :             block_needed = .FALSE.
     255             : 
     256       27651 :             IF (iblock_row == iblock_col) THEN
     257             :                block_needed = .TRUE.
     258             :             ELSE
     259       24745 :                IF (preconditioner_type == ls_s_preconditioner_molecular .AND. &
     260             :                    ls_mstruct%cluster_type == ls_cluster_atomic) THEN
     261        5098 :                   IF (ls_mstruct%atom_to_molecule(iblock_row) == ls_mstruct%atom_to_molecule(iblock_col)) block_needed = .TRUE.
     262             :                END IF
     263             :             END IF
     264             : 
     265             :             ! add it
     266         356 :             IF (block_needed) THEN
     267        4094 :                CALL dbcsr_put_block(matrix=matrix_bs, row=iblock_row, col=iblock_col, block=block_dp)
     268             :             END IF
     269             : 
     270             :          END DO
     271         712 :          CALL dbcsr_iterator_stop(iter)
     272             :       END SELECT
     273             : 
     274         356 :       CALL dbcsr_finalize(matrix_bs)
     275             : 
     276         356 :       SELECT CASE (preconditioner_type)
     277             :       CASE (ls_s_preconditioner_none)
     278             :          ! for now make it a simple identity matrix
     279           0 :          CALL dbcsr_copy(matrix_bs_sqrt, matrix_bs)
     280           0 :          CALL dbcsr_set(matrix_bs_sqrt, 0.0_dp)
     281           0 :          CALL dbcsr_add_on_diag(matrix_bs_sqrt, 1.0_dp)
     282             : 
     283             :          ! for now make it a simple identity matrix
     284           0 :          CALL dbcsr_copy(matrix_bs_sqrt_inv, matrix_bs)
     285           0 :          CALL dbcsr_set(matrix_bs_sqrt_inv, 0.0_dp)
     286           0 :          CALL dbcsr_add_on_diag(matrix_bs_sqrt_inv, 1.0_dp)
     287             :       CASE (ls_s_preconditioner_atomic, ls_s_preconditioner_molecular)
     288         356 :          CALL dbcsr_copy(matrix_bs_sqrt, matrix_bs)
     289         356 :          CALL dbcsr_copy(matrix_bs_sqrt_inv, matrix_bs)
     290             :          ! XXXXXXXXXXX
     291             :          ! XXXXXXXXXXX the threshold here could be done differently,
     292             :          ! XXXXXXXXXXX using eps_filter is reducing accuracy for no good reason, this is cheap
     293             :          ! XXXXXXXXXXX
     294             :          CALL matrix_sqrt_Newton_Schulz(matrix_bs_sqrt, matrix_bs_sqrt_inv, matrix_bs, &
     295             :                                         threshold=MIN(threshold, 1.0E-10_dp), order=order, &
     296             :                                         eps_lanczos=eps_lanczos, max_iter_lanczos=max_iter_lanczos, &
     297         712 :                                         iounit=-1)
     298             :       END SELECT
     299             : 
     300         356 :       CALL dbcsr_release(matrix_bs)
     301             : 
     302         356 :       CALL timestop(handle)
     303             : 
     304         356 :    END SUBROUTINE compute_matrix_preconditioner
     305             : 
     306             : ! **************************************************************************************************
     307             : !> \brief apply a preconditioner either
     308             : !>        forward (precondition)            inv(sqrt(bs)) * A * inv(sqrt(bs))
     309             : !>        backward (restore to old form)        sqrt(bs)  * A * sqrt(bs)
     310             : !> \param matrix ...
     311             : !> \param direction ...
     312             : !> \param matrix_bs_sqrt ...
     313             : !> \param matrix_bs_sqrt_inv ...
     314             : !> \par History
     315             : !>       2010.10 created [Joost VandeVondele]
     316             : !> \author Joost VandeVondele
     317             : ! **************************************************************************************************
     318        3614 :    SUBROUTINE apply_matrix_preconditioner(matrix, direction, matrix_bs_sqrt, matrix_bs_sqrt_inv)
     319             : 
     320             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
     321             :       CHARACTER(LEN=*)                                   :: direction
     322             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_bs_sqrt, matrix_bs_sqrt_inv
     323             : 
     324             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'apply_matrix_preconditioner'
     325             : 
     326             :       INTEGER                                            :: handle
     327             :       TYPE(dbcsr_type)                                   :: matrix_tmp
     328             : 
     329        3614 :       CALL timeset(routineN, handle)
     330        3614 :       CALL dbcsr_create(matrix_tmp, template=matrix, matrix_type=dbcsr_type_no_symmetry)
     331             : 
     332        3146 :       SELECT CASE (direction)
     333             :       CASE ("forward")
     334             :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix, matrix_bs_sqrt_inv, &
     335        3146 :                              0.0_dp, matrix_tmp)
     336             :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_bs_sqrt_inv, matrix_tmp, &
     337        3146 :                              0.0_dp, matrix)
     338             :       CASE ("backward")
     339             :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix, matrix_bs_sqrt, &
     340         468 :                              0.0_dp, matrix_tmp)
     341             :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_bs_sqrt, matrix_tmp, &
     342         468 :                              0.0_dp, matrix)
     343             :       CASE DEFAULT
     344        3614 :          CPABORT("")
     345             :       END SELECT
     346             : 
     347        3614 :       CALL dbcsr_release(matrix_tmp)
     348             : 
     349        3614 :       CALL timestop(handle)
     350             : 
     351        3614 :    END SUBROUTINE apply_matrix_preconditioner
     352             : 
     353             : ! **************************************************************************************************
     354             : !> \brief compute the density matrix with a trace that is close to nelectron.
     355             : !>        take a mu as input, and improve by bisection as needed.
     356             : !> \param matrix_p ...
     357             : !> \param mu ...
     358             : !> \param fixed_mu ...
     359             : !> \param sign_method ...
     360             : !> \param sign_order ...
     361             : !> \param matrix_ks ...
     362             : !> \param matrix_s ...
     363             : !> \param matrix_s_inv ...
     364             : !> \param nelectron ...
     365             : !> \param threshold ...
     366             : !> \param sign_symmetric ...
     367             : !> \param submatrix_sign_method ...
     368             : !> \param matrix_s_sqrt_inv ...
     369             : !> \par History
     370             : !>       2010.10 created [Joost VandeVondele]
     371             : !>       2020.07 support for methods with internal mu adjustment [Michael Lass]
     372             : !> \author Joost VandeVondele
     373             : ! **************************************************************************************************
     374         910 :    SUBROUTINE density_matrix_sign(matrix_p, mu, fixed_mu, sign_method, sign_order, matrix_ks, &
     375             :                                   matrix_s, matrix_s_inv, nelectron, threshold, sign_symmetric, submatrix_sign_method, &
     376             :                                   matrix_s_sqrt_inv)
     377             : 
     378             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_p
     379             :       REAL(KIND=dp), INTENT(INOUT)                       :: mu
     380             :       LOGICAL                                            :: fixed_mu
     381             :       INTEGER                                            :: sign_method, sign_order
     382             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_ks, matrix_s, matrix_s_inv
     383             :       INTEGER, INTENT(IN)                                :: nelectron
     384             :       REAL(KIND=dp), INTENT(IN)                          :: threshold
     385             :       LOGICAL, OPTIONAL                                  :: sign_symmetric
     386             :       INTEGER, OPTIONAL                                  :: submatrix_sign_method
     387             :       TYPE(dbcsr_type), INTENT(IN), OPTIONAL             :: matrix_s_sqrt_inv
     388             : 
     389             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'density_matrix_sign'
     390             :       REAL(KIND=dp), PARAMETER                           :: initial_increment = 0.01_dp
     391             : 
     392             :       INTEGER                                            :: handle, iter, unit_nr, &
     393             :                                                             used_submatrix_sign_method
     394             :       LOGICAL                                            :: do_sign_symmetric, has_mu_high, &
     395             :                                                             has_mu_low, internal_mu_adjust
     396             :       REAL(KIND=dp)                                      :: increment, mu_high, mu_low, trace
     397             :       TYPE(cp_logger_type), POINTER                      :: logger
     398             : 
     399         910 :       CALL timeset(routineN, handle)
     400             : 
     401         910 :       logger => cp_get_default_logger()
     402         910 :       IF (logger%para_env%is_source()) THEN
     403         455 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
     404             :       ELSE
     405             :          unit_nr = -1
     406             :       END IF
     407             : 
     408         910 :       do_sign_symmetric = .FALSE.
     409         910 :       IF (PRESENT(sign_symmetric)) do_sign_symmetric = sign_symmetric
     410             : 
     411         910 :       used_submatrix_sign_method = ls_scf_submatrix_sign_ns
     412         910 :       IF (PRESENT(submatrix_sign_method)) used_submatrix_sign_method = submatrix_sign_method
     413             : 
     414             :       internal_mu_adjust = ((sign_method .EQ. ls_scf_sign_submatrix) .AND. &
     415             :                             (used_submatrix_sign_method .EQ. ls_scf_submatrix_sign_direct_muadj .OR. &
     416         910 :                              used_submatrix_sign_method .EQ. ls_scf_submatrix_sign_direct_muadj_lowmem))
     417             : 
     418           4 :       IF (internal_mu_adjust) THEN
     419             :          CALL density_matrix_sign_internal_mu(matrix_p, trace, mu, sign_method, &
     420             :                                               matrix_ks, matrix_s, threshold, &
     421             :                                               used_submatrix_sign_method, &
     422           4 :                                               nelectron, matrix_s_sqrt_inv)
     423             :       ELSE
     424         906 :          increment = initial_increment
     425             : 
     426         906 :          has_mu_low = .FALSE.
     427         906 :          has_mu_high = .FALSE.
     428             : 
     429             :          ! bisect if both bounds are known, otherwise find the bounds with a linear search
     430        1050 :          DO iter = 1, 30
     431        1050 :             IF (has_mu_low .AND. has_mu_high) THEN
     432          16 :                mu = (mu_low + mu_high)/2
     433          16 :                IF (ABS(mu_high - mu_low) < threshold) EXIT
     434             :             END IF
     435             : 
     436             :             CALL density_matrix_sign_fixed_mu(matrix_p, trace, mu, sign_method, sign_order, &
     437             :                                               matrix_ks, matrix_s, matrix_s_inv, threshold, &
     438             :                                               do_sign_symmetric, used_submatrix_sign_method, &
     439        1050 :                                               matrix_s_sqrt_inv)
     440        1050 :             IF (unit_nr > 0) WRITE (unit_nr, '(T2,A,I2,1X,F13.9,1X,F15.9)') &
     441         525 :                "Density matrix:  iter, mu, trace error: ", iter, mu, trace - nelectron
     442             : 
     443             :             ! OK, we can skip early if we are as close as possible to the exact result
     444             :             ! smaller differences should be considered 'noise'
     445        1050 :             IF (ABS(trace - nelectron) < 0.5_dp .OR. fixed_mu) EXIT
     446             : 
     447        2100 :             IF (trace < nelectron) THEN
     448          32 :                mu_low = mu
     449          32 :                mu = mu + increment
     450          32 :                has_mu_low = .TRUE.
     451          32 :                increment = increment*2
     452             :             ELSE
     453         112 :                mu_high = mu
     454         112 :                mu = mu - increment
     455         112 :                has_mu_high = .TRUE.
     456         112 :                increment = increment*2
     457             :             END IF
     458             :          END DO
     459             : 
     460             :       END IF
     461             : 
     462         910 :       CALL timestop(handle)
     463             : 
     464         910 :    END SUBROUTINE density_matrix_sign
     465             : 
     466             : ! **************************************************************************************************
     467             : !> \brief for a fixed mu, compute the corresponding density matrix and its trace
     468             : !> \param matrix_p ...
     469             : !> \param trace ...
     470             : !> \param mu ...
     471             : !> \param sign_method ...
     472             : !> \param sign_order ...
     473             : !> \param matrix_ks ...
     474             : !> \param matrix_s ...
     475             : !> \param matrix_s_inv ...
     476             : !> \param threshold ...
     477             : !> \param sign_symmetric ...
     478             : !> \param submatrix_sign_method ...
     479             : !> \param matrix_s_sqrt_inv ...
     480             : !> \par History
     481             : !>       2010.10 created [Joost VandeVondele]
     482             : !> \author Joost VandeVondele
     483             : ! **************************************************************************************************
     484        2144 :    SUBROUTINE density_matrix_sign_fixed_mu(matrix_p, trace, mu, sign_method, sign_order, matrix_ks, &
     485             :                                            matrix_s, matrix_s_inv, threshold, sign_symmetric, submatrix_sign_method, &
     486             :                                            matrix_s_sqrt_inv)
     487             : 
     488             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_p
     489             :       REAL(KIND=dp), INTENT(OUT)                         :: trace
     490             :       REAL(KIND=dp), INTENT(INOUT)                       :: mu
     491             :       INTEGER                                            :: sign_method, sign_order
     492             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_ks, matrix_s, matrix_s_inv
     493             :       REAL(KIND=dp), INTENT(IN)                          :: threshold
     494             :       LOGICAL                                            :: sign_symmetric
     495             :       INTEGER                                            :: submatrix_sign_method
     496             :       TYPE(dbcsr_type), INTENT(IN), OPTIONAL             :: matrix_s_sqrt_inv
     497             : 
     498             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'density_matrix_sign_fixed_mu'
     499             : 
     500             :       INTEGER                                            :: handle, unit_nr
     501             :       REAL(KIND=dp)                                      :: frob_matrix
     502             :       TYPE(cp_logger_type), POINTER                      :: logger
     503             :       TYPE(dbcsr_type) :: matrix_p_ud, matrix_sign, matrix_sinv_ks, matrix_ssqrtinv_ks_ssqrtinv, &
     504             :          matrix_ssqrtinv_ks_ssqrtinv2, matrix_tmp
     505             : 
     506        1072 :       CALL timeset(routineN, handle)
     507             : 
     508        1072 :       logger => cp_get_default_logger()
     509        1072 :       IF (logger%para_env%is_source()) THEN
     510         536 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
     511             :       ELSE
     512             :          unit_nr = -1
     513             :       END IF
     514             : 
     515        1072 :       CALL dbcsr_create(matrix_sign, template=matrix_s, matrix_type=dbcsr_type_no_symmetry)
     516             : 
     517        1072 :       IF (sign_symmetric) THEN
     518             : 
     519           4 :          IF (.NOT. PRESENT(matrix_s_sqrt_inv)) &
     520           0 :             CPABORT("Argument matrix_s_sqrt_inv required if sign_symmetric is set")
     521             : 
     522           4 :          CALL dbcsr_create(matrix_ssqrtinv_ks_ssqrtinv, template=matrix_s, matrix_type=dbcsr_type_no_symmetry)
     523           4 :          CALL dbcsr_create(matrix_ssqrtinv_ks_ssqrtinv2, template=matrix_s, matrix_type=dbcsr_type_no_symmetry)
     524             :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_s_sqrt_inv, matrix_ks, &
     525           4 :                              0.0_dp, matrix_ssqrtinv_ks_ssqrtinv2, filter_eps=threshold)
     526             :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_ssqrtinv_ks_ssqrtinv2, matrix_s_sqrt_inv, &
     527           4 :                              0.0_dp, matrix_ssqrtinv_ks_ssqrtinv, filter_eps=threshold)
     528           4 :          CALL dbcsr_add_on_diag(matrix_ssqrtinv_ks_ssqrtinv, -mu)
     529             : 
     530           6 :          SELECT CASE (sign_method)
     531             :          CASE (ls_scf_sign_ns)
     532           2 :             CALL matrix_sign_Newton_Schulz(matrix_sign, matrix_ssqrtinv_ks_ssqrtinv, threshold, sign_order, iounit=-1)
     533             :          CASE (ls_scf_sign_proot)
     534           0 :             CALL matrix_sign_proot(matrix_sign, matrix_ssqrtinv_ks_ssqrtinv, threshold, sign_order)
     535             :          CASE (ls_scf_sign_submatrix)
     536           2 :             CALL matrix_sign_submatrix(matrix_sign, matrix_ssqrtinv_ks_ssqrtinv, threshold, sign_order, submatrix_sign_method)
     537             :          CASE DEFAULT
     538           4 :             CPABORT("Unkown sign method.")
     539             :          END SELECT
     540           4 :          CALL dbcsr_release(matrix_ssqrtinv_ks_ssqrtinv)
     541           4 :          CALL dbcsr_release(matrix_ssqrtinv_ks_ssqrtinv2)
     542             : 
     543             :       ELSE ! .NOT. sign_symmetric
     544             :          ! get inv(S)*H-I*mu
     545        1068 :          CALL dbcsr_create(matrix_sinv_ks, template=matrix_s, matrix_type=dbcsr_type_no_symmetry)
     546             :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_s_inv, matrix_ks, &
     547        1068 :                              0.0_dp, matrix_sinv_ks, filter_eps=threshold)
     548        1068 :          CALL dbcsr_add_on_diag(matrix_sinv_ks, -mu)
     549             : 
     550             :          ! compute sign(inv(S)*H-I*mu)
     551        2124 :          SELECT CASE (sign_method)
     552             :          CASE (ls_scf_sign_ns)
     553        1056 :             CALL matrix_sign_Newton_Schulz(matrix_sign, matrix_sinv_ks, threshold, sign_order, iounit=-1)
     554             :          CASE (ls_scf_sign_proot)
     555           8 :             CALL matrix_sign_proot(matrix_sign, matrix_sinv_ks, threshold, sign_order)
     556             :          CASE (ls_scf_sign_submatrix)
     557           4 :             CALL matrix_sign_submatrix(matrix_sign, matrix_sinv_ks, threshold, sign_order, submatrix_sign_method)
     558             :          CASE DEFAULT
     559        1068 :             CPABORT("Unkown sign method.")
     560             :          END SELECT
     561        1068 :          CALL dbcsr_release(matrix_sinv_ks)
     562             :       END IF
     563             : 
     564             :       ! now construct the density matrix PS=0.5*(I-sign(inv(S)H-I*mu))
     565        1072 :       CALL dbcsr_create(matrix_p_ud, template=matrix_s, matrix_type=dbcsr_type_no_symmetry)
     566        1072 :       CALL dbcsr_copy(matrix_p_ud, matrix_sign)
     567        1072 :       CALL dbcsr_scale(matrix_p_ud, -0.5_dp)
     568        1072 :       CALL dbcsr_add_on_diag(matrix_p_ud, 0.5_dp)
     569        1072 :       CALL dbcsr_release(matrix_sign)
     570             : 
     571             :       ! we now have PS, lets get its trace
     572        1072 :       CALL dbcsr_trace(matrix_p_ud, trace)
     573             : 
     574             :       ! we can also check it is idempotent PS*PS=PS
     575        1072 :       CALL dbcsr_create(matrix_tmp, template=matrix_s, matrix_type=dbcsr_type_no_symmetry)
     576             :       CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_p_ud, matrix_p_ud, &
     577        1072 :                           0.0_dp, matrix_tmp, filter_eps=threshold)
     578        1072 :       CALL dbcsr_add(matrix_tmp, matrix_p_ud, 1.0_dp, -1.0_dp)
     579        1072 :       frob_matrix = dbcsr_frobenius_norm(matrix_tmp)
     580        1072 :       IF (unit_nr > 0 .AND. frob_matrix > 0.001_dp) &
     581          30 :          WRITE (unit_nr, '(T2,A,F20.12)') "Deviation from idempotency: ", frob_matrix
     582             : 
     583        1072 :       IF (sign_symmetric) THEN
     584             :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_s_sqrt_inv, matrix_p_ud, &
     585           4 :                              0.0_dp, matrix_tmp, filter_eps=threshold)
     586             :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp, matrix_s_sqrt_inv, &
     587           4 :                              0.0_dp, matrix_p, filter_eps=threshold)
     588             :       ELSE
     589             : 
     590             :          ! get P=PS*inv(S)
     591             :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_p_ud, matrix_s_inv, &
     592        1068 :                              0.0_dp, matrix_p, filter_eps=threshold)
     593             :       END IF
     594        1072 :       CALL dbcsr_release(matrix_p_ud)
     595        1072 :       CALL dbcsr_release(matrix_tmp)
     596             : 
     597        1072 :       CALL timestop(handle)
     598             : 
     599        1072 :    END SUBROUTINE density_matrix_sign_fixed_mu
     600             : 
     601             : ! **************************************************************************************************
     602             : !> \brief compute the corresponding density matrix and its trace, using methods with internal mu adjustment
     603             : !> \param matrix_p ...
     604             : !> \param trace ...
     605             : !> \param mu ...
     606             : !> \param sign_method ...
     607             : !> \param matrix_ks ...
     608             : !> \param matrix_s ...
     609             : !> \param threshold ...
     610             : !> \param submatrix_sign_method ...
     611             : !> \param nelectron ...
     612             : !> \param matrix_s_sqrt_inv ...
     613             : !> \par History
     614             : !>       2020.07 created, based on density_matrix_sign_fixed_mu [Michael Lass]
     615             : !> \author Michael Lass
     616             : ! **************************************************************************************************
     617           8 :    SUBROUTINE density_matrix_sign_internal_mu(matrix_p, trace, mu, sign_method, matrix_ks, &
     618             :                                               matrix_s, threshold, submatrix_sign_method, &
     619             :                                               nelectron, matrix_s_sqrt_inv)
     620             : 
     621             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_p
     622             :       REAL(KIND=dp), INTENT(OUT)                         :: trace
     623             :       REAL(KIND=dp), INTENT(INOUT)                       :: mu
     624             :       INTEGER                                            :: sign_method
     625             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_ks, matrix_s
     626             :       REAL(KIND=dp), INTENT(IN)                          :: threshold
     627             :       INTEGER                                            :: submatrix_sign_method
     628             :       INTEGER, INTENT(IN)                                :: nelectron
     629             :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix_s_sqrt_inv
     630             : 
     631             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'density_matrix_sign_internal_mu'
     632             : 
     633             :       INTEGER                                            :: handle, unit_nr
     634             :       REAL(KIND=dp)                                      :: frob_matrix
     635             :       TYPE(cp_logger_type), POINTER                      :: logger
     636             :       TYPE(dbcsr_type)                                   :: matrix_p_ud, matrix_sign, &
     637             :                                                             matrix_ssqrtinv_ks_ssqrtinv, &
     638             :                                                             matrix_ssqrtinv_ks_ssqrtinv2, &
     639             :                                                             matrix_tmp
     640             : 
     641           4 :       CALL timeset(routineN, handle)
     642             : 
     643           4 :       logger => cp_get_default_logger()
     644           4 :       IF (logger%para_env%is_source()) THEN
     645           2 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
     646             :       ELSE
     647             :          unit_nr = -1
     648             :       END IF
     649             : 
     650           4 :       CALL dbcsr_create(matrix_sign, template=matrix_s, matrix_type=dbcsr_type_no_symmetry)
     651             : 
     652           4 :       CALL dbcsr_create(matrix_ssqrtinv_ks_ssqrtinv, template=matrix_s, matrix_type=dbcsr_type_no_symmetry)
     653           4 :       CALL dbcsr_create(matrix_ssqrtinv_ks_ssqrtinv2, template=matrix_s, matrix_type=dbcsr_type_no_symmetry)
     654             :       CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_s_sqrt_inv, matrix_ks, &
     655           4 :                           0.0_dp, matrix_ssqrtinv_ks_ssqrtinv2, filter_eps=threshold)
     656             :       CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_ssqrtinv_ks_ssqrtinv2, matrix_s_sqrt_inv, &
     657           4 :                           0.0_dp, matrix_ssqrtinv_ks_ssqrtinv, filter_eps=threshold)
     658           4 :       CALL dbcsr_add_on_diag(matrix_ssqrtinv_ks_ssqrtinv, -mu)
     659             : 
     660           8 :       SELECT CASE (sign_method)
     661             :       CASE (ls_scf_sign_submatrix)
     662           8 :          SELECT CASE (submatrix_sign_method)
     663             :          CASE (ls_scf_submatrix_sign_direct_muadj, ls_scf_submatrix_sign_direct_muadj_lowmem)
     664             :             CALL matrix_sign_submatrix_mu_adjust(matrix_sign, matrix_ssqrtinv_ks_ssqrtinv, mu, nelectron, threshold, &
     665           4 :                                                  submatrix_sign_method)
     666             :          CASE DEFAULT
     667           4 :             CPABORT("density_matrix_sign_internal_mu called with invalid submatrix sign method")
     668             :          END SELECT
     669             :       CASE DEFAULT
     670           4 :          CPABORT("density_matrix_sign_internal_mu called with invalid sign method.")
     671             :       END SELECT
     672           4 :       CALL dbcsr_release(matrix_ssqrtinv_ks_ssqrtinv)
     673           4 :       CALL dbcsr_release(matrix_ssqrtinv_ks_ssqrtinv2)
     674             : 
     675             :       ! now construct the density matrix PS=0.5*(I-sign(inv(S)H-I*mu))
     676           4 :       CALL dbcsr_create(matrix_p_ud, template=matrix_s, matrix_type=dbcsr_type_no_symmetry)
     677           4 :       CALL dbcsr_copy(matrix_p_ud, matrix_sign)
     678           4 :       CALL dbcsr_scale(matrix_p_ud, -0.5_dp)
     679           4 :       CALL dbcsr_add_on_diag(matrix_p_ud, 0.5_dp)
     680           4 :       CALL dbcsr_release(matrix_sign)
     681             : 
     682             :       ! we now have PS, lets get its trace
     683           4 :       CALL dbcsr_trace(matrix_p_ud, trace)
     684             : 
     685             :       ! we can also check it is idempotent PS*PS=PS
     686           4 :       CALL dbcsr_create(matrix_tmp, template=matrix_s, matrix_type=dbcsr_type_no_symmetry)
     687             :       CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_p_ud, matrix_p_ud, &
     688           4 :                           0.0_dp, matrix_tmp, filter_eps=threshold)
     689           4 :       CALL dbcsr_add(matrix_tmp, matrix_p_ud, 1.0_dp, -1.0_dp)
     690           4 :       frob_matrix = dbcsr_frobenius_norm(matrix_tmp)
     691           4 :       IF (unit_nr > 0 .AND. frob_matrix > 0.001_dp) &
     692           0 :          WRITE (unit_nr, '(T2,A,F20.12)') "Deviation from idempotency: ", frob_matrix
     693             : 
     694             :       CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_s_sqrt_inv, matrix_p_ud, &
     695           4 :                           0.0_dp, matrix_tmp, filter_eps=threshold)
     696             :       CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp, matrix_s_sqrt_inv, &
     697           4 :                           0.0_dp, matrix_p, filter_eps=threshold)
     698           4 :       CALL dbcsr_release(matrix_p_ud)
     699           4 :       CALL dbcsr_release(matrix_tmp)
     700             : 
     701           4 :       CALL timestop(handle)
     702             : 
     703           4 :    END SUBROUTINE density_matrix_sign_internal_mu
     704             : 
     705             : ! **************************************************************************************************
     706             : !> \brief compute the density matrix using a trace-resetting algorithm
     707             : !> \param matrix_p ...
     708             : !> \param matrix_ks ...
     709             : !> \param matrix_s_sqrt_inv ...
     710             : !> \param nelectron ...
     711             : !> \param threshold ...
     712             : !> \param e_homo ...
     713             : !> \param e_lumo ...
     714             : !> \param e_mu ...
     715             : !> \param dynamic_threshold ...
     716             : !> \param matrix_ks_deviation ...
     717             : !> \param max_iter_lanczos ...
     718             : !> \param eps_lanczos ...
     719             : !> \param converged ...
     720             : !> \param iounit ...
     721             : !> \par History
     722             : !>       2012.06 created [Florian Thoele]
     723             : !> \author Florian Thoele
     724             : ! **************************************************************************************************
     725       13488 :    SUBROUTINE density_matrix_trs4(matrix_p, matrix_ks, matrix_s_sqrt_inv, &
     726             :                                   nelectron, threshold, e_homo, e_lumo, e_mu, &
     727             :                                   dynamic_threshold, matrix_ks_deviation, &
     728             :                                   max_iter_lanczos, eps_lanczos, converged, iounit)
     729             : 
     730             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_p
     731             :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix_ks, matrix_s_sqrt_inv
     732             :       INTEGER, INTENT(IN)                                :: nelectron
     733             :       REAL(KIND=dp), INTENT(IN)                          :: threshold
     734             :       REAL(KIND=dp), INTENT(INOUT)                       :: e_homo, e_lumo, e_mu
     735             :       LOGICAL, INTENT(IN), OPTIONAL                      :: dynamic_threshold
     736             :       TYPE(dbcsr_type), INTENT(INOUT), OPTIONAL          :: matrix_ks_deviation
     737             :       INTEGER, INTENT(IN)                                :: max_iter_lanczos
     738             :       REAL(KIND=dp), INTENT(IN)                          :: eps_lanczos
     739             :       LOGICAL, INTENT(OUT), OPTIONAL                     :: converged
     740             :       INTEGER, INTENT(IN), OPTIONAL                      :: iounit
     741             : 
     742             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'density_matrix_trs4'
     743             :       INTEGER, PARAMETER                                 :: max_iter = 100
     744             :       REAL(KIND=dp), PARAMETER                           :: gamma_max = 6.0_dp, gamma_min = 0.0_dp
     745             : 
     746             :       INTEGER                                            :: branch, estimated_steps, handle, i, j, &
     747             :                                                             unit_nr
     748             :       INTEGER(kind=int_8)                                :: flop1, flop2
     749             :       LOGICAL                                            :: arnoldi_converged, do_dyn_threshold
     750             :       REAL(KIND=dp) :: current_threshold, delta_n, eps_max, eps_min, est_threshold, frob_id, &
     751             :          frob_x, gam, homo, lumo, max_eig, max_threshold, maxdev, maxev, min_eig, minev, mmin, mu, &
     752             :          mu_a, mu_b, mu_c, mu_fa, mu_fc, occ_matrix, scaled_homo_bound, scaled_lumo_bound, t1, t2, &
     753             :          trace_fx, trace_gx, xi
     754       13488 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: gamma_values
     755             :       TYPE(cp_logger_type), POINTER                      :: logger
     756             :       TYPE(dbcsr_type)                                   :: matrix_k0, matrix_x, matrix_x_nosym, &
     757             :                                                             matrix_xidsq, matrix_xsq, tmp_gx
     758             : 
     759       13488 :       IF (nelectron == 0) THEN
     760           0 :          CALL dbcsr_set(matrix_p, 0.0_dp)
     761             :          RETURN
     762             :       END IF
     763             : 
     764       13488 :       CALL timeset(routineN, handle)
     765             : 
     766       13488 :       IF (PRESENT(iounit)) THEN
     767        1660 :          unit_nr = iounit
     768             :       ELSE
     769       11828 :          logger => cp_get_default_logger()
     770       11828 :          IF (logger%para_env%is_source()) THEN
     771        5914 :             unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
     772             :          ELSE
     773        5914 :             unit_nr = -1
     774             :          END IF
     775             :       END IF
     776             : 
     777       13488 :       do_dyn_threshold = .FALSE.
     778       13488 :       IF (PRESENT(dynamic_threshold)) do_dyn_threshold = dynamic_threshold
     779             : 
     780       13488 :       IF (PRESENT(converged)) converged = .FALSE.
     781             : 
     782             :       ! init X = (eps_n*I - H)/(eps_n - eps_0)  ... H* = S^-1/2*H*S^-1/2
     783       13488 :       CALL dbcsr_create(matrix_x, template=matrix_ks, matrix_type="S")
     784             : 
     785             :       ! at some points the non-symmetric version of x is required
     786       13488 :       CALL dbcsr_create(matrix_x_nosym, template=matrix_ks, matrix_type=dbcsr_type_no_symmetry)
     787             : 
     788             :       CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_s_sqrt_inv, matrix_ks, &
     789       13488 :                           0.0_dp, matrix_x_nosym, filter_eps=threshold)
     790             :       CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_x_nosym, matrix_s_sqrt_inv, &
     791       13488 :                           0.0_dp, matrix_x, filter_eps=threshold)
     792       13488 :       CALL dbcsr_desymmetrize(matrix_x, matrix_x_nosym)
     793             : 
     794       13488 :       CALL dbcsr_create(matrix_k0, template=matrix_ks, matrix_type=dbcsr_type_no_symmetry)
     795       13488 :       CALL dbcsr_copy(matrix_k0, matrix_x_nosym)
     796             : 
     797             :       ! compute the deviation in the mixed matrix, as seen in the ortho basis
     798       13488 :       IF (do_dyn_threshold) THEN
     799          24 :          CPASSERT(PRESENT(matrix_ks_deviation))
     800          24 :          CALL dbcsr_add(matrix_ks_deviation, matrix_x_nosym, -1.0_dp, 1.0_dp)
     801             :          CALL arnoldi_extremal(matrix_ks_deviation, maxev, minev, max_iter=max_iter_lanczos, threshold=eps_lanczos, &
     802          24 :                                converged=arnoldi_converged)
     803          24 :          maxdev = MAX(ABS(maxev), ABS(minev))
     804          24 :          IF (unit_nr > 0) THEN
     805           0 :             WRITE (unit_nr, '(T6,A,1X,L12)') "Lanczos converged:      ", arnoldi_converged
     806           0 :             WRITE (unit_nr, '(T6,A,1X,F12.5)') "change in mixed matrix: ", maxdev
     807           0 :             WRITE (unit_nr, '(T6,A,1X,F12.5)') "HOMO upper bound:       ", e_homo + maxdev
     808           0 :             WRITE (unit_nr, '(T6,A,1X,F12.5)') "LUMO lower bound:       ", e_lumo - maxdev
     809           0 :             WRITE (unit_nr, '(T6,A,1X,L12)') "Predicts a gap ?        ", ((e_lumo - maxdev) - (e_homo + maxdev)) > 0
     810             :          END IF
     811             :          ! save the old mixed matrix
     812          24 :          CALL dbcsr_copy(matrix_ks_deviation, matrix_x_nosym)
     813             : 
     814             :       END IF
     815             : 
     816             :       ! get largest/smallest eigenvalues for scaling
     817             :       CALL arnoldi_extremal(matrix_x_nosym, max_eig, min_eig, max_iter=max_iter_lanczos, threshold=eps_lanczos, &
     818       13488 :                             converged=arnoldi_converged)
     819       19402 :       IF (unit_nr > 0) WRITE (unit_nr, '(T6,A,1X,2F12.5,1X,A,1X,L1)') "Est. extremal eigenvalues", &
     820       11828 :          min_eig, max_eig, " converged: ", arnoldi_converged
     821       13488 :       eps_max = max_eig
     822       13488 :       eps_min = min_eig
     823             : 
     824             :       ! scale KS matrix
     825       13488 :       IF (eps_max == eps_min) THEN
     826          20 :          CALL dbcsr_scale(matrix_x, 1.0_dp/eps_max)
     827             :       ELSE
     828       13468 :          CALL dbcsr_add_on_diag(matrix_x, -eps_max)
     829       13468 :          CALL dbcsr_scale(matrix_x, -1.0_dp/(eps_max - eps_min))
     830             :       END IF
     831             : 
     832       13488 :       current_threshold = threshold
     833       13488 :       IF (do_dyn_threshold) THEN
     834             :          ! scale bounds for HOMO/LUMO
     835          24 :          scaled_homo_bound = (eps_max - (e_homo + maxdev))/(eps_max - eps_min)
     836          24 :          scaled_lumo_bound = (eps_max - (e_lumo - maxdev))/(eps_max - eps_min)
     837             :       END IF
     838             : 
     839       13488 :       CALL dbcsr_create(matrix_xsq, template=matrix_ks, matrix_type="S")
     840             : 
     841       13488 :       CALL dbcsr_create(matrix_xidsq, template=matrix_ks, matrix_type="S")
     842             : 
     843       13488 :       CALL dbcsr_create(tmp_gx, template=matrix_ks, matrix_type="S")
     844             : 
     845       13488 :       ALLOCATE (gamma_values(max_iter))
     846             : 
     847       68636 :       DO i = 1, max_iter
     848       68636 :          t1 = m_walltime()
     849       68636 :          flop1 = 0; flop2 = 0
     850             : 
     851             :          ! get X*X
     852             :          CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_x, matrix_x, &
     853             :                              0.0_dp, matrix_xsq, &
     854       68636 :                              filter_eps=current_threshold, flop=flop1)
     855             : 
     856             :          ! intermediate use matrix_xidsq to compute = X*X-X
     857       68636 :          CALL dbcsr_copy(matrix_xidsq, matrix_x)
     858       68636 :          CALL dbcsr_add(matrix_xidsq, matrix_xsq, -1.0_dp, 1.0_dp)
     859       68636 :          frob_id = dbcsr_frobenius_norm(matrix_xidsq)
     860       68636 :          frob_x = dbcsr_frobenius_norm(matrix_x)
     861             : 
     862             :          ! xidsq = (1-X)*(1-X)
     863             :          ! use (1-x)*(1-x) = 1 + x*x - 2*x
     864       68636 :          CALL dbcsr_copy(matrix_xidsq, matrix_x)
     865       68636 :          CALL dbcsr_add(matrix_xidsq, matrix_xsq, -2.0_dp, 1.0_dp)
     866       68636 :          CALL dbcsr_add_on_diag(matrix_xidsq, 1.0_dp)
     867             : 
     868             :          ! tmp_gx = 4X-3X*X
     869       68636 :          CALL dbcsr_copy(tmp_gx, matrix_x)
     870       68636 :          CALL dbcsr_add(tmp_gx, matrix_xsq, 4.0_dp, -3.0_dp)
     871             : 
     872             :          ! get gamma
     873             :          ! Tr(F) = Tr(XX*tmp_gx) Tr(G) is equivalent
     874       68636 :          CALL dbcsr_dot(matrix_xsq, matrix_xidsq, trace_gx)
     875       68636 :          CALL dbcsr_dot(matrix_xsq, tmp_gx, trace_fx)
     876             : 
     877             :          ! if converged, and gam becomes noisy, fix it to 3, which results in a final McWeeny step.
     878             :          ! do this only if the electron count is reasonable.
     879             :          ! maybe tune if the current criterion is not good enough
     880       68636 :          delta_n = nelectron - trace_fx
     881             :          ! condition: ABS(frob_id/frob_x) < SQRT(threshold) ...
     882       68636 :          IF (((frob_id*frob_id) < (threshold*frob_x*frob_x)) .AND. (ABS(delta_n) < 0.5_dp)) THEN
     883       13488 :             gam = 3.0_dp
     884       55148 :          ELSE IF (ABS(delta_n) < 1e-14_dp) THEN
     885           0 :             gam = 0.0_dp ! rare case of perfect electron count
     886             :          ELSE
     887             :             ! make sure, we don't divide by zero, as soon as gam is outside the interval gam_min,gam_max, it doesn't matter
     888       55148 :             gam = delta_n/MAX(trace_gx, ABS(delta_n)/100)
     889             :          END IF
     890       68636 :          gamma_values(i) = gam
     891             : 
     892             :          IF (unit_nr > 0 .AND. .FALSE.) THEN
     893             :             WRITE (unit_nr, *) "trace_fx", trace_fx, "trace_gx", trace_gx, "gam", gam, &
     894             :                "frob_id", frob_id, "conv", ABS(frob_id/frob_x)
     895             :          END IF
     896             : 
     897       68636 :          IF (do_dyn_threshold) THEN
     898             :             ! quantities used for dynamic thresholding, when the estimated gap is larger than zero
     899         154 :             xi = (scaled_homo_bound - scaled_lumo_bound)
     900         154 :             IF (xi > 0.0_dp) THEN
     901         130 :                mmin = 0.5*(scaled_homo_bound + scaled_lumo_bound)
     902         130 :                max_threshold = ABS(1 - 2*mmin)*xi
     903             : 
     904         130 :                scaled_homo_bound = evaluate_trs4_polynomial(scaled_homo_bound, gamma_values(i:), 1)
     905         130 :                scaled_lumo_bound = evaluate_trs4_polynomial(scaled_lumo_bound, gamma_values(i:), 1)
     906         130 :                estimated_steps = estimate_steps(scaled_homo_bound, scaled_lumo_bound, threshold)
     907             : 
     908         130 :                est_threshold = (threshold/(estimated_steps + i + 1))*xi/(1 + threshold/(estimated_steps + i + 1))
     909         130 :                est_threshold = MIN(max_threshold, est_threshold)
     910         130 :                IF (i > 1) est_threshold = MAX(est_threshold, 0.1_dp*current_threshold)
     911         130 :                current_threshold = est_threshold
     912             :             ELSE
     913          24 :                current_threshold = threshold
     914             :             END IF
     915             :          END IF
     916             : 
     917       68636 :          IF (gam > gamma_max) THEN
     918             :             ! Xn+1 = 2X-X*X
     919         700 :             CALL dbcsr_add(matrix_x, matrix_xsq, 2.0_dp, -1.0_dp)
     920         700 :             CALL dbcsr_filter(matrix_x, current_threshold)
     921         700 :             branch = 1
     922       67936 :          ELSE IF (gam < gamma_min) THEN
     923             :             ! Xn+1 = X*X
     924        3058 :             CALL dbcsr_copy(matrix_x, matrix_xsq)
     925        3058 :             branch = 2
     926             :          ELSE
     927             :             ! Xn+1 = F(X) + gam*G(X)
     928       64878 :             CALL dbcsr_add(tmp_gx, matrix_xidsq, 1.0_dp, gam)
     929             :             CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_xsq, tmp_gx, &
     930             :                                 0.0_dp, matrix_x, &
     931       64878 :                                 flop=flop2, filter_eps=current_threshold)
     932       64878 :             branch = 3
     933             :          END IF
     934             : 
     935       68636 :          occ_matrix = dbcsr_get_occupation(matrix_x)
     936       68636 :          t2 = m_walltime()
     937       68636 :          IF (unit_nr > 0) THEN
     938             :             WRITE (unit_nr, &
     939       30136 :                    '(T6,A,I3,1X,F10.8,E12.3,F12.3,F13.3,E12.3)') "TRS4 it ", &
     940       30136 :                i, occ_matrix, ABS(trace_gx), t2 - t1, &
     941       60272 :                (flop1 + flop2)/(1.0E6_dp*MAX(t2 - t1, 0.001_dp)), current_threshold
     942       30136 :             CALL m_flush(unit_nr)
     943             :          END IF
     944             : 
     945       68636 :          IF (abnormal_value(trace_gx)) &
     946           0 :             CPABORT("trace_gx is an abnormal value (NaN/Inf).")
     947             : 
     948             :          ! a branch of 1 or 2 appears to lead to a less accurate electron number count and premature exit
     949             :          ! if it turns out this does not exit because we get stuck in branch 1/2 for a reason we need to refine further
     950             :          ! condition: ABS(frob_id/frob_x) < SQRT(threshold) ...
     951      137272 :          IF ((frob_id*frob_id) < (threshold*frob_x*frob_x) .AND. branch == 3 .AND. (ABS(delta_n) < 0.5_dp)) THEN
     952       13488 :             IF (PRESENT(converged)) converged = .TRUE.
     953             :             EXIT
     954             :          END IF
     955             : 
     956             :       END DO
     957             : 
     958       13488 :       occ_matrix = dbcsr_get_occupation(matrix_x)
     959       13488 :       IF (unit_nr > 0) WRITE (unit_nr, '(T6,A,I3,1X,F10.8,E12.3)') 'Final TRS4 iteration  ', i, occ_matrix, ABS(trace_gx)
     960             : 
     961             :       ! free some memory
     962       13488 :       CALL dbcsr_release(tmp_gx)
     963       13488 :       CALL dbcsr_release(matrix_xsq)
     964       13488 :       CALL dbcsr_release(matrix_xidsq)
     965             : 
     966             :       ! output to matrix_p, P = inv(S)^0.5 X inv(S)^0.5
     967             :       CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_x, matrix_s_sqrt_inv, &
     968       13488 :                           0.0_dp, matrix_x_nosym, filter_eps=threshold)
     969             :       CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_s_sqrt_inv, matrix_x_nosym, &
     970       13488 :                           0.0_dp, matrix_p, filter_eps=threshold)
     971             : 
     972             :       ! calculate the chemical potential by doing a bisection of fk(x0)-0.5, where fk is evaluated using the stored values for gamma
     973             :       ! E. Rubensson et al., Chem Phys Lett 432, 2006, 591-594
     974       13488 :       mu_a = 0.0_dp; mu_b = 1.0_dp; 
     975       13488 :       mu_fa = evaluate_trs4_polynomial(mu_a, gamma_values, i - 1) - 0.5_dp
     976      157712 :       DO j = 1, 40
     977      157712 :          mu_c = 0.5*(mu_a + mu_b)
     978      157712 :          mu_fc = evaluate_trs4_polynomial(mu_c, gamma_values, i - 1) - 0.5_dp ! i-1 because in the last iteration, only convergence is checked
     979      157712 :          IF (ABS(mu_fc) < 1.0E-6_dp .OR. (mu_b - mu_a)/2 < 1.0E-6_dp) EXIT !TODO: define threshold values
     980             : 
     981      157712 :          IF (mu_fc*mu_fa > 0) THEN
     982       74286 :             mu_a = mu_c
     983       74286 :             mu_fa = mu_fc
     984             :          ELSE
     985             :             mu_b = mu_c
     986             :          END IF
     987             :       END DO
     988       13488 :       mu = (eps_min - eps_max)*mu_c + eps_max
     989       13488 :       DEALLOCATE (gamma_values)
     990       13488 :       IF (unit_nr > 0) THEN
     991        5914 :          WRITE (unit_nr, '(T6,A,1X,F12.5)') 'Chemical potential (mu): ', mu
     992             :       END IF
     993       13488 :       e_mu = mu
     994             : 
     995       13488 :       IF (do_dyn_threshold) THEN
     996          24 :          CALL dbcsr_desymmetrize(matrix_x, matrix_x_nosym)
     997             :          CALL compute_homo_lumo(matrix_k0, matrix_x_nosym, eps_min, eps_max, &
     998          24 :                                 threshold, max_iter_lanczos, eps_lanczos, homo, lumo, unit_nr)
     999          24 :          e_homo = homo
    1000          24 :          e_lumo = lumo
    1001             :       END IF
    1002             : 
    1003       13488 :       CALL dbcsr_release(matrix_x)
    1004       13488 :       CALL dbcsr_release(matrix_x_nosym)
    1005       13488 :       CALL dbcsr_release(matrix_k0)
    1006       13488 :       CALL timestop(handle)
    1007             : 
    1008       26976 :    END SUBROUTINE density_matrix_trs4
    1009             : 
    1010             : ! **************************************************************************************************
    1011             : !> \brief compute the density matrix using a non monotonic trace conserving
    1012             : !>  algorithm based on SIAM DOI. 10.1137/130911585.
    1013             : !>       2014.04 created [Jonathan Mullin]
    1014             : !> \param matrix_p ...
    1015             : !> \param matrix_ks ...
    1016             : !> \param matrix_s_sqrt_inv ...
    1017             : !> \param nelectron ...
    1018             : !> \param threshold ...
    1019             : !> \param e_homo ...
    1020             : !> \param e_lumo ...
    1021             : !> \param non_monotonic ...
    1022             : !> \param eps_lanczos ...
    1023             : !> \param max_iter_lanczos ...
    1024             : !> \param iounit ...
    1025             : !> \author Jonathan Mullin
    1026             : ! **************************************************************************************************
    1027         120 :    SUBROUTINE density_matrix_tc2(matrix_p, matrix_ks, matrix_s_sqrt_inv, &
    1028             :                                  nelectron, threshold, e_homo, e_lumo, &
    1029             :                                  non_monotonic, eps_lanczos, max_iter_lanczos, iounit)
    1030             : 
    1031             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_p
    1032             :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix_ks, matrix_s_sqrt_inv
    1033             :       INTEGER, INTENT(IN)                                :: nelectron
    1034             :       REAL(KIND=dp), INTENT(IN)                          :: threshold
    1035             :       REAL(KIND=dp), INTENT(INOUT)                       :: e_homo, e_lumo
    1036             :       LOGICAL, INTENT(IN), OPTIONAL                      :: non_monotonic
    1037             :       REAL(KIND=dp), INTENT(IN)                          :: eps_lanczos
    1038             :       INTEGER, INTENT(IN)                                :: max_iter_lanczos
    1039             :       INTEGER, INTENT(IN), OPTIONAL                      :: iounit
    1040             : 
    1041             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'density_matrix_tc2'
    1042             :       INTEGER, PARAMETER                                 :: max_iter = 100
    1043             : 
    1044             :       INTEGER                                            :: handle, i, j, k, unit_nr
    1045             :       INTEGER(kind=int_8)                                :: flop1, flop2
    1046             :       LOGICAL                                            :: converged, do_non_monotonic
    1047             :       REAL(KIND=dp)                                      :: beta, betaB, eps_max, eps_min, gama, &
    1048             :                                                             max_eig, min_eig, occ_matrix, t1, t2, &
    1049             :                                                             trace_fx, trace_gx
    1050         120 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: alpha, lambda, nu, poly, wu, X, Y
    1051             :       TYPE(cp_logger_type), POINTER                      :: logger
    1052             :       TYPE(dbcsr_type)                                   :: matrix_tmp, matrix_x, matrix_xsq
    1053             : 
    1054         120 :       CALL timeset(routineN, handle)
    1055             : 
    1056         120 :       IF (PRESENT(iounit)) THEN
    1057         118 :          unit_nr = iounit
    1058             :       ELSE
    1059           2 :          logger => cp_get_default_logger()
    1060           2 :          IF (logger%para_env%is_source()) THEN
    1061           1 :             unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    1062             :          ELSE
    1063           1 :             unit_nr = -1
    1064             :          END IF
    1065             :       END IF
    1066             : 
    1067         120 :       do_non_monotonic = .FALSE.
    1068         120 :       IF (PRESENT(non_monotonic)) do_non_monotonic = non_monotonic
    1069             : 
    1070             :       ! init X = (eps_n*I - H)/(eps_n - eps_0)  ... H* = S^-1/2*H*S^-1/2
    1071         120 :       CALL dbcsr_create(matrix_x, template=matrix_ks, matrix_type=dbcsr_type_no_symmetry)
    1072         120 :       CALL dbcsr_create(matrix_xsq, template=matrix_ks, matrix_type=dbcsr_type_no_symmetry)
    1073             : 
    1074             :       CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_s_sqrt_inv, matrix_ks, &
    1075         120 :                           0.0_dp, matrix_xsq, filter_eps=threshold)
    1076             :       CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_xsq, matrix_s_sqrt_inv, &
    1077         120 :                           0.0_dp, matrix_x, filter_eps=threshold)
    1078             : 
    1079         120 :       IF (unit_nr > 0) THEN
    1080           1 :          WRITE (unit_nr, '(T6,A,1X,F12.5)') "HOMO upper bound:       ", e_homo
    1081           1 :          WRITE (unit_nr, '(T6,A,1X,F12.5)') "LUMO lower bound:       ", e_lumo
    1082           1 :          WRITE (unit_nr, '(T6,A,1X,L12)') "Predicts a gap ?        ", ((e_lumo) - (e_homo)) > 0
    1083             :       END IF
    1084             : 
    1085             :       ! get largest/smallest eigenvalues for scaling
    1086             :       CALL arnoldi_extremal(matrix_x, max_eig, min_eig, max_iter=max_iter_lanczos, threshold=eps_lanczos, &
    1087         120 :                             converged=converged)
    1088         121 :       IF (unit_nr > 0) WRITE (unit_nr, '(T6,A,1X,2F12.5,1X,A,1X,L1)') "Est. extremal eigenvalues", &
    1089           2 :          min_eig, max_eig, " converged: ", converged
    1090             : 
    1091         120 :       eps_max = max_eig
    1092         120 :       eps_min = min_eig
    1093             : 
    1094             :       ! scale KS matrix
    1095         120 :       CALL dbcsr_scale(matrix_x, -1.0_dp)
    1096         120 :       CALL dbcsr_add_on_diag(matrix_x, eps_max)
    1097         120 :       CALL dbcsr_scale(matrix_x, 1/(eps_max - eps_min))
    1098             : 
    1099         120 :       CALL dbcsr_copy(matrix_xsq, matrix_x)
    1100             : 
    1101         120 :       CALL dbcsr_create(matrix_tmp, template=matrix_ks, matrix_type=dbcsr_type_no_symmetry)
    1102             : 
    1103         120 :       ALLOCATE (poly(max_iter))
    1104         120 :       ALLOCATE (nu(max_iter))
    1105         120 :       ALLOCATE (wu(max_iter))
    1106         120 :       ALLOCATE (alpha(max_iter))
    1107         120 :       ALLOCATE (X(4))
    1108         120 :       ALLOCATE (Y(4))
    1109         120 :       ALLOCATE (lambda(4))
    1110             : 
    1111             : ! Controls over the non_monotonic bounds, First if low gap, bias slightly
    1112         120 :       beta = (eps_max - ABS(e_lumo))/(eps_max - eps_min)
    1113         120 :       betaB = (eps_max + ABS(e_homo))/(eps_max - eps_min)
    1114             : 
    1115         120 :       IF ((beta - betaB) < 0.005_dp) THEN
    1116         120 :          beta = beta - 0.002_dp
    1117         120 :          betaB = betaB + 0.002_dp
    1118             :       END IF
    1119             : ! Check if input specifies to use monotonic bounds.
    1120         120 :       IF (.NOT. do_non_monotonic) THEN
    1121          26 :          beta = 0.0_dp
    1122          26 :          betaB = 1.0_dp
    1123             :       END IF
    1124             : ! initial SCF cycle has no reliable estimate of homo/lumo, force monotinic bounds.
    1125         120 :       IF (e_homo == 0.0_dp) THEN
    1126          76 :          beta = 0.0_dp
    1127          76 :          BetaB = 1.0_dp
    1128             :       END IF
    1129             : 
    1130             :       ! init to take true branch first
    1131         120 :       trace_fx = nelectron
    1132         120 :       trace_gx = 0
    1133             : 
    1134        2194 :       DO i = 1, max_iter
    1135        2194 :          t1 = m_walltime()
    1136        2194 :          flop1 = 0; flop2 = 0
    1137             : 
    1138        2194 :          IF (ABS(trace_fx - nelectron) <= ABS(trace_gx - nelectron)) THEN
    1139             : ! Xn+1 = (aX+ (1-a)I)^2
    1140        1110 :             poly(i) = 1.0_dp
    1141        1110 :             alpha(i) = 2.0_dp/(2.0_dp - beta)
    1142             : 
    1143        1110 :             CALL dbcsr_scale(matrix_x, alpha(i))
    1144        1110 :             CALL dbcsr_add_on_diag(matrix_x, 1.0_dp - alpha(i))
    1145             :             CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_x, matrix_x, &
    1146             :                                 0.0_dp, matrix_xsq, &
    1147        1110 :                                 filter_eps=threshold, flop=flop1)
    1148             : 
    1149             : !save X for control variables
    1150        1110 :             CALL dbcsr_copy(matrix_tmp, matrix_x)
    1151             : 
    1152        1110 :             CALL dbcsr_copy(matrix_x, matrix_xsq)
    1153             : 
    1154        1110 :             beta = (1.0_dp - alpha(i)) + alpha(i)*beta
    1155        1110 :             beta = beta*beta
    1156        1110 :             betaB = (1.0_dp - alpha(i)) + alpha(i)*betaB
    1157        1110 :             betaB = betaB*betaB
    1158             :          ELSE
    1159             : ! Xn+1 = 2aX-a^2*X^2
    1160        1084 :             poly(i) = 0.0_dp
    1161        1084 :             alpha(i) = 2.0_dp/(1.0_dp + betaB)
    1162             : 
    1163        1084 :             CALL dbcsr_scale(matrix_x, alpha(i))
    1164             :             CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_x, matrix_x, &
    1165             :                                 0.0_dp, matrix_xsq, &
    1166        1084 :                                 filter_eps=threshold, flop=flop1)
    1167             : 
    1168             : !save X for control variables
    1169        1084 :             CALL dbcsr_copy(matrix_tmp, matrix_x)
    1170             : !
    1171        1084 :             CALL dbcsr_add(matrix_x, matrix_xsq, 2.0_dp, -1.0_dp)
    1172             : 
    1173        1084 :             beta = alpha(i)*beta
    1174        1084 :             beta = 2.0_dp*beta - beta*beta
    1175        1084 :             betaB = alpha(i)*betaB
    1176        1084 :             betaB = 2.0_dp*betaB - betaB*betaB
    1177             : 
    1178             :          END IF
    1179        2194 :          occ_matrix = dbcsr_get_occupation(matrix_x)
    1180        2194 :          t2 = m_walltime()
    1181        2194 :          IF (unit_nr > 0) THEN
    1182             :             WRITE (unit_nr, &
    1183          18 :                    '(T6,A,I3,1X,F10.8,E12.3,F12.3,F13.3,E12.3)') "TC2 it ", &
    1184          18 :                i, occ_matrix, t2 - t1, &
    1185          36 :                (flop1 + flop2)/(1.0E6_dp*(t2 - t1)), threshold
    1186          18 :             CALL m_flush(unit_nr)
    1187             :          END IF
    1188             : 
    1189             : ! calculate control terms
    1190        2194 :          CALL dbcsr_trace(matrix_xsq, trace_fx)
    1191             : 
    1192             : ! intermediate use matrix_xsq compute X- X*X , temorarily use trace_gx
    1193        2194 :          CALL dbcsr_add(matrix_xsq, matrix_tmp, -1.0_dp, 1.0_dp)
    1194        2194 :          CALL dbcsr_trace(matrix_xsq, trace_gx)
    1195        2194 :          nu(i) = dbcsr_frobenius_norm(matrix_xsq)
    1196        2194 :          wu(i) = trace_gx
    1197             : 
    1198             : ! intermediate use matrix_xsq to compute = 2X - X*X
    1199        2194 :          CALL dbcsr_add(matrix_xsq, matrix_tmp, 1.0_dp, 1.0_dp)
    1200        2194 :          CALL dbcsr_trace(matrix_xsq, trace_gx)
    1201             : ! TC2 has quadratic convergence, using the frobeniums norm as an idempotency deviation test.
    1202        6582 :          IF (ABS(nu(i)) < (threshold)) EXIT
    1203             :       END DO
    1204             : 
    1205         120 :       occ_matrix = dbcsr_get_occupation(matrix_x)
    1206         120 :       IF (unit_nr > 0) WRITE (unit_nr, '(T6,A,I3,1X,1F10.8,1X,1F10.8)') 'Final TC2 iteration  ', i, occ_matrix, ABS(nu(i))
    1207             : 
    1208             :       ! output to matrix_p, P = inv(S)^0.5 X inv(S)^0.5
    1209             :       CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_x, matrix_s_sqrt_inv, &
    1210         120 :                           0.0_dp, matrix_tmp, filter_eps=threshold)
    1211             :       CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_s_sqrt_inv, matrix_tmp, &
    1212         120 :                           0.0_dp, matrix_p, filter_eps=threshold)
    1213             : 
    1214         120 :       CALL dbcsr_release(matrix_xsq)
    1215         120 :       CALL dbcsr_release(matrix_tmp)
    1216             : 
    1217             :       ! ALGO 3 from. SIAM DOI. 10.1137/130911585
    1218         120 :       X(1) = 1.0_dp
    1219         120 :       X(2) = 1.0_dp
    1220         120 :       X(3) = 0.0_dp
    1221         120 :       X(4) = 0.0_dp
    1222             :       gama = 6.0_dp - 4.0_dp*(SQRT(2.0_dp))
    1223         120 :       gama = gama - gama*gama
    1224        1214 :       DO WHILE (nu(i) < gama)
    1225             :          ! safeguard against negative root, is skipping correct?
    1226        1094 :          IF (wu(i) < 1.0e-14_dp) THEN
    1227          34 :             i = i - 1
    1228          34 :             CYCLE
    1229             :          END IF
    1230        1060 :          IF ((1.0_dp - 4.0_dp*nu(i)*nu(i)/wu(i)) < 0.0_dp) THEN
    1231           0 :             i = i - 1
    1232           0 :             CYCLE
    1233             :          END IF
    1234        1060 :          Y(1) = 0.5_dp*(1.0_dp - SQRT(1.0_dp - 4.0_dp*nu(i)*nu(i)/wu(i)))
    1235        1060 :          Y(2) = 0.5_dp*(1.0_dp - SQRT(1.0_dp - 4.0_dp*nu(i)))
    1236        1060 :          Y(3) = 0.5_dp*(1.0_dp + SQRT(1.0_dp - 4.0_dp*nu(i)))
    1237        1060 :          Y(4) = 0.5_dp*(1.0_dp + SQRT(1.0_dp - 4.0_dp*nu(i)*nu(i)/wu(i)))
    1238        5300 :          Y(:) = MIN(1.0_dp, MAX(0.0_dp, Y(:)))
    1239       16616 :          DO j = i, 1, -1
    1240       16616 :             IF (poly(j) == 1.0_dp) THEN
    1241       38890 :                DO k = 1, 4
    1242       31112 :                   Y(k) = SQRT(Y(k))
    1243       38890 :                   Y(k) = (Y(k) - 1.0_dp + alpha(j))/alpha(j)
    1244             :                END DO ! end K
    1245             :             ELSE
    1246       38890 :                DO k = 1, 4
    1247       31112 :                   Y(k) = 1.0_dp - SQRT(1.0_dp - Y(k))
    1248       38890 :                   Y(k) = Y(k)/alpha(j)
    1249             :                END DO ! end K
    1250             :             END IF ! end poly
    1251             :          END DO ! end j
    1252        1060 :          X(1) = MIN(X(1), Y(1))
    1253        1060 :          X(2) = MIN(X(2), Y(2))
    1254        1060 :          X(3) = MAX(X(3), Y(3))
    1255        1060 :          X(4) = MAX(X(4), Y(4))
    1256        1060 :          i = i - 1
    1257             :       END DO ! end i
    1258             : !   lambda 1,2,3,4 are:: out lumo, in lumo, in homo, out homo
    1259         600 :       DO k = 1, 4
    1260         600 :          lambda(k) = eps_max - (eps_max - eps_min)*X(k)
    1261             :       END DO ! end k
    1262             : ! END  ALGO 3 from. SIAM DOI. 10.1137/130911585
    1263         120 :       e_homo = lambda(4)
    1264         120 :       e_lumo = lambda(1)
    1265         120 :       IF (unit_nr > 0) WRITE (unit_nr, '(T6,A,3E12.4)') "outer homo/lumo/gap", e_homo, e_lumo, (e_lumo - e_homo)
    1266         120 :       IF (unit_nr > 0) WRITE (unit_nr, '(T6,A,3E12.4)') "inner homo/lumo/gap", lambda(3), lambda(2), (lambda(2) - lambda(3))
    1267             : 
    1268         120 :       DEALLOCATE (poly)
    1269         120 :       DEALLOCATE (nu)
    1270         120 :       DEALLOCATE (wu)
    1271         120 :       DEALLOCATE (alpha)
    1272         120 :       DEALLOCATE (X)
    1273         120 :       DEALLOCATE (Y)
    1274         120 :       DEALLOCATE (lambda)
    1275             : 
    1276         120 :       CALL dbcsr_release(matrix_x)
    1277         120 :       CALL timestop(handle)
    1278             : 
    1279         240 :    END SUBROUTINE density_matrix_tc2
    1280             : 
    1281             : ! **************************************************************************************************
    1282             : !> \brief compute the homo and lumo given a KS matrix and a density matrix in the orthonormalized basis
    1283             : !>        and the eps_min and eps_max, min and max eigenvalue of the ks matrix
    1284             : !> \param matrix_k ...
    1285             : !> \param matrix_p ...
    1286             : !> \param eps_min ...
    1287             : !> \param eps_max ...
    1288             : !> \param threshold ...
    1289             : !> \param max_iter_lanczos ...
    1290             : !> \param eps_lanczos ...
    1291             : !> \param homo ...
    1292             : !> \param lumo ...
    1293             : !> \param unit_nr ...
    1294             : !> \par History
    1295             : !>       2012.06 created [Florian Thoele]
    1296             : !> \author Florian Thoele
    1297             : ! **************************************************************************************************
    1298         132 :    SUBROUTINE compute_homo_lumo(matrix_k, matrix_p, eps_min, eps_max, threshold, max_iter_lanczos, eps_lanczos, homo, lumo, unit_nr)
    1299             :       TYPE(dbcsr_type)                                   :: matrix_k, matrix_p
    1300             :       REAL(KIND=dp)                                      :: eps_min, eps_max, threshold
    1301             :       INTEGER, INTENT(IN)                                :: max_iter_lanczos
    1302             :       REAL(KIND=dp), INTENT(IN)                          :: eps_lanczos
    1303             :       REAL(KIND=dp)                                      :: homo, lumo
    1304             :       INTEGER                                            :: unit_nr
    1305             : 
    1306             :       LOGICAL                                            :: converged
    1307             :       REAL(KIND=dp)                                      :: max_eig, min_eig, shift1, shift2
    1308             :       TYPE(dbcsr_type)                                   :: tmp1, tmp2, tmp3
    1309             : 
    1310             : ! temporary matrices used for HOMO/LUMO calculation
    1311             : 
    1312          44 :       CALL dbcsr_create(tmp1, template=matrix_k, matrix_type=dbcsr_type_no_symmetry)
    1313             : 
    1314          44 :       CALL dbcsr_create(tmp2, template=matrix_k, matrix_type=dbcsr_type_no_symmetry)
    1315             : 
    1316          44 :       CALL dbcsr_create(tmp3, template=matrix_k, matrix_type=dbcsr_type_no_symmetry)
    1317             : 
    1318          44 :       shift1 = -eps_min
    1319          44 :       shift2 = eps_max
    1320             : 
    1321             :       ! find largest ev of P*(K+shift*1), where shift is the neg. val. of the smallest ev of K
    1322          44 :       CALL dbcsr_copy(tmp2, matrix_k)
    1323          44 :       CALL dbcsr_add_on_diag(tmp2, shift1)
    1324             :       CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_p, tmp2, &
    1325          44 :                           0.0_dp, tmp1, filter_eps=threshold)
    1326             :       CALL arnoldi_extremal(tmp1, max_eig, min_eig, converged=converged, &
    1327          44 :                             threshold=eps_lanczos, max_iter=max_iter_lanczos)
    1328          44 :       homo = max_eig - shift1
    1329          44 :       IF (unit_nr > 0) THEN
    1330          10 :          WRITE (unit_nr, '(T6,A,1X,L12)') "Lanczos converged:      ", converged
    1331             :       END IF
    1332             : 
    1333             :       ! -(1-P)*(K-shift*1) = (1-P)*(shift*1 - K), where shift is the largest ev of K
    1334          44 :       CALL dbcsr_copy(tmp3, matrix_p)
    1335          44 :       CALL dbcsr_scale(tmp3, -1.0_dp)
    1336          44 :       CALL dbcsr_add_on_diag(tmp3, 1.0_dp) !tmp3 = 1-P
    1337          44 :       CALL dbcsr_copy(tmp2, matrix_k)
    1338          44 :       CALL dbcsr_add_on_diag(tmp2, -shift2)
    1339             :       CALL dbcsr_multiply("N", "N", -1.0_dp, tmp3, tmp2, &
    1340          44 :                           0.0_dp, tmp1, filter_eps=threshold)
    1341             :       CALL arnoldi_extremal(tmp1, max_eig, min_eig, converged=converged, &
    1342          44 :                             threshold=eps_lanczos, max_iter=max_iter_lanczos)
    1343          44 :       lumo = -max_eig + shift2
    1344             : 
    1345          44 :       IF (unit_nr > 0) THEN
    1346          10 :          WRITE (unit_nr, '(T6,A,1X,L12)') "Lanczos converged:      ", converged
    1347          10 :          WRITE (unit_nr, '(T6,A,1X,3F12.5)') 'HOMO/LUMO/gap', homo, lumo, lumo - homo
    1348             :       END IF
    1349          44 :       CALL dbcsr_release(tmp1)
    1350          44 :       CALL dbcsr_release(tmp2)
    1351          44 :       CALL dbcsr_release(tmp3)
    1352             : 
    1353          44 :    END SUBROUTINE compute_homo_lumo
    1354             : 
    1355             : ! **************************************************************************************************
    1356             : !> \brief ...
    1357             : !> \param x ...
    1358             : !> \param gamma_values ...
    1359             : !> \param i ...
    1360             : !> \return ...
    1361             : ! **************************************************************************************************
    1362      171460 :    FUNCTION evaluate_trs4_polynomial(x, gamma_values, i) RESULT(xr)
    1363             :       REAL(KIND=dp)                                      :: x
    1364             :       REAL(KIND=dp), DIMENSION(:)                        :: gamma_values
    1365             :       INTEGER                                            :: i
    1366             :       REAL(KIND=dp)                                      :: xr
    1367             : 
    1368             :       REAL(KIND=dp), PARAMETER                           :: gam_max = 6.0_dp, gam_min = 0.0_dp
    1369             : 
    1370             :       INTEGER                                            :: k
    1371             : 
    1372      171460 :       xr = x
    1373     1324124 :       DO k = 1, i
    1374     1324124 :          IF (gamma_values(k) > gam_max) THEN
    1375       14640 :             xr = 2*xr - xr**2
    1376     1138024 :          ELSE IF (gamma_values(k) < gam_min) THEN
    1377       63992 :             xr = xr**2
    1378             :          ELSE
    1379     1074032 :             xr = (xr*xr)*(4*xr - 3*xr*xr) + gamma_values(k)*xr*xr*((1 - xr)**2)
    1380             :          END IF
    1381             :       END DO
    1382      171460 :    END FUNCTION evaluate_trs4_polynomial
    1383             : 
    1384             : ! **************************************************************************************************
    1385             : !> \brief ...
    1386             : !> \param homo ...
    1387             : !> \param lumo ...
    1388             : !> \param threshold ...
    1389             : !> \return ...
    1390             : ! **************************************************************************************************
    1391         130 :    FUNCTION estimate_steps(homo, lumo, threshold) RESULT(steps)
    1392             :       REAL(KIND=dp)                                      :: homo, lumo, threshold
    1393             :       INTEGER                                            :: steps
    1394             : 
    1395             :       INTEGER                                            :: i
    1396             :       REAL(KIND=dp)                                      :: h, l, m
    1397             : 
    1398         130 :       l = lumo
    1399         130 :       h = homo
    1400             : 
    1401         926 :       DO i = 1, 200
    1402         926 :          IF (ABS(l) < threshold .AND. ABS(1 - h) < threshold) EXIT
    1403         796 :          m = 0.5_dp*(h + l)
    1404         926 :          IF (m > 0.5_dp) THEN
    1405         412 :             h = h**2
    1406         412 :             l = l**2
    1407             :          ELSE
    1408         384 :             h = 2*h - h**2
    1409         384 :             l = 2*l - l**2
    1410             :          END IF
    1411             :       END DO
    1412         130 :       steps = i - 1
    1413         130 :    END FUNCTION estimate_steps
    1414             : 
    1415             : END MODULE dm_ls_scf_methods

Generated by: LCOV version 1.15