LCOV - code coverage report
Current view: top level - src - almo_scf_optimizer.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:3130539) Lines: 1720 3155 54.5 %
Date: 2025-05-14 06:57:18 Functions: 22 35 62.9 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Optimization routines for all ALMO-based SCF methods
      10             : !> \par History
      11             : !>       2011.05 created [Rustam Z Khaliullin]
      12             : !>       2014.10 as a separate file [Rustam Z Khaliullin]
      13             : !> \author Rustam Z Khaliullin
      14             : ! **************************************************************************************************
      15             : MODULE almo_scf_optimizer
      16             :    USE almo_scf_diis_types,             ONLY: almo_scf_diis_extrapolate,&
      17             :                                               almo_scf_diis_init,&
      18             :                                               almo_scf_diis_push,&
      19             :                                               almo_scf_diis_release,&
      20             :                                               almo_scf_diis_type
      21             :    USE almo_scf_lbfgs_types,            ONLY: lbfgs_create,&
      22             :                                               lbfgs_get_direction,&
      23             :                                               lbfgs_history_type,&
      24             :                                               lbfgs_release,&
      25             :                                               lbfgs_seed
      26             :    USE almo_scf_methods,                ONLY: &
      27             :         almo_scf_ks_blk_to_tv_blk, almo_scf_ks_to_ks_blk, almo_scf_ks_to_ks_xx, &
      28             :         almo_scf_ks_xx_to_tv_xx, almo_scf_p_blk_to_t_blk, almo_scf_t_rescaling, &
      29             :         almo_scf_t_to_proj, apply_domain_operators, apply_projector, &
      30             :         construct_domain_preconditioner, construct_domain_r_down, construct_domain_s_inv, &
      31             :         construct_domain_s_sqrt, fill_matrix_with_ones, get_overlap, orthogonalize_mos, &
      32             :         pseudo_invert_diagonal_blk, xalmo_initial_guess
      33             :    USE almo_scf_qs,                     ONLY: almo_dm_to_almo_ks,&
      34             :                                               almo_dm_to_qs_env,&
      35             :                                               almo_scf_update_ks_energy,&
      36             :                                               matrix_qs_to_almo
      37             :    USE almo_scf_types,                  ONLY: almo_scf_env_type,&
      38             :                                               optimizer_options_type
      39             :    USE cell_types,                      ONLY: cell_type
      40             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      41             :    USE cp_dbcsr_api,                    ONLY: &
      42             :         dbcsr_add, dbcsr_copy, dbcsr_create, dbcsr_desymmetrize, dbcsr_distribution_get, &
      43             :         dbcsr_distribution_type, dbcsr_filter, dbcsr_finalize, dbcsr_get_block_p, dbcsr_get_info, &
      44             :         dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, dbcsr_iterator_readonly_start, &
      45             :         dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, &
      46             :         dbcsr_p_type, dbcsr_put_block, dbcsr_release, dbcsr_scale, dbcsr_set, dbcsr_type, &
      47             :         dbcsr_type_no_symmetry, dbcsr_work_create
      48             :    USE cp_dbcsr_cholesky,               ONLY: cp_dbcsr_cholesky_decompose,&
      49             :                                               cp_dbcsr_cholesky_invert,&
      50             :                                               cp_dbcsr_cholesky_restore
      51             :    USE cp_dbcsr_contrib,                ONLY: dbcsr_add_on_diag,&
      52             :                                               dbcsr_dot,&
      53             :                                               dbcsr_frobenius_norm,&
      54             :                                               dbcsr_get_diag,&
      55             :                                               dbcsr_hadamard_product,&
      56             :                                               dbcsr_maxabs,&
      57             :                                               dbcsr_set_diag
      58             :    USE cp_external_control,             ONLY: external_control
      59             :    USE cp_files,                        ONLY: close_file,&
      60             :                                               open_file
      61             :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      62             :                                               cp_logger_get_default_unit_nr,&
      63             :                                               cp_logger_type,&
      64             :                                               cp_to_string
      65             :    USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
      66             :                                               cp_print_key_unit_nr
      67             :    USE ct_methods,                      ONLY: analytic_line_search,&
      68             :                                               ct_step_execute,&
      69             :                                               diagonalize_diagonal_blocks
      70             :    USE ct_types,                        ONLY: ct_step_env_clean,&
      71             :                                               ct_step_env_get,&
      72             :                                               ct_step_env_init,&
      73             :                                               ct_step_env_set,&
      74             :                                               ct_step_env_type
      75             :    USE domain_submatrix_methods,        ONLY: add_submatrices,&
      76             :                                               construct_submatrices,&
      77             :                                               copy_submatrices,&
      78             :                                               init_submatrices,&
      79             :                                               maxnorm_submatrices,&
      80             :                                               release_submatrices
      81             :    USE domain_submatrix_types,          ONLY: domain_map_type,&
      82             :                                               domain_submatrix_type,&
      83             :                                               select_row
      84             :    USE input_constants,                 ONLY: &
      85             :         almo_scf_diag, almo_scf_dm_sign, cg_dai_yuan, cg_fletcher, cg_fletcher_reeves, &
      86             :         cg_hager_zhang, cg_hestenes_stiefel, cg_liu_storey, cg_polak_ribiere, cg_zero, &
      87             :         op_loc_berry, op_loc_pipek, trustr_cauchy, trustr_dogleg, virt_full, &
      88             :         xalmo_case_block_diag, xalmo_case_fully_deloc, xalmo_case_normal, xalmo_prec_domain, &
      89             :         xalmo_prec_full, xalmo_prec_zero
      90             :    USE input_section_types,             ONLY: section_vals_get_subs_vals,&
      91             :                                               section_vals_type
      92             :    USE iterate_matrix,                  ONLY: determinant,&
      93             :                                               invert_Hotelling,&
      94             :                                               matrix_sqrt_Newton_Schulz
      95             :    USE kinds,                           ONLY: dp
      96             :    USE machine,                         ONLY: m_flush,&
      97             :                                               m_walltime
      98             :    USE message_passing,                 ONLY: mp_comm_type,&
      99             :                                               mp_para_env_type
     100             :    USE particle_methods,                ONLY: get_particle_set
     101             :    USE particle_types,                  ONLY: particle_type
     102             :    USE qs_energy_types,                 ONLY: qs_energy_type
     103             :    USE qs_environment_types,            ONLY: get_qs_env,&
     104             :                                               qs_environment_type
     105             :    USE qs_kind_types,                   ONLY: qs_kind_type
     106             :    USE qs_loc_utils,                    ONLY: compute_berry_operator
     107             :    USE qs_localization_methods,         ONLY: initialize_weights
     108             : #include "./base/base_uses.f90"
     109             : 
     110             :    IMPLICIT NONE
     111             : 
     112             :    PRIVATE
     113             : 
     114             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'almo_scf_optimizer'
     115             : 
     116             :    PUBLIC :: almo_scf_block_diagonal, &
     117             :              almo_scf_xalmo_eigensolver, &
     118             :              almo_scf_xalmo_trustr, &
     119             :              almo_scf_xalmo_pcg, &
     120             :              almo_scf_construct_nlmos
     121             : 
     122             :    LOGICAL, PARAMETER :: debug_mode = .FALSE.
     123             :    LOGICAL, PARAMETER :: safe_mode = .FALSE.
     124             :    LOGICAL, PARAMETER :: almo_mathematica = .FALSE.
     125             :    INTEGER, PARAMETER :: hessian_path_reuse = 1, &
     126             :                          hessian_path_assemble = 2
     127             : 
     128             : CONTAINS
     129             : 
     130             : ! **************************************************************************************************
     131             : !> \brief An SCF procedure that optimizes block-diagonal ALMOs using DIIS
     132             : !> \param qs_env ...
     133             : !> \param almo_scf_env ...
     134             : !> \param optimizer ...
     135             : !> \par History
     136             : !>       2011.06 created [Rustam Z Khaliullin]
     137             : !>       2018.09 smearing support [Ruben Staub]
     138             : !> \author Rustam Z Khaliullin
     139             : ! **************************************************************************************************
     140          76 :    SUBROUTINE almo_scf_block_diagonal(qs_env, almo_scf_env, optimizer)
     141             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     142             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
     143             :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
     144             : 
     145             :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_block_diagonal'
     146             : 
     147             :       INTEGER                                            :: handle, iscf, ispin, nspin, unit_nr
     148          76 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: local_nocc_of_domain
     149             :       LOGICAL                                            :: converged, prepare_to_exit, should_stop, &
     150             :                                                             use_diis, use_prev_as_guess
     151             :       REAL(KIND=dp) :: density_rec, energy_diff, energy_new, energy_old, error_norm, &
     152             :          error_norm_ispin, kTS_sum, prev_error_norm, t1, t2, true_mixing_fraction
     153          76 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: local_mu
     154             :       TYPE(almo_scf_diis_type), ALLOCATABLE, &
     155          76 :          DIMENSION(:)                                    :: almo_diis
     156             :       TYPE(cp_logger_type), POINTER                      :: logger
     157          76 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: matrix_mixing_old_blk
     158             :       TYPE(qs_energy_type), POINTER                      :: qs_energy
     159             : 
     160          76 :       CALL timeset(routineN, handle)
     161             : 
     162             :       ! get a useful output_unit
     163          76 :       logger => cp_get_default_logger()
     164          76 :       IF (logger%para_env%is_source()) THEN
     165          38 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
     166             :       ELSE
     167             :          unit_nr = -1
     168             :       END IF
     169             : 
     170             :       ! use DIIS, it's superior to simple mixing
     171          76 :       use_diis = .TRUE.
     172          76 :       use_prev_as_guess = .FALSE.
     173             : 
     174          76 :       nspin = almo_scf_env%nspins
     175         228 :       ALLOCATE (local_mu(almo_scf_env%ndomains))
     176         228 :       ALLOCATE (local_nocc_of_domain(almo_scf_env%ndomains))
     177             : 
     178             :       ! init mixing matrices
     179         304 :       ALLOCATE (matrix_mixing_old_blk(nspin))
     180         304 :       ALLOCATE (almo_diis(nspin))
     181         152 :       DO ispin = 1, nspin
     182             :          CALL dbcsr_create(matrix_mixing_old_blk(ispin), &
     183          76 :                            template=almo_scf_env%matrix_ks_blk(ispin))
     184             :          CALL almo_scf_diis_init(diis_env=almo_diis(ispin), &
     185             :                                  sample_err=almo_scf_env%matrix_ks_blk(ispin), &
     186             :                                  sample_var=almo_scf_env%matrix_s_blk(1), &
     187             :                                  error_type=1, &
     188         152 :                                  max_length=optimizer%ndiis)
     189             :       END DO
     190             : 
     191          76 :       CALL get_qs_env(qs_env, energy=qs_energy)
     192          76 :       energy_old = qs_energy%total
     193             : 
     194          76 :       iscf = 0
     195          76 :       prepare_to_exit = .FALSE.
     196          76 :       true_mixing_fraction = 0.0_dp
     197          76 :       error_norm = 1.0E+10_dp ! arbitrary big step
     198             : 
     199          76 :       IF (unit_nr > 0) THEN
     200          38 :          WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
     201          76 :             " Optimization of block-diagonal ALMOs ", REPEAT("-", 21)
     202          38 :          WRITE (unit_nr, *)
     203          38 :          WRITE (unit_nr, '(T2,A13,A6,A23,A14,A14,A9)') "Method", "Iter", &
     204          76 :             "Total Energy", "Change", "Convergence", "Time"
     205          38 :          WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
     206             :       END IF
     207             : 
     208             :       ! the real SCF loop
     209          76 :       t1 = m_walltime()
     210         424 :       DO
     211             : 
     212         424 :          iscf = iscf + 1
     213             : 
     214             :          ! obtain projected KS matrix and the DIIS-error vector
     215         424 :          CALL almo_scf_ks_to_ks_blk(almo_scf_env)
     216             : 
     217             :          ! inform the DIIS handler about the new KS matrix and its error vector
     218             :          IF (use_diis) THEN
     219         848 :             DO ispin = 1, nspin
     220             :                CALL almo_scf_diis_push(diis_env=almo_diis(ispin), &
     221             :                                        var=almo_scf_env%matrix_ks_blk(ispin), &
     222         848 :                                        err=almo_scf_env%matrix_err_blk(ispin))
     223             :             END DO
     224             :          END IF
     225             : 
     226             :          ! get error_norm: choose the largest of the two spins
     227         848 :          prev_error_norm = error_norm
     228         848 :          DO ispin = 1, nspin
     229             :             !error_norm=dbcsr_frobenius_norm(almo_scf_env%matrix_err_blk(ispin))
     230         424 :             error_norm_ispin = dbcsr_maxabs(almo_scf_env%matrix_err_blk(ispin))
     231         424 :             IF (ispin .EQ. 1) error_norm = error_norm_ispin
     232           0 :             IF (ispin .GT. 1 .AND. error_norm_ispin .GT. error_norm) &
     233         424 :                error_norm = error_norm_ispin
     234             :          END DO
     235             : 
     236         424 :          IF (error_norm .LT. almo_scf_env%eps_prev_guess) THEN
     237           0 :             use_prev_as_guess = .TRUE.
     238             :          ELSE
     239         424 :             use_prev_as_guess = .FALSE.
     240             :          END IF
     241             : 
     242             :          ! check convergence
     243         424 :          converged = .TRUE.
     244         424 :          IF (error_norm .GT. optimizer%eps_error) converged = .FALSE.
     245             : 
     246             :          ! check other exit criteria: max SCF steps and timing
     247             :          CALL external_control(should_stop, "SCF", &
     248             :                                start_time=qs_env%start_time, &
     249         424 :                                target_time=qs_env%target_time)
     250         424 :          IF (should_stop .OR. iscf >= optimizer%max_iter .OR. converged) THEN
     251          76 :             prepare_to_exit = .TRUE.
     252          76 :             IF (iscf == 1) energy_new = energy_old
     253             :          END IF
     254             : 
     255             :          ! if early stopping is on do at least one iteration
     256         424 :          IF (optimizer%early_stopping_on .AND. iscf .EQ. 1) &
     257             :             prepare_to_exit = .FALSE.
     258             : 
     259         424 :          IF (.NOT. prepare_to_exit) THEN ! update the ALMOs and density matrix
     260             : 
     261             :             ! perform mixing of KS matrices
     262         348 :             IF (iscf .NE. 1) THEN
     263             :                IF (use_diis) THEN ! use diis instead of mixing
     264         544 :                   DO ispin = 1, nspin
     265             :                      CALL almo_scf_diis_extrapolate(diis_env=almo_diis(ispin), &
     266         544 :                                                     extr_var=almo_scf_env%matrix_ks_blk(ispin))
     267             :                   END DO
     268             :                ELSE ! use mixing
     269             :                   true_mixing_fraction = almo_scf_env%mixing_fraction
     270             :                   DO ispin = 1, nspin
     271             :                      CALL dbcsr_add(almo_scf_env%matrix_ks_blk(ispin), &
     272             :                                     matrix_mixing_old_blk(ispin), &
     273             :                                     true_mixing_fraction, &
     274             :                                     1.0_dp - true_mixing_fraction)
     275             :                   END DO
     276             :                END IF
     277             :             END IF
     278             :             ! save the new matrix for the future mixing
     279         696 :             DO ispin = 1, nspin
     280             :                CALL dbcsr_copy(matrix_mixing_old_blk(ispin), &
     281         696 :                                almo_scf_env%matrix_ks_blk(ispin))
     282             :             END DO
     283             : 
     284             :             ! obtain ALMOs from the new KS matrix
     285         696 :             SELECT CASE (almo_scf_env%almo_update_algorithm)
     286             :             CASE (almo_scf_diag)
     287             : 
     288         348 :                CALL almo_scf_ks_blk_to_tv_blk(almo_scf_env)
     289             : 
     290             :             CASE (almo_scf_dm_sign)
     291             : 
     292             :                ! update the density matrix
     293           0 :                DO ispin = 1, nspin
     294             : 
     295           0 :                   local_nocc_of_domain(:) = almo_scf_env%nocc_of_domain(:, ispin)
     296           0 :                   local_mu(:) = almo_scf_env%mu_of_domain(:, ispin)
     297             :                   ! RZK UPDATE! the update algorithm is removed because
     298             :                   ! RZK UPDATE! it requires updating core LS_SCF routines
     299             :                   ! RZK UPDATE! (the code exists in the CVS version)
     300           0 :                   CPABORT("Density_matrix_sign has not been tested yet")
     301             :                   ! RZK UPDATE!  CALL density_matrix_sign(almo_scf_env%matrix_p_blk(ispin),&
     302             :                   ! RZK UPDATE!          local_mu,&
     303             :                   ! RZK UPDATE!          almo_scf_env%fixed_mu,&
     304             :                   ! RZK UPDATE!          almo_scf_env%matrix_ks_blk(ispin),&
     305             :                   ! RZK UPDATE!          !matrix_mixing_old_blk(ispin),&
     306             :                   ! RZK UPDATE!          almo_scf_env%matrix_s_blk(1), &
     307             :                   ! RZK UPDATE!          almo_scf_env%matrix_s_blk_inv(1), &
     308             :                   ! RZK UPDATE!          local_nocc_of_domain,&
     309             :                   ! RZK UPDATE!          almo_scf_env%eps_filter,&
     310             :                   ! RZK UPDATE!          almo_scf_env%domain_index_of_ao)
     311             :                   ! RZK UPDATE!
     312           0 :                   almo_scf_env%mu_of_domain(:, ispin) = local_mu(:)
     313             : 
     314             :                END DO
     315             : 
     316             :                ! obtain ALMOs from matrix_p_blk: T_new = P_blk S_blk T_old
     317           0 :                CALL almo_scf_p_blk_to_t_blk(almo_scf_env, ionic=.FALSE.)
     318             : 
     319         348 :                DO ispin = 1, almo_scf_env%nspins
     320             : 
     321             :                   CALL orthogonalize_mos(ket=almo_scf_env%matrix_t_blk(ispin), &
     322             :                                          overlap=almo_scf_env%matrix_sigma_blk(ispin), &
     323             :                                          metric=almo_scf_env%matrix_s_blk(1), &
     324             :                                          retain_locality=.TRUE., &
     325             :                                          only_normalize=.FALSE., &
     326             :                                          nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
     327             :                                          eps_filter=almo_scf_env%eps_filter, &
     328             :                                          order_lanczos=almo_scf_env%order_lanczos, &
     329             :                                          eps_lanczos=almo_scf_env%eps_lanczos, &
     330           0 :                                          max_iter_lanczos=almo_scf_env%max_iter_lanczos)
     331             : 
     332             :                END DO
     333             : 
     334             :             END SELECT
     335             : 
     336             :             ! obtain density matrix from ALMOs
     337         696 :             DO ispin = 1, almo_scf_env%nspins
     338             : 
     339             :                !! Application of an occupation-rescaling trick for smearing, if requested
     340         348 :                IF (almo_scf_env%smear) THEN
     341             :                   CALL almo_scf_t_rescaling(matrix_t=almo_scf_env%matrix_t_blk(ispin), &
     342             :                                             mo_energies=almo_scf_env%mo_energies(:, ispin), &
     343             :                                             mu_of_domain=almo_scf_env%mu_of_domain(:, ispin), &
     344             :                                             real_ne_of_domain=almo_scf_env%real_ne_of_domain(:, ispin), &
     345             :                                             spin_kTS=almo_scf_env%kTS(ispin), &
     346             :                                             smear_e_temp=almo_scf_env%smear_e_temp, &
     347             :                                             ndomains=almo_scf_env%ndomains, &
     348          16 :                                             nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin))
     349             :                END IF
     350             : 
     351             :                CALL almo_scf_t_to_proj(t=almo_scf_env%matrix_t_blk(ispin), &
     352             :                                        p=almo_scf_env%matrix_p(ispin), &
     353             :                                        eps_filter=almo_scf_env%eps_filter, &
     354             :                                        orthog_orbs=.FALSE., &
     355             :                                        nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
     356             :                                        s=almo_scf_env%matrix_s(1), &
     357             :                                        sigma=almo_scf_env%matrix_sigma(ispin), &
     358             :                                        sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
     359             :                                        use_guess=use_prev_as_guess, &
     360             :                                        smear=almo_scf_env%smear, &
     361             :                                        algorithm=almo_scf_env%sigma_inv_algorithm, &
     362             :                                        inverse_accelerator=almo_scf_env%order_lanczos, &
     363             :                                        inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
     364             :                                        eps_lanczos=almo_scf_env%eps_lanczos, &
     365             :                                        max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
     366             :                                        para_env=almo_scf_env%para_env, &
     367         696 :                                        blacs_env=almo_scf_env%blacs_env)
     368             : 
     369             :             END DO
     370             : 
     371         348 :             IF (almo_scf_env%nspins == 1) THEN
     372         348 :                CALL dbcsr_scale(almo_scf_env%matrix_p(1), 2.0_dp)
     373             :                !! Rescaling electronic entropy contribution by spin_factor
     374         348 :                IF (almo_scf_env%smear) THEN
     375          16 :                   almo_scf_env%kTS(1) = almo_scf_env%kTS(1)*2.0_dp
     376             :                END IF
     377             :             END IF
     378             : 
     379         348 :             IF (almo_scf_env%smear) THEN
     380          32 :                kTS_sum = SUM(almo_scf_env%kTS)
     381             :             ELSE
     382         332 :                kTS_sum = 0.0_dp
     383             :             END IF
     384             : 
     385             :             ! compute the new KS matrix and new energy
     386             :             CALL almo_dm_to_almo_ks(qs_env, &
     387             :                                     almo_scf_env%matrix_p, &
     388             :                                     almo_scf_env%matrix_ks, &
     389             :                                     energy_new, &
     390             :                                     almo_scf_env%eps_filter, &
     391             :                                     almo_scf_env%mat_distr_aos, &
     392             :                                     smear=almo_scf_env%smear, &
     393         348 :                                     kTS_sum=kTS_sum)
     394             : 
     395             :          END IF ! prepare_to_exit
     396             : 
     397         424 :          energy_diff = energy_new - energy_old
     398         424 :          energy_old = energy_new
     399         424 :          almo_scf_env%almo_scf_energy = energy_new
     400             : 
     401         424 :          t2 = m_walltime()
     402             :          ! brief report on the current SCF loop
     403         424 :          IF (unit_nr > 0) THEN
     404         212 :             WRITE (unit_nr, '(T2,A13,I6,F23.10,E14.5,F14.9,F9.2)') "ALMO SCF DIIS", &
     405         212 :                iscf, &
     406         424 :                energy_new, energy_diff, error_norm, t2 - t1
     407             :          END IF
     408         424 :          t1 = m_walltime()
     409             : 
     410         424 :          IF (prepare_to_exit) EXIT
     411             : 
     412             :       END DO ! end scf cycle
     413             : 
     414             :       !! Print number of electrons recovered if smearing was requested
     415          76 :       IF (almo_scf_env%smear) THEN
     416           8 :          DO ispin = 1, nspin
     417           4 :             CALL dbcsr_dot(almo_scf_env%matrix_p(ispin), almo_scf_env%matrix_s(1), density_rec)
     418           8 :             IF (unit_nr > 0) THEN
     419           2 :                WRITE (unit_nr, '(T2,A20,F23.10)') "Electrons recovered:", density_rec
     420             :             END IF
     421             :          END DO
     422             :       END IF
     423             : 
     424          76 :       IF (.NOT. converged .AND. (.NOT. optimizer%early_stopping_on)) THEN
     425           0 :          IF (unit_nr > 0) THEN
     426           0 :             CPABORT("SCF for block-diagonal ALMOs not converged!")
     427             :          END IF
     428             :       END IF
     429             : 
     430         152 :       DO ispin = 1, nspin
     431          76 :          CALL dbcsr_release(matrix_mixing_old_blk(ispin))
     432         152 :          CALL almo_scf_diis_release(diis_env=almo_diis(ispin))
     433             :       END DO
     434         152 :       DEALLOCATE (almo_diis)
     435          76 :       DEALLOCATE (matrix_mixing_old_blk)
     436          76 :       DEALLOCATE (local_mu)
     437          76 :       DEALLOCATE (local_nocc_of_domain)
     438             : 
     439          76 :       CALL timestop(handle)
     440             : 
     441          76 :    END SUBROUTINE almo_scf_block_diagonal
     442             : 
     443             : ! **************************************************************************************************
     444             : !> \brief An eigensolver-based SCF to optimize extended ALMOs (i.e. ALMOs on
     445             : !>        overlapping domains)
     446             : !> \param qs_env ...
     447             : !> \param almo_scf_env ...
     448             : !> \param optimizer ...
     449             : !> \par History
     450             : !>       2013.03 created [Rustam Z Khaliullin]
     451             : !>       2018.09 smearing support [Ruben Staub]
     452             : !> \author Rustam Z Khaliullin
     453             : ! **************************************************************************************************
     454           2 :    SUBROUTINE almo_scf_xalmo_eigensolver(qs_env, almo_scf_env, optimizer)
     455             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     456             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
     457             :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
     458             : 
     459             :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_xalmo_eigensolver'
     460             : 
     461             :       INTEGER                                            :: handle, iscf, ispin, nspin, unit_nr
     462             :       LOGICAL                                            :: converged, prepare_to_exit, should_stop
     463             :       REAL(KIND=dp) :: denergy_tot, density_rec, energy_diff, energy_new, energy_old, error_norm, &
     464             :          error_norm_0, kTS_sum, spin_factor, t1, t2
     465             :       REAL(KIND=dp), DIMENSION(2)                        :: denergy_spin
     466             :       TYPE(almo_scf_diis_type), ALLOCATABLE, &
     467           2 :          DIMENSION(:)                                    :: almo_diis
     468             :       TYPE(cp_logger_type), POINTER                      :: logger
     469             :       TYPE(dbcsr_type)                                   :: matrix_p_almo_scf_converged
     470             :       TYPE(domain_submatrix_type), ALLOCATABLE, &
     471           2 :          DIMENSION(:, :)                                 :: submatrix_mixing_old_blk
     472             : 
     473           2 :       CALL timeset(routineN, handle)
     474             : 
     475             :       ! get a useful output_unit
     476           2 :       logger => cp_get_default_logger()
     477           2 :       IF (logger%para_env%is_source()) THEN
     478           1 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
     479             :       ELSE
     480           1 :          unit_nr = -1
     481             :       END IF
     482             : 
     483           2 :       nspin = almo_scf_env%nspins
     484           2 :       IF (nspin == 1) THEN
     485           2 :          spin_factor = 2.0_dp
     486             :       ELSE
     487           0 :          spin_factor = 1.0_dp
     488             :       END IF
     489             : 
     490             :       ! RZK-warning domain_s_sqrt and domain_s_sqrt_inv do not have spin
     491             :       ! components yet (may be used later)
     492           2 :       ispin = 1
     493             :       CALL construct_domain_s_sqrt( &
     494             :          matrix_s=almo_scf_env%matrix_s(1), &
     495             :          subm_s_sqrt=almo_scf_env%domain_s_sqrt(:, ispin), &
     496             :          subm_s_sqrt_inv=almo_scf_env%domain_s_sqrt_inv(:, ispin), &
     497             :          dpattern=almo_scf_env%quench_t(ispin), &
     498             :          map=almo_scf_env%domain_map(ispin), &
     499           2 :          node_of_domain=almo_scf_env%cpu_of_domain)
     500             :       ! TRY: construct s_inv
     501             :       !CALL construct_domain_s_inv(&
     502             :       !       matrix_s=almo_scf_env%matrix_s(1),&
     503             :       !       subm_s_inv=almo_scf_env%domain_s_inv(:,ispin),&
     504             :       !       dpattern=almo_scf_env%quench_t(ispin),&
     505             :       !       map=almo_scf_env%domain_map(ispin),&
     506             :       !       node_of_domain=almo_scf_env%cpu_of_domain)
     507             : 
     508             :       ! construct the domain template for the occupied orbitals
     509           4 :       DO ispin = 1, nspin
     510             :          ! RZK-warning we need only the matrix structure, not data
     511             :          ! replace construct_submatrices with lighter procedure with
     512             :          ! no heavy communications
     513             :          CALL construct_submatrices( &
     514             :             matrix=almo_scf_env%quench_t(ispin), &
     515             :             submatrix=almo_scf_env%domain_t(:, ispin), &
     516             :             distr_pattern=almo_scf_env%quench_t(ispin), &
     517             :             domain_map=almo_scf_env%domain_map(ispin), &
     518             :             node_of_domain=almo_scf_env%cpu_of_domain, &
     519           4 :             job_type=select_row)
     520             :       END DO
     521             : 
     522             :       ! init mixing matrices
     523          20 :       ALLOCATE (submatrix_mixing_old_blk(almo_scf_env%ndomains, nspin))
     524           2 :       CALL init_submatrices(submatrix_mixing_old_blk)
     525           8 :       ALLOCATE (almo_diis(nspin))
     526             : 
     527             :       ! TRY: construct block-projector
     528             :       !ALLOCATE(submatrix_tmp(almo_scf_env%ndomains))
     529             :       !DO ispin=1,nspin
     530             :       !   CALL init_submatrices(submatrix_tmp)
     531             :       !   CALL construct_domain_r_down(&
     532             :       !           matrix_t=almo_scf_env%matrix_t_blk(ispin),&
     533             :       !           matrix_sigma_inv=almo_scf_env%matrix_sigma_inv(ispin),&
     534             :       !           matrix_s=almo_scf_env%matrix_s(1),&
     535             :       !           subm_r_down=submatrix_tmp(:),&
     536             :       !           dpattern=almo_scf_env%quench_t(ispin),&
     537             :       !           map=almo_scf_env%domain_map(ispin),&
     538             :       !           node_of_domain=almo_scf_env%cpu_of_domain,&
     539             :       !           filter_eps=almo_scf_env%eps_filter)
     540             :       !   CALL multiply_submatrices('N','N',1.0_dp,&
     541             :       !           submatrix_tmp(:),&
     542             :       !           almo_scf_env%domain_s_inv(:,1),0.0_dp,&
     543             :       !           almo_scf_env%domain_r_down_up(:,ispin))
     544             :       !   CALL release_submatrices(submatrix_tmp)
     545             :       !ENDDO
     546             :       !DEALLOCATE(submatrix_tmp)
     547             : 
     548           4 :       DO ispin = 1, nspin
     549             :          ! use s_sqrt since they are already properly constructed
     550             :          ! and have the same distributions as domain_err and domain_ks_xx
     551             :          CALL almo_scf_diis_init(diis_env=almo_diis(ispin), &
     552             :                                  sample_err=almo_scf_env%domain_s_sqrt(:, ispin), &
     553             :                                  error_type=1, &
     554           4 :                                  max_length=optimizer%ndiis)
     555             :       END DO
     556             : 
     557           2 :       denergy_tot = 0.0_dp
     558           2 :       energy_old = 0.0_dp
     559           2 :       iscf = 0
     560           2 :       prepare_to_exit = .FALSE.
     561             : 
     562             :       ! the SCF loop
     563           2 :       t1 = m_walltime()
     564           2 :       DO
     565             : 
     566           2 :          iscf = iscf + 1
     567             : 
     568             :          ! obtain projected KS matrix and the DIIS-error vector
     569           2 :          CALL almo_scf_ks_to_ks_xx(almo_scf_env)
     570             : 
     571             :          ! inform the DIIS handler about the new KS matrix and its error vector
     572           4 :          DO ispin = 1, nspin
     573             :             CALL almo_scf_diis_push(diis_env=almo_diis(ispin), &
     574             :                                     d_var=almo_scf_env%domain_ks_xx(:, ispin), &
     575           4 :                                     d_err=almo_scf_env%domain_err(:, ispin))
     576             :          END DO
     577             : 
     578             :          ! check convergence
     579           2 :          converged = .TRUE.
     580           2 :          DO ispin = 1, nspin
     581             :             !error_norm=dbcsr_frobenius_norm(almo_scf_env%matrix_err_blk(ispin))
     582           2 :             error_norm = dbcsr_maxabs(almo_scf_env%matrix_err_xx(ispin))
     583             :             CALL maxnorm_submatrices(almo_scf_env%domain_err(:, ispin), &
     584           2 :                                      norm=error_norm_0)
     585           2 :             IF (error_norm .GT. optimizer%eps_error) THEN
     586             :                converged = .FALSE.
     587             :                EXIT ! no need to check the other spin
     588             :             END IF
     589             :          END DO
     590             :          ! check other exit criteria: max SCF steps and timing
     591             :          CALL external_control(should_stop, "SCF", &
     592             :                                start_time=qs_env%start_time, &
     593           2 :                                target_time=qs_env%target_time)
     594           2 :          IF (should_stop .OR. iscf >= optimizer%max_iter .OR. converged) THEN
     595           0 :             prepare_to_exit = .TRUE.
     596             :          END IF
     597             : 
     598             :          ! if early stopping is on do at least one iteration
     599           2 :          IF (optimizer%early_stopping_on .AND. iscf .EQ. 1) &
     600             :             prepare_to_exit = .FALSE.
     601             : 
     602           2 :          IF (.NOT. prepare_to_exit) THEN ! update the ALMOs and density matrix
     603             : 
     604             :             ! perform mixing of KS matrices
     605           2 :             IF (iscf .NE. 1) THEN
     606             :                IF (.FALSE.) THEN ! use diis instead of mixing
     607             :                   DO ispin = 1, nspin
     608             :                      CALL add_submatrices( &
     609             :                         almo_scf_env%mixing_fraction, &
     610             :                         almo_scf_env%domain_ks_xx(:, ispin), &
     611             :                         1.0_dp - almo_scf_env%mixing_fraction, &
     612             :                         submatrix_mixing_old_blk(:, ispin), &
     613             :                         'N')
     614             :                   END DO
     615             :                ELSE
     616           0 :                   DO ispin = 1, nspin
     617             :                      CALL almo_scf_diis_extrapolate(diis_env=almo_diis(ispin), &
     618           0 :                                                     d_extr_var=almo_scf_env%domain_ks_xx(:, ispin))
     619             :                   END DO
     620             :                END IF
     621             :             END IF
     622             :             ! save the new matrix for the future mixing
     623           4 :             DO ispin = 1, nspin
     624             :                CALL copy_submatrices( &
     625             :                   almo_scf_env%domain_ks_xx(:, ispin), &
     626             :                   submatrix_mixing_old_blk(:, ispin), &
     627           4 :                   copy_data=.TRUE.)
     628             :             END DO
     629             : 
     630             :             ! obtain a new set of ALMOs from the updated KS matrix
     631           2 :             CALL almo_scf_ks_xx_to_tv_xx(almo_scf_env)
     632             : 
     633             :             ! update the density matrix
     634           4 :             DO ispin = 1, nspin
     635             : 
     636             :                ! save the initial density matrix (to get the perturbative energy lowering)
     637           2 :                IF (iscf .EQ. 1) THEN
     638             :                   CALL dbcsr_create(matrix_p_almo_scf_converged, &
     639           2 :                                     template=almo_scf_env%matrix_p(ispin))
     640             :                   CALL dbcsr_copy(matrix_p_almo_scf_converged, &
     641           2 :                                   almo_scf_env%matrix_p(ispin))
     642             :                END IF
     643             : 
     644             :                !! Application of an occupation-rescaling trick for smearing, if requested
     645           2 :                IF (almo_scf_env%smear) THEN
     646             :                   CALL almo_scf_t_rescaling(matrix_t=almo_scf_env%matrix_t_blk(ispin), &
     647             :                                             mo_energies=almo_scf_env%mo_energies(:, ispin), &
     648             :                                             mu_of_domain=almo_scf_env%mu_of_domain(:, ispin), &
     649             :                                             real_ne_of_domain=almo_scf_env%real_ne_of_domain(:, ispin), &
     650             :                                             spin_kTS=almo_scf_env%kTS(ispin), &
     651             :                                             smear_e_temp=almo_scf_env%smear_e_temp, &
     652             :                                             ndomains=almo_scf_env%ndomains, &
     653           0 :                                             nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin))
     654             :                END IF
     655             : 
     656             :                ! update now
     657             :                CALL almo_scf_t_to_proj( &
     658             :                   t=almo_scf_env%matrix_t(ispin), &
     659             :                   p=almo_scf_env%matrix_p(ispin), &
     660             :                   eps_filter=almo_scf_env%eps_filter, &
     661             :                   orthog_orbs=.FALSE., &
     662             :                   nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
     663             :                   s=almo_scf_env%matrix_s(1), &
     664             :                   sigma=almo_scf_env%matrix_sigma(ispin), &
     665             :                   sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
     666             :                   use_guess=.TRUE., &
     667             :                   smear=almo_scf_env%smear, &
     668             :                   algorithm=almo_scf_env%sigma_inv_algorithm, &
     669             :                   inverse_accelerator=almo_scf_env%order_lanczos, &
     670             :                   inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
     671             :                   eps_lanczos=almo_scf_env%eps_lanczos, &
     672             :                   max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
     673             :                   para_env=almo_scf_env%para_env, &
     674           2 :                   blacs_env=almo_scf_env%blacs_env)
     675           2 :                CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), spin_factor)
     676             :                !! Rescaling electronic entropy contribution by spin_factor
     677           2 :                IF (almo_scf_env%smear) THEN
     678           0 :                   almo_scf_env%kTS(ispin) = almo_scf_env%kTS(ispin)*spin_factor
     679             :                END IF
     680             : 
     681             :                ! obtain perturbative estimate (at no additional cost)
     682             :                ! of the energy lowering relative to the block-diagonal ALMOs
     683           4 :                IF (iscf .EQ. 1) THEN
     684             : 
     685             :                   CALL dbcsr_add(matrix_p_almo_scf_converged, &
     686           2 :                                  almo_scf_env%matrix_p(ispin), -1.0_dp, 1.0_dp)
     687             :                   CALL dbcsr_dot(almo_scf_env%matrix_ks_0deloc(ispin), &
     688             :                                  matrix_p_almo_scf_converged, &
     689           2 :                                  denergy_spin(ispin))
     690             : 
     691           2 :                   CALL dbcsr_release(matrix_p_almo_scf_converged)
     692             : 
     693             :                   !! RS-WARNING: If smearing ALMO is requested, electronic entropy contribution should probably be included here
     694             : 
     695           2 :                   denergy_tot = denergy_tot + denergy_spin(ispin)
     696             : 
     697             :                   ! RZK-warning Energy correction can be evaluated using matrix_x
     698             :                   ! as shown in the attempt below and in the PCG procedure.
     699             :                   ! Using matrix_x allows immediate decomposition of the energy
     700             :                   ! lowering into 2-body components for EDA. However, it does not
     701             :                   ! work here because the diagonalization routine does not necessarily
     702             :                   ! produce orbitals with the same sign as the block-diagonal ALMOs
     703             :                   ! Any fixes?!
     704             : 
     705             :                   !CALL dbcsr_init(matrix_x)
     706             :                   !CALL dbcsr_create(matrix_x,&
     707             :                   !        template=almo_scf_env%matrix_t(ispin))
     708             :                   !
     709             :                   !CALL dbcsr_init(matrix_tmp_no)
     710             :                   !CALL dbcsr_create(matrix_tmp_no,&
     711             :                   !        template=almo_scf_env%matrix_t(ispin))
     712             :                   !
     713             :                   !CALL dbcsr_copy(matrix_x,&
     714             :                   !        almo_scf_env%matrix_t_blk(ispin))
     715             :                   !CALL dbcsr_add(matrix_x,almo_scf_env%matrix_t(ispin),&
     716             :                   !        -1.0_dp,1.0_dp)
     717             : 
     718             :                   !CALL dbcsr_dot(matrix_x, almo_scf_env%matrix_err_xx(ispin),denergy)
     719             : 
     720             :                   !denergy=denergy*spin_factor
     721             : 
     722             :                   !IF (unit_nr>0) THEN
     723             :                   !   WRITE(unit_nr,*) "_ENERGY-0: ", almo_scf_env%almo_scf_energy
     724             :                   !   WRITE(unit_nr,*) "_ENERGY-D: ", denergy
     725             :                   !   WRITE(unit_nr,*) "_ENERGY-F: ", almo_scf_env%almo_scf_energy+denergy
     726             :                   !ENDIF
     727             :                   !! RZK-warning update will not work since the energy is overwritten almost immediately
     728             :                   !!CALL almo_scf_update_ks_energy(qs_env,&
     729             :                   !!        almo_scf_env%almo_scf_energy+denergy)
     730             :                   !!
     731             : 
     732             :                   !! print out the results of the decomposition analysis
     733             :                   !CALL dbcsr_hadamard_product(matrix_x,&
     734             :                   !        almo_scf_env%matrix_err_xx(ispin),&
     735             :                   !        matrix_tmp_no)
     736             :                   !CALL dbcsr_scale(matrix_tmp_no,spin_factor)
     737             :                   !CALL dbcsr_filter(matrix_tmp_no,almo_scf_env%eps_filter)
     738             :                   !
     739             :                   !IF (unit_nr>0) THEN
     740             :                   !   WRITE(unit_nr,*)
     741             :                   !   WRITE(unit_nr,'(T2,A)') "DECOMPOSITION OF THE DELOCALIZATION ENERGY"
     742             :                   !ENDIF
     743             : 
     744             :                   !mynode=dbcsr_mp_mynode(dbcsr_distribution_mp(&
     745             :                   !   dbcsr_distribution(matrix_tmp_no)))
     746             :                   !WRITE(mynodestr,'(I6.6)') mynode
     747             :                   !mylogfile='EDA.'//TRIM(ADJUSTL(mynodestr))
     748             :                   !OPEN (iunit,file=mylogfile,status='REPLACE')
     749             :                   !CALL print_block_sum(matrix_tmp_no,iunit)
     750             :                   !CLOSE(iunit)
     751             :                   !
     752             :                   !CALL dbcsr_release(matrix_tmp_no)
     753             :                   !CALL dbcsr_release(matrix_x)
     754             : 
     755             :                END IF ! iscf.eq.1
     756             : 
     757             :             END DO
     758             : 
     759             :             ! print out the energy lowering
     760           2 :             IF (iscf .EQ. 1) THEN
     761             :                CALL energy_lowering_report( &
     762             :                   unit_nr=unit_nr, &
     763             :                   ref_energy=almo_scf_env%almo_scf_energy, &
     764           2 :                   energy_lowering=denergy_tot)
     765             :                CALL almo_scf_update_ks_energy(qs_env, &
     766             :                                               energy=almo_scf_env%almo_scf_energy, &
     767           2 :                                               energy_singles_corr=denergy_tot)
     768             :             END IF
     769             : 
     770             :             ! compute the new KS matrix and new energy
     771           2 :             IF (.NOT. almo_scf_env%perturbative_delocalization) THEN
     772             : 
     773           0 :                IF (almo_scf_env%smear) THEN
     774           0 :                   kTS_sum = SUM(almo_scf_env%kTS)
     775             :                ELSE
     776           0 :                   kTS_sum = 0.0_dp
     777             :                END IF
     778             : 
     779             :                CALL almo_dm_to_almo_ks(qs_env, &
     780             :                                        almo_scf_env%matrix_p, &
     781             :                                        almo_scf_env%matrix_ks, &
     782             :                                        energy_new, &
     783             :                                        almo_scf_env%eps_filter, &
     784             :                                        almo_scf_env%mat_distr_aos, &
     785             :                                        smear=almo_scf_env%smear, &
     786           0 :                                        kTS_sum=kTS_sum)
     787             :             END IF
     788             : 
     789             :          END IF ! prepare_to_exit
     790             : 
     791           2 :          IF (almo_scf_env%perturbative_delocalization) THEN
     792             : 
     793             :             ! exit after the first step if we do not need the SCF procedure
     794           2 :             CALL almo_dm_to_qs_env(qs_env, almo_scf_env%matrix_p, almo_scf_env%mat_distr_aos)
     795           2 :             converged = .TRUE.
     796           2 :             prepare_to_exit = .TRUE.
     797             : 
     798             :          ELSE ! not a perturbative treatment
     799             : 
     800           0 :             energy_diff = energy_new - energy_old
     801           0 :             energy_old = energy_new
     802           0 :             almo_scf_env%almo_scf_energy = energy_new
     803             : 
     804           0 :             t2 = m_walltime()
     805             :             ! brief report on the current SCF loop
     806           0 :             IF (unit_nr > 0) THEN
     807           0 :                WRITE (unit_nr, '(T2,A,I6,F20.9,E11.3,E11.3,E11.3,F8.2)') "ALMO SCF", &
     808           0 :                   iscf, &
     809           0 :                   energy_new, energy_diff, error_norm, error_norm_0, t2 - t1
     810             :             END IF
     811           0 :             t1 = m_walltime()
     812             : 
     813             :          END IF
     814             : 
     815           2 :          IF (prepare_to_exit) EXIT
     816             : 
     817             :       END DO ! end scf cycle
     818             : 
     819             :       !! Print number of electrons recovered if smearing was requested
     820           2 :       IF (almo_scf_env%smear) THEN
     821           0 :          DO ispin = 1, nspin
     822           0 :             CALL dbcsr_dot(almo_scf_env%matrix_p(ispin), almo_scf_env%matrix_s(1), density_rec)
     823           0 :             IF (unit_nr > 0) THEN
     824           0 :                WRITE (unit_nr, '(T2,A20,F23.10)') "Electrons recovered:", density_rec
     825             :             END IF
     826             :          END DO
     827             :       END IF
     828             : 
     829           2 :       IF (.NOT. converged .AND. .NOT. optimizer%early_stopping_on) THEN
     830           0 :          CPABORT("SCF for ALMOs on overlapping domains not converged!")
     831             :       END IF
     832             : 
     833           4 :       DO ispin = 1, nspin
     834           2 :          CALL release_submatrices(submatrix_mixing_old_blk(:, ispin))
     835           4 :          CALL almo_scf_diis_release(diis_env=almo_diis(ispin))
     836             :       END DO
     837           4 :       DEALLOCATE (almo_diis)
     838          12 :       DEALLOCATE (submatrix_mixing_old_blk)
     839             : 
     840           2 :       CALL timestop(handle)
     841             : 
     842           2 :    END SUBROUTINE almo_scf_xalmo_eigensolver
     843             : 
     844             : ! **************************************************************************************************
     845             : !> \brief Optimization of ALMOs using PCG-like minimizers
     846             : !> \param qs_env ...
     847             : !> \param almo_scf_env ...
     848             : !> \param optimizer   controls the optimization algorithm
     849             : !> \param quench_t ...
     850             : !> \param matrix_t_in ...
     851             : !> \param matrix_t_out ...
     852             : !> \param assume_t0_q0x - since it is extremely difficult to converge the iterative
     853             : !>                        procedure using T as an optimized variable, assume
     854             : !>                        T = T_0 + (1-R_0)*X and optimize X
     855             : !>                        T_0 is assumed to be the zero-delocalization reference
     856             : !> \param perturbation_only - perturbative (do not update Hamiltonian)
     857             : !> \param special_case   to reduce the overhead special cases are implemented:
     858             : !>                       xalmo_case_normal - no special case (i.e. xALMOs)
     859             : !>                       xalmo_case_block_diag
     860             : !>                       xalmo_case_fully_deloc
     861             : !> \par History
     862             : !>       2011.11 created [Rustam Z Khaliullin]
     863             : !> \author Rustam Z Khaliullin
     864             : ! **************************************************************************************************
     865          86 :    SUBROUTINE almo_scf_xalmo_pcg(qs_env, almo_scf_env, optimizer, quench_t, &
     866             :                                  matrix_t_in, matrix_t_out, assume_t0_q0x, perturbation_only, &
     867             :                                  special_case)
     868             : 
     869             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     870             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
     871             :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
     872             :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:), &
     873             :          INTENT(INOUT)                                   :: quench_t, matrix_t_in, matrix_t_out
     874             :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, perturbation_only
     875             :       INTEGER, INTENT(IN), OPTIONAL                      :: special_case
     876             : 
     877             :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_xalmo_pcg'
     878             : 
     879             :       CHARACTER(LEN=20)                                  :: iter_type
     880             :       INTEGER :: cg_iteration, dim_op, fixed_line_search_niter, handle, idim0, ielem, ispin, &
     881             :          iteration, line_search_iteration, max_iter, my_special_case, ndomains, nmo, nspins, &
     882             :          outer_iteration, outer_max_iter, prec_type, reim, unit_nr
     883          86 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
     884             :       LOGICAL :: blissful_neglect, converged, just_started, line_search, normalize_orbitals, &
     885             :          optimize_theta, outer_prepare_to_exit, penalty_occ_local, penalty_occ_vol, &
     886             :          prepare_to_exit, reset_conjugator, skip_grad, use_guess
     887          86 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: reim_diag, weights, z2
     888             :       REAL(kind=dp) :: appr_sec_der, beta, denom, denom2, e0, e1, energy_coeff, energy_diff, &
     889             :          energy_new, energy_old, eps_skip_gradients, fval, g0, g1, grad_norm, grad_norm_frob, &
     890             :          line_search_error, localiz_coeff, localization_obj_function, next_step_size_guess, &
     891             :          penalty_amplitude, penalty_func_new, spin_factor, step_size, t1, t2, tempreal
     892          86 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: grad_norm_spin, &
     893          86 :                                                             penalty_occ_vol_g_prefactor, &
     894          86 :                                                             penalty_occ_vol_h_prefactor
     895             :       TYPE(cell_type), POINTER                           :: cell
     896             :       TYPE(cp_logger_type), POINTER                      :: logger
     897          86 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: qs_matrix_s
     898          86 :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: op_sm_set_almo, op_sm_set_qs
     899          86 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: FTsiginv, grad, m_sig_sqrti_ii, m_t_in_local, &
     900          86 :          m_theta, prec_vv, prev_grad, prev_minus_prec_grad, prev_step, siginvTFTsiginv, ST, step, &
     901          86 :          STsiginv_0, tempNOcc, tempNOcc_1, tempOccOcc
     902             :       TYPE(domain_submatrix_type), ALLOCATABLE, &
     903          86 :          DIMENSION(:, :)                                 :: bad_modes_projector_down, domain_r_down
     904             :       TYPE(mp_comm_type)                                 :: group
     905             : 
     906          86 :       CALL timeset(routineN, handle)
     907             : 
     908          86 :       my_special_case = xalmo_case_normal
     909          86 :       IF (PRESENT(special_case)) my_special_case = special_case
     910             : 
     911             :       ! get a useful output_unit
     912          86 :       logger => cp_get_default_logger()
     913          86 :       IF (logger%para_env%is_source()) THEN
     914          43 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
     915             :       ELSE
     916             :          unit_nr = -1
     917             :       END IF
     918             : 
     919          86 :       nspins = almo_scf_env%nspins
     920             : 
     921             :       ! if unprojected XALMOs are optimized
     922             :       ! then we must use the "blissful_neglect" procedure
     923          86 :       blissful_neglect = .FALSE.
     924          86 :       IF (my_special_case .EQ. xalmo_case_normal .AND. .NOT. assume_t0_q0x) THEN
     925          14 :          blissful_neglect = .TRUE.
     926             :       END IF
     927             : 
     928          86 :       IF (unit_nr > 0) THEN
     929          43 :          WRITE (unit_nr, *)
     930           2 :          SELECT CASE (my_special_case)
     931             :          CASE (xalmo_case_block_diag)
     932           2 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
     933           4 :                " Optimization of block-diagonal ALMOs ", REPEAT("-", 21)
     934             :          CASE (xalmo_case_fully_deloc)
     935          22 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
     936          44 :                " Optimization of fully delocalized MOs ", REPEAT("-", 20)
     937             :          CASE (xalmo_case_normal)
     938          43 :             IF (blissful_neglect) THEN
     939           7 :                WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 25), &
     940          14 :                   " LCP optimization of XALMOs ", REPEAT("-", 26)
     941             :             ELSE
     942          12 :                WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 27), &
     943          24 :                   " Optimization of XALMOs ", REPEAT("-", 28)
     944             :             END IF
     945             :          END SELECT
     946          43 :          WRITE (unit_nr, *)
     947          43 :          WRITE (unit_nr, '(T2,A13,A6,A23,A14,A14,A9)') "Method", "Iter", &
     948          86 :             "Objective Function", "Change", "Convergence", "Time"
     949          43 :          WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
     950             :       END IF
     951             : 
     952             :       ! set local parameters using developer's keywords
     953             :       ! RZK-warning: change to normal keywords later
     954          86 :       optimize_theta = almo_scf_env%logical05
     955          86 :       eps_skip_gradients = almo_scf_env%real01
     956             : 
     957             :       ! penalty amplitude adjusts the strength of volume conservation
     958          86 :       energy_coeff = 1.0_dp !optimizer%opt_penalty%energy_coeff
     959          86 :       localiz_coeff = 0.0_dp !optimizer%opt_penalty%occ_loc_coeff
     960          86 :       penalty_amplitude = 0.0_dp !optimizer%opt_penalty%occ_vol_coeff
     961          86 :       penalty_occ_vol = .FALSE. !( optimizer%opt_penalty%occ_vol_method &
     962             :       !.NE. penalty_type_none .AND. my_special_case .EQ. xalmo_case_fully_deloc )
     963          86 :       penalty_occ_local = .FALSE. !( optimizer%opt_penalty%occ_loc_method &
     964             :       !.NE. penalty_type_none .AND. my_special_case .EQ. xalmo_case_fully_deloc )
     965          86 :       normalize_orbitals = penalty_occ_vol .OR. penalty_occ_local
     966         258 :       ALLOCATE (penalty_occ_vol_g_prefactor(nspins))
     967         172 :       ALLOCATE (penalty_occ_vol_h_prefactor(nspins))
     968         172 :       penalty_occ_vol_g_prefactor(:) = 0.0_dp
     969         172 :       penalty_occ_vol_h_prefactor(:) = 0.0_dp
     970          86 :       penalty_func_new = 0.0_dp
     971             : 
     972             :       ! preconditioner control
     973          86 :       prec_type = optimizer%preconditioner
     974             : 
     975             :       ! control of the line search
     976          86 :       fixed_line_search_niter = 0 ! init to zero, change when eps is small enough
     977             : 
     978          86 :       IF (nspins == 1) THEN
     979          86 :          spin_factor = 2.0_dp
     980             :       ELSE
     981           0 :          spin_factor = 1.0_dp
     982             :       END IF
     983             : 
     984         172 :       ALLOCATE (grad_norm_spin(nspins))
     985         258 :       ALLOCATE (nocc(nspins))
     986             : 
     987             :       ! create a local copy of matrix_t_in because
     988             :       ! matrix_t_in and matrix_t_out can be the same matrix
     989             :       ! we need to make sure data in matrix_t_in is intact
     990             :       ! after we start writing to matrix_t_out
     991         344 :       ALLOCATE (m_t_in_local(nspins))
     992         172 :       DO ispin = 1, nspins
     993             :          CALL dbcsr_create(m_t_in_local(ispin), &
     994             :                            template=matrix_t_in(ispin), &
     995          86 :                            matrix_type=dbcsr_type_no_symmetry)
     996         172 :          CALL dbcsr_copy(m_t_in_local(ispin), matrix_t_in(ispin))
     997             :       END DO
     998             : 
     999             :       ! m_theta contains a set of variational parameters
    1000             :       ! that define one-electron orbitals (simple, projected, etc.)
    1001         258 :       ALLOCATE (m_theta(nspins))
    1002         172 :       DO ispin = 1, nspins
    1003             :          CALL dbcsr_create(m_theta(ispin), &
    1004             :                            template=matrix_t_out(ispin), &
    1005         172 :                            matrix_type=dbcsr_type_no_symmetry)
    1006             :       END DO
    1007             : 
    1008             :       ! Compute localization matrices
    1009             :       IF (penalty_occ_local) THEN
    1010             : 
    1011             :          CALL get_qs_env(qs_env=qs_env, &
    1012             :                          matrix_s=qs_matrix_s, &
    1013             :                          cell=cell)
    1014             : 
    1015             :          IF (cell%orthorhombic) THEN
    1016             :             dim_op = 3
    1017             :          ELSE
    1018             :             dim_op = 6
    1019             :          END IF
    1020             :          ALLOCATE (weights(6))
    1021             :          weights = 0.0_dp
    1022             : 
    1023             :          CALL initialize_weights(cell, weights)
    1024             : 
    1025             :          ALLOCATE (op_sm_set_qs(2, dim_op))
    1026             :          ALLOCATE (op_sm_set_almo(2, dim_op))
    1027             : 
    1028             :          DO idim0 = 1, dim_op
    1029             :             DO reim = 1, SIZE(op_sm_set_qs, 1)
    1030             :                NULLIFY (op_sm_set_qs(reim, idim0)%matrix)
    1031             :                ALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    1032             :                CALL dbcsr_copy(op_sm_set_qs(reim, idim0)%matrix, qs_matrix_s(1)%matrix, &
    1033             :                              name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
    1034             :                CALL dbcsr_set(op_sm_set_qs(reim, idim0)%matrix, 0.0_dp)
    1035             :                NULLIFY (op_sm_set_almo(reim, idim0)%matrix)
    1036             :                ALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    1037             :                CALL dbcsr_copy(op_sm_set_almo(reim, idim0)%matrix, almo_scf_env%matrix_s(1), &
    1038             :                              name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
    1039             :                CALL dbcsr_set(op_sm_set_almo(reim, idim0)%matrix, 0.0_dp)
    1040             :             END DO
    1041             :          END DO
    1042             : 
    1043             :          CALL compute_berry_operator(qs_env, cell, op_sm_set_qs, dim_op)
    1044             : 
    1045             :          !CALL matrix_qs_to_almo(op_sm_set_qs, op_sm_set_almo, almo_scf_env%mat_distr_aos)
    1046             : 
    1047             :       END IF
    1048             : 
    1049             :       ! create initial guess from the initial orbitals
    1050             :       CALL xalmo_initial_guess(m_guess=m_theta, &
    1051             :                                m_t_in=m_t_in_local, &
    1052             :                                m_t0=almo_scf_env%matrix_t_blk, &
    1053             :                                m_quench_t=quench_t, &
    1054             :                                m_overlap=almo_scf_env%matrix_s(1), &
    1055             :                                m_sigma_tmpl=almo_scf_env%matrix_sigma_inv, &
    1056             :                                nspins=nspins, &
    1057             :                                xalmo_history=almo_scf_env%xalmo_history, &
    1058             :                                assume_t0_q0x=assume_t0_q0x, &
    1059             :                                optimize_theta=optimize_theta, &
    1060             :                                envelope_amplitude=almo_scf_env%envelope_amplitude, &
    1061             :                                eps_filter=almo_scf_env%eps_filter, &
    1062             :                                order_lanczos=almo_scf_env%order_lanczos, &
    1063             :                                eps_lanczos=almo_scf_env%eps_lanczos, &
    1064             :                                max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
    1065          86 :                                nocc_of_domain=almo_scf_env%nocc_of_domain)
    1066             : 
    1067          86 :       ndomains = almo_scf_env%ndomains
    1068        1028 :       ALLOCATE (domain_r_down(ndomains, nspins))
    1069          86 :       CALL init_submatrices(domain_r_down)
    1070         942 :       ALLOCATE (bad_modes_projector_down(ndomains, nspins))
    1071          86 :       CALL init_submatrices(bad_modes_projector_down)
    1072             : 
    1073         258 :       ALLOCATE (prec_vv(nspins))
    1074         258 :       ALLOCATE (siginvTFTsiginv(nspins))
    1075         258 :       ALLOCATE (STsiginv_0(nspins))
    1076         258 :       ALLOCATE (FTsiginv(nspins))
    1077         258 :       ALLOCATE (ST(nspins))
    1078         258 :       ALLOCATE (prev_grad(nspins))
    1079         344 :       ALLOCATE (grad(nspins))
    1080         258 :       ALLOCATE (prev_step(nspins))
    1081         258 :       ALLOCATE (step(nspins))
    1082         258 :       ALLOCATE (prev_minus_prec_grad(nspins))
    1083         258 :       ALLOCATE (m_sig_sqrti_ii(nspins))
    1084         258 :       ALLOCATE (tempNOcc(nspins))
    1085         258 :       ALLOCATE (tempNOcc_1(nspins))
    1086         258 :       ALLOCATE (tempOccOcc(nspins))
    1087         172 :       DO ispin = 1, nspins
    1088             : 
    1089             :          ! init temporary storage
    1090             :          CALL dbcsr_create(prec_vv(ispin), &
    1091             :                            template=almo_scf_env%matrix_ks(ispin), &
    1092          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1093             :          CALL dbcsr_create(siginvTFTsiginv(ispin), &
    1094             :                            template=almo_scf_env%matrix_sigma(ispin), &
    1095          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1096             :          CALL dbcsr_create(STsiginv_0(ispin), &
    1097             :                            template=matrix_t_out(ispin), &
    1098          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1099             :          CALL dbcsr_create(FTsiginv(ispin), &
    1100             :                            template=matrix_t_out(ispin), &
    1101          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1102             :          CALL dbcsr_create(ST(ispin), &
    1103             :                            template=matrix_t_out(ispin), &
    1104          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1105             :          CALL dbcsr_create(prev_grad(ispin), &
    1106             :                            template=matrix_t_out(ispin), &
    1107          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1108             :          CALL dbcsr_create(grad(ispin), &
    1109             :                            template=matrix_t_out(ispin), &
    1110          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1111             :          CALL dbcsr_create(prev_step(ispin), &
    1112             :                            template=matrix_t_out(ispin), &
    1113          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1114             :          CALL dbcsr_create(step(ispin), &
    1115             :                            template=matrix_t_out(ispin), &
    1116          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1117             :          CALL dbcsr_create(prev_minus_prec_grad(ispin), &
    1118             :                            template=matrix_t_out(ispin), &
    1119          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1120             :          CALL dbcsr_create(m_sig_sqrti_ii(ispin), &
    1121             :                            template=almo_scf_env%matrix_sigma_inv(ispin), &
    1122          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1123             :          CALL dbcsr_create(tempNOcc(ispin), &
    1124             :                            template=matrix_t_out(ispin), &
    1125          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1126             :          CALL dbcsr_create(tempNOcc_1(ispin), &
    1127             :                            template=matrix_t_out(ispin), &
    1128          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1129             :          CALL dbcsr_create(tempOccOcc(ispin), &
    1130             :                            template=almo_scf_env%matrix_sigma_inv(ispin), &
    1131          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1132             : 
    1133          86 :          CALL dbcsr_set(step(ispin), 0.0_dp)
    1134          86 :          CALL dbcsr_set(prev_step(ispin), 0.0_dp)
    1135             : 
    1136             :          CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), &
    1137          86 :                              nfullrows_total=nocc(ispin))
    1138             : 
    1139             :          ! invert S domains if necessary
    1140             :          ! Note: domains for alpha and beta electrons might be different
    1141             :          ! that is why the inversion of the AO overlap is inside the spin loop
    1142          86 :          IF (my_special_case .EQ. xalmo_case_normal) THEN
    1143             :             CALL construct_domain_s_inv( &
    1144             :                matrix_s=almo_scf_env%matrix_s(1), &
    1145             :                subm_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    1146             :                dpattern=quench_t(ispin), &
    1147             :                map=almo_scf_env%domain_map(ispin), &
    1148          38 :                node_of_domain=almo_scf_env%cpu_of_domain)
    1149             : 
    1150             :             CALL construct_domain_s_sqrt( &
    1151             :                matrix_s=almo_scf_env%matrix_s(1), &
    1152             :                subm_s_sqrt=almo_scf_env%domain_s_sqrt(:, ispin), &
    1153             :                subm_s_sqrt_inv=almo_scf_env%domain_s_sqrt_inv(:, ispin), &
    1154             :                dpattern=almo_scf_env%quench_t(ispin), &
    1155             :                map=almo_scf_env%domain_map(ispin), &
    1156          38 :                node_of_domain=almo_scf_env%cpu_of_domain)
    1157             : 
    1158             :          END IF
    1159             : 
    1160          86 :          IF (assume_t0_q0x) THEN
    1161             : 
    1162             :             ! save S.T_0.siginv_0
    1163          42 :             IF (my_special_case .EQ. xalmo_case_fully_deloc) THEN
    1164             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1165             :                                    almo_scf_env%matrix_s(1), &
    1166             :                                    almo_scf_env%matrix_t_blk(ispin), &
    1167             :                                    0.0_dp, ST(ispin), &
    1168          18 :                                    filter_eps=almo_scf_env%eps_filter)
    1169             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1170             :                                    ST(ispin), &
    1171             :                                    almo_scf_env%matrix_sigma_inv_0deloc(ispin), &
    1172             :                                    0.0_dp, STsiginv_0(ispin), &
    1173          18 :                                    filter_eps=almo_scf_env%eps_filter)
    1174             :             END IF
    1175             : 
    1176             :             ! construct domain-projector
    1177          42 :             IF (my_special_case .EQ. xalmo_case_normal) THEN
    1178             :                CALL construct_domain_r_down( &
    1179             :                   matrix_t=almo_scf_env%matrix_t_blk(ispin), &
    1180             :                   matrix_sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
    1181             :                   matrix_s=almo_scf_env%matrix_s(1), &
    1182             :                   subm_r_down=domain_r_down(:, ispin), &
    1183             :                   dpattern=quench_t(ispin), &
    1184             :                   map=almo_scf_env%domain_map(ispin), &
    1185             :                   node_of_domain=almo_scf_env%cpu_of_domain, &
    1186          24 :                   filter_eps=almo_scf_env%eps_filter)
    1187             :             END IF
    1188             : 
    1189             :          END IF ! assume_t0_q0x
    1190             : 
    1191             :          ! localization functional
    1192         172 :          IF (penalty_occ_local) THEN
    1193             : 
    1194             :             ! compute S.R0.B.R0.S
    1195             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1196             :                                 almo_scf_env%matrix_s(1), &
    1197             :                                 matrix_t_in(ispin), &
    1198             :                                 0.0_dp, tempNOcc(ispin), &
    1199           0 :                                 filter_eps=almo_scf_env%eps_filter)
    1200             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1201             :                                 tempNOcc(ispin), &
    1202             :                                 almo_scf_env%matrix_sigma_inv(ispin), &
    1203             :                                 0.0_dp, tempNOCC_1(ispin), &
    1204           0 :                                 filter_eps=almo_scf_env%eps_filter)
    1205             : 
    1206           0 :             DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
    1207           0 :                DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
    1208             : 
    1209             :                   CALL matrix_qs_to_almo(op_sm_set_qs(reim, idim0)%matrix, &
    1210           0 :                                          op_sm_set_almo(reim, idim0)%matrix, almo_scf_env%mat_distr_aos)
    1211             : 
    1212             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1213             :                                       op_sm_set_almo(reim, idim0)%matrix, &
    1214             :                                       matrix_t_in(ispin), &
    1215             :                                       0.0_dp, tempNOcc(ispin), &
    1216           0 :                                       filter_eps=almo_scf_env%eps_filter)
    1217             : 
    1218             :                   CALL dbcsr_multiply("T", "N", 1.0_dp, &
    1219             :                                       matrix_t_in(ispin), &
    1220             :                                       tempNOcc(ispin), &
    1221             :                                       0.0_dp, tempOccOcc(ispin), &
    1222           0 :                                       filter_eps=almo_scf_env%eps_filter)
    1223             : 
    1224             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1225             :                                       tempNOCC_1(ispin), &
    1226             :                                       tempOccOcc(ispin), &
    1227             :                                       0.0_dp, tempNOcc(ispin), &
    1228           0 :                                       filter_eps=almo_scf_env%eps_filter)
    1229             : 
    1230             :                   CALL dbcsr_multiply("N", "T", 1.0_dp, &
    1231             :                                       tempNOcc(ispin), &
    1232             :                                       tempNOcc_1(ispin), &
    1233             :                                       0.0_dp, op_sm_set_almo(reim, idim0)%matrix, &
    1234           0 :                                       filter_eps=almo_scf_env%eps_filter)
    1235             : 
    1236             :                END DO
    1237             :             END DO ! end loop over idim0
    1238             : 
    1239             :          END IF !penalty_occ_local
    1240             : 
    1241             :       END DO ! ispin
    1242             : 
    1243             :       ! start the outer SCF loop
    1244          86 :       outer_max_iter = optimizer%max_iter_outer_loop
    1245          86 :       outer_prepare_to_exit = .FALSE.
    1246          86 :       outer_iteration = 0
    1247          86 :       grad_norm = 0.0_dp
    1248          86 :       grad_norm_frob = 0.0_dp
    1249          86 :       use_guess = .FALSE.
    1250             : 
    1251             :       DO
    1252             : 
    1253             :          ! start the inner SCF loop
    1254          92 :          max_iter = optimizer%max_iter
    1255          92 :          prepare_to_exit = .FALSE.
    1256          92 :          line_search = .FALSE.
    1257          92 :          converged = .FALSE.
    1258          92 :          iteration = 0
    1259          92 :          cg_iteration = 0
    1260          92 :          line_search_iteration = 0
    1261             :          energy_new = 0.0_dp
    1262          92 :          energy_old = 0.0_dp
    1263          92 :          energy_diff = 0.0_dp
    1264             :          localization_obj_function = 0.0_dp
    1265          92 :          line_search_error = 0.0_dp
    1266             : 
    1267          92 :          t1 = m_walltime()
    1268             : 
    1269        1048 :          DO
    1270             : 
    1271        1048 :             just_started = (iteration .EQ. 0) .AND. (outer_iteration .EQ. 0)
    1272             : 
    1273             :             CALL main_var_to_xalmos_and_loss_func( &
    1274             :                almo_scf_env=almo_scf_env, &
    1275             :                qs_env=qs_env, &
    1276             :                m_main_var_in=m_theta, &
    1277             :                m_t_out=matrix_t_out, &
    1278             :                m_sig_sqrti_ii_out=m_sig_sqrti_ii, &
    1279             :                energy_out=energy_new, &
    1280             :                penalty_out=penalty_func_new, &
    1281             :                m_FTsiginv_out=FTsiginv, &
    1282             :                m_siginvTFTsiginv_out=siginvTFTsiginv, &
    1283             :                m_ST_out=ST, &
    1284             :                m_STsiginv0_in=STsiginv_0, &
    1285             :                m_quench_t_in=quench_t, &
    1286             :                domain_r_down_in=domain_r_down, &
    1287             :                assume_t0_q0x=assume_t0_q0x, &
    1288             :                just_started=just_started, &
    1289             :                optimize_theta=optimize_theta, &
    1290             :                normalize_orbitals=normalize_orbitals, &
    1291             :                perturbation_only=perturbation_only, &
    1292             :                do_penalty=penalty_occ_vol, &
    1293        1048 :                special_case=my_special_case)
    1294        1048 :             IF (penalty_occ_vol) THEN
    1295             :                ! this is not pure energy anymore
    1296           0 :                energy_new = energy_new + penalty_func_new
    1297             :             END IF
    1298        2096 :             DO ispin = 1, nspins
    1299        2096 :                IF (penalty_occ_vol) THEN
    1300             :                   penalty_occ_vol_g_prefactor(ispin) = &
    1301           0 :                      -2.0_dp*penalty_amplitude*spin_factor*nocc(ispin)
    1302           0 :                   penalty_occ_vol_h_prefactor(ispin) = 0.0_dp
    1303             :                END IF
    1304             :             END DO
    1305             : 
    1306        1048 :             localization_obj_function = 0.0_dp
    1307             :             ! RZK-warning: This block must be combined with the loss function
    1308        1048 :             IF (penalty_occ_local) THEN
    1309           0 :                DO ispin = 1, nspins
    1310             : 
    1311             :                   ! LzL insert localization penalty
    1312           0 :                   localization_obj_function = 0.0_dp
    1313           0 :                   CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), nfullrows_total=nmo)
    1314           0 :                   ALLOCATE (z2(nmo))
    1315           0 :                   ALLOCATE (reim_diag(nmo))
    1316             : 
    1317           0 :                   CALL dbcsr_get_info(tempOccOcc(ispin), group=group)
    1318             : 
    1319           0 :                   DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
    1320             : 
    1321           0 :                      z2(:) = 0.0_dp
    1322             : 
    1323           0 :                      DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
    1324             : 
    1325             :                         !CALL matrix_qs_to_almo(op_sm_set_qs(reim, idim0)%matrix,
    1326             :                         !                       op_sm_set_almo(reim, idim0)%matrix, &
    1327             :                         !                       almo_scf_env%mat_distr_aos)
    1328             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1329             :                                             op_sm_set_almo(reim, idim0)%matrix, &
    1330             :                                             matrix_t_out(ispin), &
    1331             :                                             0.0_dp, tempNOcc(ispin), &
    1332           0 :                                             filter_eps=almo_scf_env%eps_filter)
    1333             :                         !warning - save time by computing only the diagonal elements
    1334             :                         CALL dbcsr_multiply("T", "N", 1.0_dp, &
    1335             :                                             matrix_t_out(ispin), &
    1336             :                                             tempNOcc(ispin), &
    1337             :                                             0.0_dp, tempOccOcc(ispin), &
    1338           0 :                                             filter_eps=almo_scf_env%eps_filter)
    1339             : 
    1340           0 :                         reim_diag = 0.0_dp
    1341           0 :                         CALL dbcsr_get_diag(tempOccOcc(ispin), reim_diag)
    1342           0 :                         CALL group%sum(reim_diag)
    1343           0 :                         z2(:) = z2(:) + reim_diag(:)*reim_diag(:)
    1344             : 
    1345             :                      END DO
    1346             : 
    1347           0 :                      DO ielem = 1, nmo
    1348             :                         SELECT CASE (2) ! allows for selection of different spread functionals
    1349             :                         CASE (1) ! functional =  -W_I * log( |z_I|^2 )
    1350           0 :                            fval = -weights(idim0)*LOG(ABS(z2(ielem)))
    1351             :                         CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
    1352           0 :                            fval = weights(idim0) - weights(idim0)*ABS(z2(ielem))
    1353             :                         CASE (3) ! functional =  W_I * ( 1 - |z_I| )
    1354             :                            fval = weights(idim0) - weights(idim0)*SQRT(ABS(z2(ielem)))
    1355             :                         END SELECT
    1356           0 :                         localization_obj_function = localization_obj_function + fval
    1357             :                      END DO
    1358             : 
    1359             :                   END DO ! end loop over idim0
    1360             : 
    1361           0 :                   DEALLOCATE (z2)
    1362           0 :                   DEALLOCATE (reim_diag)
    1363             : 
    1364           0 :                   energy_new = energy_new + localiz_coeff*localization_obj_function
    1365             : 
    1366             :                END DO ! ispin
    1367             :             END IF ! penalty_occ_local
    1368             : 
    1369        2096 :             DO ispin = 1, nspins
    1370             : 
    1371             :                IF (just_started .AND. almo_mathematica) THEN
    1372             :                   CPWARN_IF(ispin .GT. 1, "Mathematica files will be overwritten")
    1373             :                   CALL print_mathematica_matrix(almo_scf_env%matrix_s(1), "matrixS.dat")
    1374             :                   CALL print_mathematica_matrix(almo_scf_env%matrix_ks(ispin), "matrixF.dat")
    1375             :                   CALL print_mathematica_matrix(matrix_t_out(ispin), "matrixT.dat")
    1376             :                   CALL print_mathematica_matrix(quench_t(ispin), "matrixQ.dat")
    1377             :                END IF
    1378             : 
    1379             :                ! save the previous gradient to compute beta
    1380             :                ! do it only if the previous grad was computed
    1381             :                ! for .NOT.line_search
    1382        1048 :                IF (line_search_iteration .EQ. 0 .AND. iteration .NE. 0) &
    1383        1542 :                   CALL dbcsr_copy(prev_grad(ispin), grad(ispin))
    1384             : 
    1385             :             END DO ! ispin
    1386             : 
    1387             :             ! compute the energy gradient if necessary
    1388             :             skip_grad = (iteration .GT. 0 .AND. &
    1389             :                          fixed_line_search_niter .NE. 0 .AND. &
    1390        1048 :                          line_search_iteration .NE. fixed_line_search_niter)
    1391             : 
    1392             :             IF (.NOT. skip_grad) THEN
    1393             : 
    1394        2096 :                DO ispin = 1, nspins
    1395             : 
    1396             :                   CALL compute_gradient( &
    1397             :                      m_grad_out=grad(ispin), &
    1398             :                      m_ks=almo_scf_env%matrix_ks(ispin), &
    1399             :                      m_s=almo_scf_env%matrix_s(1), &
    1400             :                      m_t=matrix_t_out(ispin), &
    1401             :                      m_t0=almo_scf_env%matrix_t_blk(ispin), &
    1402             :                      m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    1403             :                      m_quench_t=quench_t(ispin), &
    1404             :                      m_FTsiginv=FTsiginv(ispin), &
    1405             :                      m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    1406             :                      m_ST=ST(ispin), &
    1407             :                      m_STsiginv0=STsiginv_0(ispin), &
    1408             :                      m_theta=m_theta(ispin), &
    1409             :                      m_sig_sqrti_ii=m_sig_sqrti_ii(ispin), &
    1410             :                      domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    1411             :                      domain_r_down=domain_r_down(:, ispin), &
    1412             :                      cpu_of_domain=almo_scf_env%cpu_of_domain, &
    1413             :                      domain_map=almo_scf_env%domain_map(ispin), &
    1414             :                      assume_t0_q0x=assume_t0_q0x, &
    1415             :                      optimize_theta=optimize_theta, &
    1416             :                      normalize_orbitals=normalize_orbitals, &
    1417             :                      penalty_occ_vol=penalty_occ_vol, &
    1418             :                      penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    1419             :                      envelope_amplitude=almo_scf_env%envelope_amplitude, &
    1420             :                      eps_filter=almo_scf_env%eps_filter, &
    1421             :                      spin_factor=spin_factor, &
    1422             :                      special_case=my_special_case, &
    1423             :                      penalty_occ_local=penalty_occ_local, &
    1424             :                      op_sm_set=op_sm_set_almo, &
    1425             :                      weights=weights, &
    1426             :                      energy_coeff=energy_coeff, &
    1427        2096 :                      localiz_coeff=localiz_coeff)
    1428             : 
    1429             :                END DO ! ispin
    1430             : 
    1431             :             END IF ! skip_grad
    1432             : 
    1433             :             ! if unprojected XALMOs are optimized then compute both
    1434             :             ! HessianInv/preconditioner and the "bad-mode" projector
    1435             : 
    1436        1048 :             IF (blissful_neglect) THEN
    1437         460 :                DO ispin = 1, nspins
    1438             :                   !compute the prec only for the first step,
    1439             :                   !but project the gradient every step
    1440         230 :                   IF (iteration .EQ. 0) THEN
    1441             :                      CALL compute_preconditioner( &
    1442             :                         domain_prec_out=almo_scf_env%domain_preconditioner(:, ispin), &
    1443             :                         bad_modes_projector_down_out=bad_modes_projector_down(:, ispin), &
    1444             :                         m_prec_out=prec_vv(ispin), &
    1445             :                         m_ks=almo_scf_env%matrix_ks(ispin), &
    1446             :                         m_s=almo_scf_env%matrix_s(1), &
    1447             :                         m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    1448             :                         m_quench_t=quench_t(ispin), &
    1449             :                         m_FTsiginv=FTsiginv(ispin), &
    1450             :                         m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    1451             :                         m_ST=ST(ispin), &
    1452             :                         para_env=almo_scf_env%para_env, &
    1453             :                         blacs_env=almo_scf_env%blacs_env, &
    1454             :                         nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    1455             :                         domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    1456             :                         domain_s_inv_half=almo_scf_env%domain_s_sqrt_inv(:, ispin), &
    1457             :                         domain_s_half=almo_scf_env%domain_s_sqrt(:, ispin), &
    1458             :                         domain_r_down=domain_r_down(:, ispin), &
    1459             :                         cpu_of_domain=almo_scf_env%cpu_of_domain, &
    1460             :                         domain_map=almo_scf_env%domain_map(ispin), &
    1461             :                         assume_t0_q0x=assume_t0_q0x, &
    1462             :                         penalty_occ_vol=penalty_occ_vol, &
    1463             :                         penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    1464             :                         eps_filter=almo_scf_env%eps_filter, &
    1465             :                         neg_thr=optimizer%neglect_threshold, &
    1466             :                         spin_factor=spin_factor, &
    1467             :                         skip_inversion=.FALSE., &
    1468          18 :                         special_case=my_special_case)
    1469             :                   END IF
    1470             :                   ! remove bad modes from the gradient
    1471             :                   CALL apply_domain_operators( &
    1472             :                      matrix_in=grad(ispin), &
    1473             :                      matrix_out=grad(ispin), &
    1474             :                      operator1=almo_scf_env%domain_s_inv(:, ispin), &
    1475             :                      operator2=bad_modes_projector_down(:, ispin), &
    1476             :                      dpattern=quench_t(ispin), &
    1477             :                      map=almo_scf_env%domain_map(ispin), &
    1478             :                      node_of_domain=almo_scf_env%cpu_of_domain, &
    1479             :                      my_action=1, &
    1480         460 :                      filter_eps=almo_scf_env%eps_filter)
    1481             : 
    1482             :                END DO ! ispin
    1483             : 
    1484             :             END IF ! blissful neglect
    1485             : 
    1486             :             ! check convergence and other exit criteria
    1487        2096 :             DO ispin = 1, nspins
    1488        2096 :                grad_norm_spin(ispin) = dbcsr_maxabs(grad(ispin))
    1489             :             END DO ! ispin
    1490        3144 :             grad_norm = MAXVAL(grad_norm_spin)
    1491             : 
    1492        1048 :             converged = (grad_norm .LE. optimizer%eps_error)
    1493        1048 :             IF (converged .OR. (iteration .GE. max_iter)) THEN
    1494          92 :                prepare_to_exit = .TRUE.
    1495             :             END IF
    1496             :             ! if early stopping is on do at least one iteration
    1497        1048 :             IF (optimizer%early_stopping_on .AND. just_started) &
    1498           0 :                prepare_to_exit = .FALSE.
    1499             : 
    1500             :             IF (grad_norm .LT. almo_scf_env%eps_prev_guess) &
    1501        1048 :                use_guess = .TRUE.
    1502             : 
    1503             :             ! it is not time to exit just yet
    1504        1048 :             IF (.NOT. prepare_to_exit) THEN
    1505             : 
    1506             :                ! check the gradient along the step direction
    1507             :                ! and decide whether to switch to the line-search mode
    1508             :                ! do not do this in the first iteration
    1509         956 :                IF (iteration .NE. 0) THEN
    1510             : 
    1511         864 :                   IF (fixed_line_search_niter .EQ. 0) THEN
    1512             : 
    1513             :                      ! enforce at least one line search
    1514             :                      ! without even checking the error
    1515         864 :                      IF (.NOT. line_search) THEN
    1516             : 
    1517         422 :                         line_search = .TRUE.
    1518         422 :                         line_search_iteration = line_search_iteration + 1
    1519             : 
    1520             :                      ELSE
    1521             : 
    1522             :                         ! check the line-search error and decide whether to
    1523             :                         ! change the direction
    1524             :                         line_search_error = 0.0_dp
    1525             :                         denom = 0.0_dp
    1526             :                         denom2 = 0.0_dp
    1527             : 
    1528         884 :                         DO ispin = 1, nspins
    1529             : 
    1530         442 :                            CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    1531         442 :                            line_search_error = line_search_error + tempreal
    1532         442 :                            CALL dbcsr_dot(grad(ispin), grad(ispin), tempreal)
    1533         442 :                            denom = denom + tempreal
    1534         442 :                            CALL dbcsr_dot(step(ispin), step(ispin), tempreal)
    1535         884 :                            denom2 = denom2 + tempreal
    1536             : 
    1537             :                         END DO ! ispin
    1538             : 
    1539             :                         ! cosine of the angle between the step and grad
    1540             :                         ! (must be close to zero at convergence)
    1541         442 :                         line_search_error = line_search_error/SQRT(denom)/SQRT(denom2)
    1542             : 
    1543         442 :                         IF (ABS(line_search_error) .GT. optimizer%lin_search_eps_error) THEN
    1544          40 :                            line_search = .TRUE.
    1545          40 :                            line_search_iteration = line_search_iteration + 1
    1546             :                         ELSE
    1547         402 :                            line_search = .FALSE.
    1548         402 :                            line_search_iteration = 0
    1549         402 :                            IF (grad_norm .LT. eps_skip_gradients) THEN
    1550           0 :                               fixed_line_search_niter = ABS(almo_scf_env%integer04)
    1551             :                            END IF
    1552             :                         END IF
    1553             : 
    1554             :                      END IF
    1555             : 
    1556             :                   ELSE ! decision for fixed_line_search_niter
    1557             : 
    1558           0 :                      IF (.NOT. line_search) THEN
    1559           0 :                         line_search = .TRUE.
    1560           0 :                         line_search_iteration = line_search_iteration + 1
    1561             :                      ELSE
    1562           0 :                         IF (line_search_iteration .EQ. fixed_line_search_niter) THEN
    1563           0 :                            line_search = .FALSE.
    1564             :                            line_search_iteration = 0
    1565           0 :                            line_search_iteration = line_search_iteration + 1
    1566             :                         END IF
    1567             :                      END IF
    1568             : 
    1569             :                   END IF ! fixed_line_search_niter fork
    1570             : 
    1571             :                END IF ! iteration.ne.0
    1572             : 
    1573         956 :                IF (line_search) THEN
    1574         462 :                   energy_diff = 0.0_dp
    1575             :                ELSE
    1576         494 :                   energy_diff = energy_new - energy_old
    1577         494 :                   energy_old = energy_new
    1578             :                END IF
    1579             : 
    1580             :                ! update the step direction
    1581         956 :                IF (.NOT. line_search) THEN
    1582             : 
    1583             :                   !IF (unit_nr>0) THEN
    1584             :                   !   WRITE(unit_nr,*) "....updating step direction...."
    1585             :                   !ENDIF
    1586             : 
    1587         988 :                   cg_iteration = cg_iteration + 1
    1588             : 
    1589             :                   ! save the previous step
    1590         988 :                   DO ispin = 1, nspins
    1591         988 :                      CALL dbcsr_copy(prev_step(ispin), step(ispin))
    1592             :                   END DO ! ispin
    1593             : 
    1594             :                   ! compute the new step (apply preconditioner if available)
    1595           0 :                   SELECT CASE (prec_type)
    1596             :                   CASE (xalmo_prec_full)
    1597             : 
    1598             :                      ! solving approximate Newton eq in the full (linearized) space
    1599             :                      CALL newton_grad_to_step( &
    1600             :                         optimizer=almo_scf_env%opt_xalmo_newton_pcg_solver, &
    1601             :                         m_grad=grad(:), &
    1602             :                         m_delta=step(:), &
    1603             :                         m_s=almo_scf_env%matrix_s(:), &
    1604             :                         m_ks=almo_scf_env%matrix_ks(:), &
    1605             :                         m_siginv=almo_scf_env%matrix_sigma_inv(:), &
    1606             :                         m_quench_t=quench_t(:), &
    1607             :                         m_FTsiginv=FTsiginv(:), &
    1608             :                         m_siginvTFTsiginv=siginvTFTsiginv(:), &
    1609             :                         m_ST=ST(:), &
    1610             :                         m_t=matrix_t_out(:), &
    1611             :                         m_sig_sqrti_ii=m_sig_sqrti_ii(:), &
    1612             :                         domain_s_inv=almo_scf_env%domain_s_inv(:, :), &
    1613             :                         domain_r_down=domain_r_down(:, :), &
    1614             :                         domain_map=almo_scf_env%domain_map(:), &
    1615             :                         cpu_of_domain=almo_scf_env%cpu_of_domain, &
    1616             :                         nocc_of_domain=almo_scf_env%nocc_of_domain(:, :), &
    1617             :                         para_env=almo_scf_env%para_env, &
    1618             :                         blacs_env=almo_scf_env%blacs_env, &
    1619             :                         eps_filter=almo_scf_env%eps_filter, &
    1620             :                         optimize_theta=optimize_theta, &
    1621             :                         penalty_occ_vol=penalty_occ_vol, &
    1622             :                         normalize_orbitals=normalize_orbitals, &
    1623             :                         penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(:), &
    1624             :                         penalty_occ_vol_pf2=penalty_occ_vol_h_prefactor(:), &
    1625             :                         special_case=my_special_case &
    1626           0 :                         )
    1627             : 
    1628             :                   CASE (xalmo_prec_domain)
    1629             : 
    1630             :                      ! compute and invert preconditioner?
    1631         494 :                      IF (.NOT. blissful_neglect .AND. &
    1632             :                          ((just_started .AND. perturbation_only) .OR. &
    1633             :                           (iteration .EQ. 0 .AND. (.NOT. perturbation_only))) &
    1634             :                          ) THEN
    1635             : 
    1636             :                         ! computing preconditioner
    1637         148 :                         DO ispin = 1, nspins
    1638             :                            CALL compute_preconditioner( &
    1639             :                               domain_prec_out=almo_scf_env%domain_preconditioner(:, ispin), &
    1640             :                               m_prec_out=prec_vv(ispin), &
    1641             :                               m_ks=almo_scf_env%matrix_ks(ispin), &
    1642             :                               m_s=almo_scf_env%matrix_s(1), &
    1643             :                               m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    1644             :                               m_quench_t=quench_t(ispin), &
    1645             :                               m_FTsiginv=FTsiginv(ispin), &
    1646             :                               m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    1647             :                               m_ST=ST(ispin), &
    1648             :                               para_env=almo_scf_env%para_env, &
    1649             :                               blacs_env=almo_scf_env%blacs_env, &
    1650             :                               nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    1651             :                               domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    1652             :                               domain_r_down=domain_r_down(:, ispin), &
    1653             :                               cpu_of_domain=almo_scf_env%cpu_of_domain, &
    1654             :                               domain_map=almo_scf_env%domain_map(ispin), &
    1655             :                               assume_t0_q0x=assume_t0_q0x, &
    1656             :                               penalty_occ_vol=penalty_occ_vol, &
    1657             :                               penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    1658             :                               eps_filter=almo_scf_env%eps_filter, &
    1659             :                               neg_thr=0.5_dp, &
    1660             :                               spin_factor=spin_factor, &
    1661             :                               skip_inversion=.FALSE., &
    1662         568 :                               special_case=my_special_case)
    1663             :                         END DO ! ispin
    1664             :                      END IF ! compute_prec
    1665             : 
    1666             :                      !IF (unit_nr>0) THEN
    1667             :                      !   WRITE(unit_nr,*) "....applying precomputed preconditioner...."
    1668             :                      !ENDIF
    1669             : 
    1670         494 :                      IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
    1671             :                          my_special_case .EQ. xalmo_case_fully_deloc) THEN
    1672             : 
    1673         488 :                         DO ispin = 1, nspins
    1674             : 
    1675             :                            CALL dbcsr_multiply("N", "N", -1.0_dp, &
    1676             :                                                prec_vv(ispin), &
    1677             :                                                grad(ispin), &
    1678             :                                                0.0_dp, step(ispin), &
    1679         488 :                                                filter_eps=almo_scf_env%eps_filter)
    1680             : 
    1681             :                         END DO ! ispin
    1682             : 
    1683             :                      ELSE
    1684             : 
    1685             :                         !!! RZK-warning Currently for non-theta only
    1686         250 :                         IF (optimize_theta) THEN
    1687           0 :                            CPABORT("theta is NYI")
    1688             :                         END IF
    1689             : 
    1690         500 :                         DO ispin = 1, nspins
    1691             : 
    1692             :                            CALL apply_domain_operators( &
    1693             :                               matrix_in=grad(ispin), &
    1694             :                               matrix_out=step(ispin), &
    1695             :                               operator1=almo_scf_env%domain_preconditioner(:, ispin), &
    1696             :                               dpattern=quench_t(ispin), &
    1697             :                               map=almo_scf_env%domain_map(ispin), &
    1698             :                               node_of_domain=almo_scf_env%cpu_of_domain, &
    1699             :                               my_action=0, &
    1700         250 :                               filter_eps=almo_scf_env%eps_filter)
    1701         500 :                            CALL dbcsr_scale(step(ispin), -1.0_dp)
    1702             : 
    1703             :                            !CALL dbcsr_copy(m_tmp_no_3,&
    1704             :                            !        quench_t(ispin))
    1705             :                            !CALL inverse_of_elements(m_tmp_no_3)
    1706             :                            !CALL dbcsr_copy(m_tmp_no_2,step)
    1707             :                            !CALL dbcsr_hadamard_product(&
    1708             :                            !        m_tmp_no_2,&
    1709             :                            !        m_tmp_no_3,&
    1710             :                            !        step)
    1711             :                            !CALL dbcsr_copy(m_tmp_no_3,quench_t(ispin))
    1712             : 
    1713             :                         END DO ! ispin
    1714             : 
    1715             :                      END IF ! special case
    1716             : 
    1717             :                   CASE (xalmo_prec_zero)
    1718             : 
    1719             :                      ! no preconditioner
    1720         494 :                      DO ispin = 1, nspins
    1721             : 
    1722           0 :                         CALL dbcsr_copy(step(ispin), grad(ispin))
    1723           0 :                         CALL dbcsr_scale(step(ispin), -1.0_dp)
    1724             : 
    1725             :                      END DO ! ispin
    1726             : 
    1727             :                   END SELECT ! preconditioner type fork
    1728             : 
    1729             :                   ! check whether we need to reset conjugate directions
    1730         494 :                   IF (iteration .EQ. 0) THEN
    1731          92 :                      reset_conjugator = .TRUE.
    1732             :                   END IF
    1733             : 
    1734             :                   ! compute the conjugation coefficient - beta
    1735         494 :                   IF (.NOT. reset_conjugator) THEN
    1736             : 
    1737             :                      CALL compute_cg_beta( &
    1738             :                         beta=beta, &
    1739             :                         reset_conjugator=reset_conjugator, &
    1740             :                         conjugator=optimizer%conjugator, &
    1741             :                         grad=grad(:), &
    1742             :                         prev_grad=prev_grad(:), &
    1743             :                         step=step(:), &
    1744             :                         prev_step=prev_step(:), &
    1745             :                         prev_minus_prec_grad=prev_minus_prec_grad(:) &
    1746         402 :                         )
    1747             : 
    1748             :                   END IF
    1749             : 
    1750         494 :                   IF (reset_conjugator) THEN
    1751             : 
    1752          92 :                      beta = 0.0_dp
    1753          92 :                      IF (unit_nr > 0 .AND. (.NOT. just_started)) THEN
    1754           3 :                         WRITE (unit_nr, '(T2,A35)') "Re-setting conjugator to zero"
    1755             :                      END IF
    1756          92 :                      reset_conjugator = .FALSE.
    1757             : 
    1758             :                   END IF
    1759             : 
    1760             :                   ! save the preconditioned gradient (useful for beta)
    1761         988 :                   DO ispin = 1, nspins
    1762             : 
    1763         494 :                      CALL dbcsr_copy(prev_minus_prec_grad(ispin), step(ispin))
    1764             : 
    1765             :                      !IF (unit_nr>0) THEN
    1766             :                      !   WRITE(unit_nr,*) "....final beta....", beta
    1767             :                      !ENDIF
    1768             : 
    1769             :                      ! conjugate the step direction
    1770         988 :                      CALL dbcsr_add(step(ispin), prev_step(ispin), 1.0_dp, beta)
    1771             : 
    1772             :                   END DO ! ispin
    1773             : 
    1774             :                END IF ! update the step direction
    1775             : 
    1776             :                ! estimate the step size
    1777         956 :                IF (.NOT. line_search) THEN
    1778             :                   ! we just changed the direction and
    1779             :                   ! we have only E and grad from the current step
    1780             :                   ! it is not enouhg to compute step_size - just guess it
    1781         494 :                   e0 = energy_new
    1782         494 :                   g0 = 0.0_dp
    1783         988 :                   DO ispin = 1, nspins
    1784         494 :                      CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    1785         988 :                      g0 = g0 + tempreal
    1786             :                   END DO ! ispin
    1787         494 :                   IF (iteration .EQ. 0) THEN
    1788          92 :                      step_size = optimizer%lin_search_step_size_guess
    1789             :                   ELSE
    1790         402 :                      IF (next_step_size_guess .LE. 0.0_dp) THEN
    1791           2 :                         step_size = optimizer%lin_search_step_size_guess
    1792             :                      ELSE
    1793             :                         ! take the last value
    1794         400 :                         step_size = next_step_size_guess*1.05_dp
    1795             :                      END IF
    1796             :                   END IF
    1797             :                   !IF (unit_nr > 0) THEN
    1798             :                   !   WRITE (unit_nr, '(A2,3F12.5)') &
    1799             :                   !      "EG", e0, g0, step_size
    1800             :                   !ENDIF
    1801         494 :                   next_step_size_guess = step_size
    1802             :                ELSE
    1803         462 :                   IF (fixed_line_search_niter .EQ. 0) THEN
    1804         462 :                      e1 = energy_new
    1805         462 :                      g1 = 0.0_dp
    1806         924 :                      DO ispin = 1, nspins
    1807         462 :                         CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    1808         924 :                         g1 = g1 + tempreal
    1809             :                      END DO ! ispin
    1810             :                      ! we have accumulated some points along this direction
    1811             :                      ! use only the most recent g0 (quadratic approximation)
    1812         462 :                      appr_sec_der = (g1 - g0)/step_size
    1813             :                      !IF (unit_nr > 0) THEN
    1814             :                      !   WRITE (unit_nr, '(A2,7F12.5)') &
    1815             :                      !      "EG", e0, e1, g0, g1, appr_sec_der, step_size, -g1/appr_sec_der
    1816             :                      !ENDIF
    1817         462 :                      step_size = -g1/appr_sec_der
    1818         462 :                      e0 = e1
    1819         462 :                      g0 = g1
    1820             :                   ELSE
    1821             :                      ! use e0, g0 and e1 to compute g1 and make a step
    1822             :                      ! if the next iteration is also line_search
    1823             :                      ! use e1 and the calculated g1 as e0 and g0
    1824           0 :                      e1 = energy_new
    1825           0 :                      appr_sec_der = 2.0*((e1 - e0)/step_size - g0)/step_size
    1826           0 :                      g1 = appr_sec_der*step_size + g0
    1827             :                      !IF (unit_nr > 0) THEN
    1828             :                      !   WRITE (unit_nr, '(A2,7F12.5)') &
    1829             :                      !      "EG", e0, e1, g0, g1, appr_sec_der, step_size, -g1/appr_sec_der
    1830             :                      !ENDIF
    1831             :                      !appr_sec_der=(g1-g0)/step_size
    1832           0 :                      step_size = -g1/appr_sec_der
    1833           0 :                      e0 = e1
    1834           0 :                      g0 = g1
    1835             :                   END IF
    1836         462 :                   next_step_size_guess = next_step_size_guess + step_size
    1837             :                END IF
    1838             : 
    1839             :                ! update theta
    1840        1912 :                DO ispin = 1, nspins
    1841        1912 :                   CALL dbcsr_add(m_theta(ispin), step(ispin), 1.0_dp, step_size)
    1842             :                END DO ! ispin
    1843             : 
    1844             :             END IF ! not.prepare_to_exit
    1845             : 
    1846        1048 :             IF (line_search) THEN
    1847         482 :                iter_type = "LS"
    1848             :             ELSE
    1849         566 :                iter_type = "CG"
    1850             :             END IF
    1851             : 
    1852        1048 :             t2 = m_walltime()
    1853        1048 :             IF (unit_nr > 0) THEN
    1854         524 :                iter_type = TRIM("ALMO SCF "//iter_type)
    1855             :                WRITE (unit_nr, '(T2,A13,I6,F23.10,E14.5,F14.9,F9.2)') &
    1856         524 :                   iter_type, iteration, &
    1857         524 :                   energy_new, energy_diff, grad_norm, &
    1858        1048 :                   t2 - t1
    1859         524 :                IF (penalty_occ_local .OR. penalty_occ_vol) THEN
    1860             :                   WRITE (unit_nr, '(T2,A25,F23.10)') &
    1861           0 :                      "Energy component:", (energy_new - penalty_func_new - localization_obj_function)
    1862             :                END IF
    1863         524 :                IF (penalty_occ_local) THEN
    1864             :                   WRITE (unit_nr, '(T2,A25,F23.10)') &
    1865           0 :                      "Localization component:", localization_obj_function
    1866             :                END IF
    1867         524 :                IF (penalty_occ_vol) THEN
    1868             :                   WRITE (unit_nr, '(T2,A25,F23.10)') &
    1869           0 :                      "Penalty component:", penalty_func_new
    1870             :                END IF
    1871             :             END IF
    1872             : 
    1873        1048 :             IF (my_special_case .EQ. xalmo_case_block_diag) THEN
    1874          46 :                IF (penalty_occ_vol) THEN
    1875           0 :                   almo_scf_env%almo_scf_energy = energy_new - penalty_func_new - localization_obj_function
    1876             :                ELSE
    1877          46 :                   almo_scf_env%almo_scf_energy = energy_new - localization_obj_function
    1878             :                END IF
    1879             :             END IF
    1880             : 
    1881        1048 :             t1 = m_walltime()
    1882             : 
    1883        1048 :             iteration = iteration + 1
    1884        1048 :             IF (prepare_to_exit) EXIT
    1885             : 
    1886             :          END DO ! inner SCF loop
    1887             : 
    1888          92 :          IF (converged .OR. (outer_iteration .GE. outer_max_iter)) THEN
    1889          86 :             outer_prepare_to_exit = .TRUE.
    1890             :          END IF
    1891             : 
    1892          92 :          outer_iteration = outer_iteration + 1
    1893          92 :          IF (outer_prepare_to_exit) EXIT
    1894             : 
    1895             :       END DO ! outer SCF loop
    1896             : 
    1897         172 :       DO ispin = 1, nspins
    1898          86 :          IF (converged .AND. almo_mathematica) THEN
    1899             :             CPWARN_IF(ispin .GT. 1, "Mathematica files will be overwritten")
    1900             :             CALL print_mathematica_matrix(matrix_t_out(ispin), "matrixTf.dat")
    1901             :          END IF
    1902             :       END DO ! ispin
    1903             : 
    1904             :       ! post SCF-loop calculations
    1905          86 :       IF (converged) THEN
    1906             : 
    1907             :          CALL wrap_up_xalmo_scf( &
    1908             :             qs_env=qs_env, &
    1909             :             almo_scf_env=almo_scf_env, &
    1910             :             perturbation_in=perturbation_only, &
    1911             :             m_xalmo_in=matrix_t_out, &
    1912             :             m_quench_in=quench_t, &
    1913          86 :             energy_inout=energy_new)
    1914             : 
    1915             :       END IF ! if converged
    1916             : 
    1917         172 :       DO ispin = 1, nspins
    1918          86 :          CALL dbcsr_release(prec_vv(ispin))
    1919          86 :          CALL dbcsr_release(STsiginv_0(ispin))
    1920          86 :          CALL dbcsr_release(ST(ispin))
    1921          86 :          CALL dbcsr_release(FTsiginv(ispin))
    1922          86 :          CALL dbcsr_release(siginvTFTsiginv(ispin))
    1923          86 :          CALL dbcsr_release(prev_grad(ispin))
    1924          86 :          CALL dbcsr_release(prev_step(ispin))
    1925          86 :          CALL dbcsr_release(grad(ispin))
    1926          86 :          CALL dbcsr_release(step(ispin))
    1927          86 :          CALL dbcsr_release(prev_minus_prec_grad(ispin))
    1928          86 :          CALL dbcsr_release(m_theta(ispin))
    1929          86 :          CALL dbcsr_release(m_t_in_local(ispin))
    1930          86 :          CALL dbcsr_release(m_sig_sqrti_ii(ispin))
    1931          86 :          CALL release_submatrices(domain_r_down(:, ispin))
    1932          86 :          CALL release_submatrices(bad_modes_projector_down(:, ispin))
    1933          86 :          CALL dbcsr_release(tempNOcc(ispin))
    1934          86 :          CALL dbcsr_release(tempNOcc_1(ispin))
    1935         172 :          CALL dbcsr_release(tempOccOcc(ispin))
    1936             :       END DO ! ispin
    1937             : 
    1938          86 :       DEALLOCATE (tempNOcc)
    1939          86 :       DEALLOCATE (tempNOcc_1)
    1940          86 :       DEALLOCATE (tempOccOcc)
    1941          86 :       DEALLOCATE (prec_vv)
    1942          86 :       DEALLOCATE (siginvTFTsiginv)
    1943          86 :       DEALLOCATE (STsiginv_0)
    1944          86 :       DEALLOCATE (FTsiginv)
    1945          86 :       DEALLOCATE (ST)
    1946          86 :       DEALLOCATE (prev_grad)
    1947          86 :       DEALLOCATE (grad)
    1948          86 :       DEALLOCATE (prev_step)
    1949          86 :       DEALLOCATE (step)
    1950          86 :       DEALLOCATE (prev_minus_prec_grad)
    1951          86 :       DEALLOCATE (m_sig_sqrti_ii)
    1952             : 
    1953         684 :       DEALLOCATE (domain_r_down)
    1954         684 :       DEALLOCATE (bad_modes_projector_down)
    1955             : 
    1956          86 :       DEALLOCATE (penalty_occ_vol_g_prefactor)
    1957          86 :       DEALLOCATE (penalty_occ_vol_h_prefactor)
    1958          86 :       DEALLOCATE (grad_norm_spin)
    1959          86 :       DEALLOCATE (nocc)
    1960             : 
    1961          86 :       DEALLOCATE (m_theta, m_t_in_local)
    1962          86 :       IF (penalty_occ_local) THEN
    1963           0 :          DO idim0 = 1, dim_op
    1964           0 :             DO reim = 1, SIZE(op_sm_set_qs, 1)
    1965           0 :                DEALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    1966           0 :                DEALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    1967             :             END DO
    1968             :          END DO
    1969           0 :          DEALLOCATE (op_sm_set_qs)
    1970           0 :          DEALLOCATE (op_sm_set_almo)
    1971           0 :          DEALLOCATE (weights)
    1972             :       END IF
    1973             : 
    1974          86 :       IF (.NOT. converged .AND. .NOT. optimizer%early_stopping_on) THEN
    1975           0 :          CPABORT("Optimization not converged! ")
    1976             :       END IF
    1977             : 
    1978          86 :       CALL timestop(handle)
    1979             : 
    1980         172 :    END SUBROUTINE almo_scf_xalmo_pcg
    1981             : 
    1982             : ! **************************************************************************************************
    1983             : !> \brief Optimization of NLMOs using PCG minimizers
    1984             : !> \param qs_env ...
    1985             : !> \param optimizer   controls the optimization algorithm
    1986             : !> \param matrix_s - AO overlap (NAOs x NAOs)
    1987             : !> \param matrix_mo_in - initial MOs (NAOs x NMOs)
    1988             : !> \param matrix_mo_out - final MOs (NAOs x NMOs)
    1989             : !> \param template_matrix_sigma - template (NMOs x NMOs)
    1990             : !> \param overlap_determinant - the determinant of the MOs overlap
    1991             : !> \param mat_distr_aos - info on the distribution of AOs
    1992             : !> \param virtuals ...
    1993             : !> \param eps_filter ...
    1994             : !> \par History
    1995             : !>       2018.10 created [Rustam Z Khaliullin]
    1996             : !> \author Rustam Z Khaliullin
    1997             : ! **************************************************************************************************
    1998           8 :    SUBROUTINE almo_scf_construct_nlmos(qs_env, optimizer, &
    1999             :                                        matrix_s, matrix_mo_in, matrix_mo_out, &
    2000             :                                        template_matrix_sigma, overlap_determinant, &
    2001             :                                        mat_distr_aos, virtuals, eps_filter)
    2002             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    2003             :       TYPE(optimizer_options_type), INTENT(INOUT)        :: optimizer
    2004             :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix_s
    2005             :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:), &
    2006             :          INTENT(INOUT)                                   :: matrix_mo_in, matrix_mo_out
    2007             :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:), &
    2008             :          INTENT(IN)                                      :: template_matrix_sigma
    2009             :       REAL(KIND=dp), INTENT(INOUT)                       :: overlap_determinant
    2010             :       INTEGER, INTENT(IN)                                :: mat_distr_aos
    2011             :       LOGICAL, INTENT(IN)                                :: virtuals
    2012             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    2013             : 
    2014             :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_construct_nlmos'
    2015             : 
    2016             :       CHARACTER(LEN=30)                                  :: iter_type, print_string
    2017             :       INTEGER :: cg_iteration, dim_op, handle, iatom, idim0, isgf, ispin, iteration, &
    2018             :          line_search_iteration, linear_search_type, max_iter, natom, ncol, nspins, &
    2019             :          outer_iteration, outer_max_iter, prec_type, reim, unit_nr
    2020          16 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: first_sgf, last_sgf, nocc, nsgf
    2021             :       LOGICAL                                            :: converged, d_bfgs, just_started, l_bfgs, &
    2022             :                                                             line_search, outer_prepare_to_exit, &
    2023             :                                                             prepare_to_exit, reset_conjugator
    2024             :       REAL(KIND=dp) :: appr_sec_der, beta, bfgs_rho, bfgs_sum, denom, denom2, e0, e1, g0, g0sign, &
    2025             :          g1, g1sign, grad_norm, line_search_error, localization_obj_function, &
    2026             :          localization_obj_function_ispin, next_step_size_guess, obj_function_ispin, objf_diff, &
    2027             :          objf_new, objf_old, penalty_amplitude, penalty_func_ispin, penalty_func_new, spin_factor, &
    2028             :          step_size, t1, t2, tempreal
    2029           8 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: diagonal, grad_norm_spin, &
    2030           8 :                                                             penalty_vol_prefactor, &
    2031           8 :                                                             suggested_vol_penalty, weights
    2032             :       TYPE(cell_type), POINTER                           :: cell
    2033             :       TYPE(cp_logger_type), POINTER                      :: logger
    2034           8 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: qs_matrix_s
    2035           8 :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: op_sm_set_almo, op_sm_set_qs
    2036           8 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: approx_inv_hessian, bfgs_s, bfgs_y, grad, &
    2037           8 :          m_S0, m_sig_sqrti_ii, m_siginv, m_sigma, m_t_mo_local, m_theta, m_theta_normalized, &
    2038           8 :          prev_grad, prev_m_theta, prev_minus_prec_grad, prev_step, step, tempNOcc1, tempOccOcc1, &
    2039           8 :          tempOccOcc2, tempOccOcc3
    2040           8 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:, :, :)  :: m_B0
    2041          24 :       TYPE(lbfgs_history_type)                           :: nlmo_lbfgs_history
    2042             :       TYPE(mp_comm_type)                                 :: group
    2043           8 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    2044           8 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    2045             : 
    2046           8 :       CALL timeset(routineN, handle)
    2047             : 
    2048             :       ! get a useful output_unit
    2049           8 :       logger => cp_get_default_logger()
    2050           8 :       IF (logger%para_env%is_source()) THEN
    2051           4 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    2052             :       ELSE
    2053             :          unit_nr = -1
    2054             :       END IF
    2055             : 
    2056           8 :       nspins = SIZE(matrix_mo_in)
    2057             : 
    2058           8 :       IF (unit_nr > 0) THEN
    2059           4 :          WRITE (unit_nr, *)
    2060           4 :          IF (.NOT. virtuals) THEN
    2061           4 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 24), &
    2062           8 :                " Optimization of occupied NLMOs ", REPEAT("-", 23)
    2063             :          ELSE
    2064           0 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 24), &
    2065           0 :                " Optimization of virtual NLMOs ", REPEAT("-", 24)
    2066             :          END IF
    2067           4 :          WRITE (unit_nr, *)
    2068           4 :          WRITE (unit_nr, '(T2,A13,A6,A23,A14,A14,A9)') "Method", "Iter", &
    2069           8 :             "Objective Function", "Change", "Convergence", "Time"
    2070           4 :          WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
    2071             :       END IF
    2072             : 
    2073           8 :       NULLIFY (particle_set)
    2074             : 
    2075             :       CALL get_qs_env(qs_env=qs_env, &
    2076             :                       matrix_s=qs_matrix_s, &
    2077             :                       cell=cell, &
    2078             :                       particle_set=particle_set, &
    2079           8 :                       qs_kind_set=qs_kind_set)
    2080             : 
    2081           8 :       natom = SIZE(particle_set, 1)
    2082          24 :       ALLOCATE (first_sgf(natom))
    2083          16 :       ALLOCATE (last_sgf(natom))
    2084          16 :       ALLOCATE (nsgf(natom))
    2085             :       !   construction of
    2086             :       CALL get_particle_set(particle_set, qs_kind_set, &
    2087           8 :                             first_sgf=first_sgf, last_sgf=last_sgf, nsgf=nsgf)
    2088             : 
    2089             :       ! m_theta contains a set of variational parameters
    2090             :       ! that define one-electron orbitals
    2091          32 :       ALLOCATE (m_theta(nspins))
    2092          16 :       DO ispin = 1, nspins
    2093             :          CALL dbcsr_create(m_theta(ispin), &
    2094             :                            template=template_matrix_sigma(ispin), &
    2095           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2096             :          ! create initial guess for the main variable - identity matrix
    2097           8 :          CALL dbcsr_set(m_theta(ispin), 0.0_dp)
    2098          16 :          CALL dbcsr_add_on_diag(m_theta(ispin), 1.0_dp)
    2099             :       END DO
    2100             : 
    2101           8 :       SELECT CASE (optimizer%opt_penalty%operator_type)
    2102             :       CASE (op_loc_berry)
    2103             : 
    2104           0 :          IF (cell%orthorhombic) THEN
    2105           0 :             dim_op = 3
    2106             :          ELSE
    2107           0 :             dim_op = 6
    2108             :          END IF
    2109           0 :          ALLOCATE (weights(6))
    2110           0 :          weights = 0.0_dp
    2111           0 :          CALL initialize_weights(cell, weights)
    2112           0 :          ALLOCATE (op_sm_set_qs(2, dim_op))
    2113           0 :          ALLOCATE (op_sm_set_almo(2, dim_op))
    2114             :          ! allocate space for T0^t.B.T0
    2115           0 :          ALLOCATE (m_B0(2, dim_op, nspins))
    2116           0 :          DO idim0 = 1, dim_op
    2117           0 :             DO reim = 1, SIZE(op_sm_set_qs, 1)
    2118           0 :                NULLIFY (op_sm_set_qs(reim, idim0)%matrix, op_sm_set_almo(reim, idim0)%matrix)
    2119           0 :                ALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    2120           0 :                ALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    2121             :                CALL dbcsr_copy(op_sm_set_qs(reim, idim0)%matrix, qs_matrix_s(1)%matrix, &
    2122           0 :                              name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
    2123           0 :                CALL dbcsr_set(op_sm_set_qs(reim, idim0)%matrix, 0.0_dp)
    2124             :                CALL dbcsr_copy(op_sm_set_almo(reim, idim0)%matrix, matrix_s, &
    2125           0 :                              name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
    2126           0 :                CALL dbcsr_set(op_sm_set_almo(reim, idim0)%matrix, 0.0_dp)
    2127           0 :                DO ispin = 1, nspins
    2128             :                   CALL dbcsr_create(m_B0(reim, idim0, ispin), &
    2129             :                                     template=m_theta(ispin), &
    2130           0 :                                     matrix_type=dbcsr_type_no_symmetry)
    2131           0 :                   CALL dbcsr_set(m_B0(reim, idim0, ispin), 0.0_dp)
    2132             :                END DO
    2133             :             END DO
    2134             :          END DO
    2135             : 
    2136           0 :          CALL compute_berry_operator(qs_env, cell, op_sm_set_qs, dim_op)
    2137             : 
    2138             :       CASE (op_loc_pipek)
    2139             : 
    2140           8 :          dim_op = natom
    2141          24 :          ALLOCATE (weights(dim_op))
    2142          80 :          weights = 1.0_dp
    2143             : 
    2144         184 :          ALLOCATE (m_B0(1, dim_op, nspins))
    2145             :          !m_B0 first dim is 1 now!
    2146          88 :          DO idim0 = 1, dim_op
    2147         152 :             DO reim = 1, 1 !SIZE(op_sm_set_qs, 1)
    2148         216 :                DO ispin = 1, nspins
    2149             :                   CALL dbcsr_create(m_B0(reim, idim0, ispin), &
    2150             :                                     template=m_theta(ispin), &
    2151          72 :                                     matrix_type=dbcsr_type_no_symmetry)
    2152         144 :                   CALL dbcsr_set(m_B0(reim, idim0, ispin), 0.0_dp)
    2153             :                END DO
    2154             :             END DO
    2155             :          END DO
    2156             : 
    2157             :       END SELECT
    2158             : 
    2159             :       ! penalty amplitude adjusts the strenght of volume conservation
    2160           8 :       penalty_amplitude = optimizer%opt_penalty%penalty_strength
    2161             :       !penalty_occ_vol = ( optimizer%opt_penalty%occ_vol_method .NE. penalty_type_none )
    2162             :       !penalty_local = ( optimizer%opt_penalty%occ_loc_method .NE. penalty_type_none )
    2163             : 
    2164             :       ! preconditioner control
    2165           8 :       prec_type = optimizer%preconditioner
    2166             : 
    2167             :       ! use diagonal BFGS if preconditioner is set
    2168           8 :       d_bfgs = .FALSE.
    2169           8 :       l_bfgs = .FALSE.
    2170           8 :       IF (prec_type .NE. xalmo_prec_zero) l_bfgs = .TRUE.
    2171           8 :       IF (l_bfgs .AND. (optimizer%conjugator .NE. cg_zero)) THEN
    2172           0 :          CPABORT("Cannot use conjugators with BFGS")
    2173             :       END IF
    2174           8 :       IF (l_bfgs) THEN
    2175           8 :          CALL lbfgs_create(nlmo_lbfgs_history, nspins, nstore=10)
    2176             :       END IF
    2177             : 
    2178             :       IF (nspins == 1) THEN
    2179             :          spin_factor = 2.0_dp
    2180             :       ELSE
    2181             :          spin_factor = 1.0_dp
    2182             :       END IF
    2183             : 
    2184          24 :       ALLOCATE (grad_norm_spin(nspins))
    2185          24 :       ALLOCATE (nocc(nspins))
    2186          16 :       ALLOCATE (penalty_vol_prefactor(nspins))
    2187          16 :       ALLOCATE (suggested_vol_penalty(nspins))
    2188             : 
    2189             :       ! create a local copy of matrix_mo_in because
    2190             :       ! matrix_mo_in and matrix_mo_out can be the same matrix
    2191             :       ! we need to make sure data in matrix_mo_in is intact
    2192             :       ! after we start writing to matrix_mo_out
    2193          24 :       ALLOCATE (m_t_mo_local(nspins))
    2194          16 :       DO ispin = 1, nspins
    2195             :          CALL dbcsr_create(m_t_mo_local(ispin), &
    2196             :                            template=matrix_mo_in(ispin), &
    2197           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2198          16 :          CALL dbcsr_copy(m_t_mo_local(ispin), matrix_mo_in(ispin))
    2199             :       END DO
    2200             : 
    2201          24 :       ALLOCATE (approx_inv_hessian(nspins))
    2202          24 :       ALLOCATE (m_theta_normalized(nspins))
    2203          32 :       ALLOCATE (prev_m_theta(nspins))
    2204          24 :       ALLOCATE (m_S0(nspins))
    2205          24 :       ALLOCATE (prev_grad(nspins))
    2206          24 :       ALLOCATE (grad(nspins))
    2207          24 :       ALLOCATE (prev_step(nspins))
    2208          24 :       ALLOCATE (step(nspins))
    2209          24 :       ALLOCATE (prev_minus_prec_grad(nspins))
    2210          24 :       ALLOCATE (m_sig_sqrti_ii(nspins))
    2211          24 :       ALLOCATE (m_sigma(nspins))
    2212          24 :       ALLOCATE (m_siginv(nspins))
    2213          32 :       ALLOCATE (tempNOcc1(nspins))
    2214          24 :       ALLOCATE (tempOccOcc1(nspins))
    2215          24 :       ALLOCATE (tempOccOcc2(nspins))
    2216          24 :       ALLOCATE (tempOccOcc3(nspins))
    2217          24 :       ALLOCATE (bfgs_y(nspins))
    2218          24 :       ALLOCATE (bfgs_s(nspins))
    2219             : 
    2220          16 :       DO ispin = 1, nspins
    2221             : 
    2222             :          ! init temporary storage
    2223             :          CALL dbcsr_create(tempNOcc1(ispin), &
    2224             :                            template=matrix_mo_out(ispin), &
    2225           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2226             :          CALL dbcsr_create(approx_inv_hessian(ispin), &
    2227             :                            template=m_theta(ispin), &
    2228           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2229             :          CALL dbcsr_create(m_theta_normalized(ispin), &
    2230             :                            template=m_theta(ispin), &
    2231           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2232             :          CALL dbcsr_create(prev_m_theta(ispin), &
    2233             :                            template=m_theta(ispin), &
    2234           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2235             :          CALL dbcsr_create(m_S0(ispin), &
    2236             :                            template=m_theta(ispin), &
    2237           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2238             :          CALL dbcsr_create(prev_grad(ispin), &
    2239             :                            template=m_theta(ispin), &
    2240           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2241             :          CALL dbcsr_create(grad(ispin), &
    2242             :                            template=m_theta(ispin), &
    2243           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2244             :          CALL dbcsr_create(prev_step(ispin), &
    2245             :                            template=m_theta(ispin), &
    2246           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2247             :          CALL dbcsr_create(step(ispin), &
    2248             :                            template=m_theta(ispin), &
    2249           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2250             :          CALL dbcsr_create(prev_minus_prec_grad(ispin), &
    2251             :                            template=m_theta(ispin), &
    2252           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2253             :          CALL dbcsr_create(m_sig_sqrti_ii(ispin), &
    2254             :                            template=m_theta(ispin), &
    2255           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2256             :          CALL dbcsr_create(m_sigma(ispin), &
    2257             :                            template=m_theta(ispin), &
    2258           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2259             :          CALL dbcsr_create(m_siginv(ispin), &
    2260             :                            template=m_theta(ispin), &
    2261           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2262             :          CALL dbcsr_create(tempOccOcc1(ispin), &
    2263             :                            template=m_theta(ispin), &
    2264           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2265             :          CALL dbcsr_create(tempOccOcc2(ispin), &
    2266             :                            template=m_theta(ispin), &
    2267           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2268             :          CALL dbcsr_create(tempOccOcc3(ispin), &
    2269             :                            template=m_theta(ispin), &
    2270           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2271             :          CALL dbcsr_create(bfgs_s(ispin), &
    2272             :                            template=m_theta(ispin), &
    2273           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2274             :          CALL dbcsr_create(bfgs_y(ispin), &
    2275             :                            template=m_theta(ispin), &
    2276           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2277             : 
    2278           8 :          CALL dbcsr_set(step(ispin), 0.0_dp)
    2279           8 :          CALL dbcsr_set(prev_step(ispin), 0.0_dp)
    2280             : 
    2281             :          CALL dbcsr_get_info(template_matrix_sigma(ispin), &
    2282           8 :                              nfullrows_total=nocc(ispin))
    2283             : 
    2284           8 :          penalty_vol_prefactor(ispin) = -penalty_amplitude !KEEP: * spin_factor * nocc(ispin)
    2285             : 
    2286             :          ! compute m_S0=T0^t.S.T0
    2287             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2288             :                              matrix_s, &
    2289             :                              m_t_mo_local(ispin), &
    2290             :                              0.0_dp, tempNOcc1(ispin), &
    2291           8 :                              filter_eps=eps_filter)
    2292             :          CALL dbcsr_multiply("T", "N", 1.0_dp, &
    2293             :                              m_t_mo_local(ispin), &
    2294             :                              tempNOcc1(ispin), &
    2295             :                              0.0_dp, m_S0(ispin), &
    2296           8 :                              filter_eps=eps_filter)
    2297             : 
    2298           8 :          SELECT CASE (optimizer%opt_penalty%operator_type)
    2299             : 
    2300             :          CASE (op_loc_berry)
    2301             : 
    2302             :             ! compute m_B0=T0^t.B.T0
    2303           0 :             DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
    2304             : 
    2305           0 :                DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
    2306             : 
    2307             :                   CALL matrix_qs_to_almo(op_sm_set_qs(reim, idim0)%matrix, &
    2308           0 :                                          op_sm_set_almo(reim, idim0)%matrix, mat_distr_aos)
    2309             : 
    2310             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2311             :                                       op_sm_set_almo(reim, idim0)%matrix, &
    2312             :                                       m_t_mo_local(ispin), &
    2313             :                                       0.0_dp, tempNOcc1(ispin), &
    2314           0 :                                       filter_eps=eps_filter)
    2315             : 
    2316             :                   CALL dbcsr_multiply("T", "N", 1.0_dp, &
    2317             :                                       m_t_mo_local(ispin), &
    2318             :                                       tempNOcc1(ispin), &
    2319             :                                       0.0_dp, m_B0(reim, idim0, ispin), &
    2320           0 :                                       filter_eps=eps_filter)
    2321             : 
    2322           0 :                   DEALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    2323           0 :                   DEALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    2324             : 
    2325             :                END DO
    2326             : 
    2327             :             END DO ! end loop over idim0
    2328             : 
    2329             :          CASE (op_loc_pipek)
    2330             : 
    2331             :             ! compute m_B0=T0^t.B.T0
    2332          80 :             DO iatom = 1, natom ! this loop is over "miller" ind
    2333             : 
    2334          72 :                isgf = first_sgf(iatom)
    2335          72 :                ncol = nsgf(iatom)
    2336             : 
    2337             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2338             :                                    matrix_s, &
    2339             :                                    m_t_mo_local(ispin), &
    2340             :                                    0.0_dp, tempNOcc1(ispin), &
    2341          72 :                                    filter_eps=eps_filter)
    2342             : 
    2343             :                CALL dbcsr_multiply("T", "N", 0.5_dp, &
    2344             :                                    m_t_mo_local(ispin), &
    2345             :                                    tempNOcc1(ispin), &
    2346             :                                    0.0_dp, m_B0(1, iatom, ispin), &
    2347             :                                    first_k=isgf, last_k=isgf + ncol - 1, &
    2348          72 :                                    filter_eps=eps_filter)
    2349             : 
    2350             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2351             :                                    matrix_s, &
    2352             :                                    m_t_mo_local(ispin), &
    2353             :                                    0.0_dp, tempNOcc1(ispin), &
    2354             :                                    first_k=isgf, last_k=isgf + ncol - 1, &
    2355          72 :                                    filter_eps=eps_filter)
    2356             : 
    2357             :                CALL dbcsr_multiply("T", "N", 0.5_dp, &
    2358             :                                    m_t_mo_local(ispin), &
    2359             :                                    tempNOcc1(ispin), &
    2360             :                                    1.0_dp, m_B0(1, iatom, ispin), &
    2361          80 :                                    filter_eps=eps_filter)
    2362             : 
    2363             :             END DO ! end loop over iatom
    2364             : 
    2365             :          END SELECT
    2366             : 
    2367             :       END DO ! ispin
    2368             : 
    2369           8 :       IF (optimizer%opt_penalty%operator_type .EQ. op_loc_berry) THEN
    2370           0 :          DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
    2371           0 :             DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
    2372           0 :                DEALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    2373           0 :                DEALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    2374             :             END DO
    2375             :          END DO
    2376           0 :          DEALLOCATE (op_sm_set_qs, op_sm_set_almo)
    2377             :       END IF
    2378             : 
    2379             :       ! start the outer SCF loop
    2380           8 :       outer_max_iter = optimizer%max_iter_outer_loop
    2381           8 :       outer_prepare_to_exit = .FALSE.
    2382           8 :       outer_iteration = 0
    2383           8 :       grad_norm = 0.0_dp
    2384           8 :       penalty_func_new = 0.0_dp
    2385           8 :       linear_search_type = 1 ! safe restart, no quadratic assumption, takes more steps
    2386             :       localization_obj_function = 0.0_dp
    2387             :       penalty_func_new = 0.0_dp
    2388             : 
    2389             :       DO
    2390             : 
    2391             :          ! start the inner SCF loop
    2392           8 :          max_iter = optimizer%max_iter
    2393           8 :          prepare_to_exit = .FALSE.
    2394           8 :          line_search = .FALSE.
    2395           8 :          converged = .FALSE.
    2396           8 :          iteration = 0
    2397           8 :          cg_iteration = 0
    2398           8 :          line_search_iteration = 0
    2399           8 :          obj_function_ispin = 0.0_dp
    2400           8 :          objf_new = 0.0_dp
    2401           8 :          objf_old = 0.0_dp
    2402           8 :          objf_diff = 0.0_dp
    2403           8 :          line_search_error = 0.0_dp
    2404           8 :          t1 = m_walltime()
    2405           8 :          next_step_size_guess = 0.0_dp
    2406             : 
    2407             :          DO
    2408             : 
    2409          82 :             just_started = (iteration .EQ. 0) .AND. (outer_iteration .EQ. 0)
    2410             : 
    2411         164 :             DO ispin = 1, nspins
    2412             : 
    2413          82 :                CALL dbcsr_get_info(m_sig_sqrti_ii(ispin), group=group)
    2414             : 
    2415             :                ! compute diagonal (a^t.sigma0.a)^(-1/2)
    2416             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2417             :                                    m_S0(ispin), m_theta(ispin), 0.0_dp, &
    2418             :                                    tempOccOcc1(ispin), &
    2419          82 :                                    filter_eps=eps_filter)
    2420          82 :                CALL dbcsr_set(m_sig_sqrti_ii(ispin), 0.0_dp)
    2421          82 :                CALL dbcsr_add_on_diag(m_sig_sqrti_ii(ispin), 1.0_dp)
    2422             :                CALL dbcsr_multiply("T", "N", 1.0_dp, &
    2423             :                                    m_theta(ispin), tempOccOcc1(ispin), 0.0_dp, &
    2424             :                                    m_sig_sqrti_ii(ispin), &
    2425          82 :                                    retain_sparsity=.TRUE.)
    2426         246 :                ALLOCATE (diagonal(nocc(ispin)))
    2427          82 :                CALL dbcsr_get_diag(m_sig_sqrti_ii(ispin), diagonal)
    2428          82 :                CALL group%sum(diagonal)
    2429             :                ! TODO: works for zero diagonal elements?
    2430        1368 :                diagonal(:) = 1.0_dp/SQRT(diagonal(:))
    2431          82 :                CALL dbcsr_set(m_sig_sqrti_ii(ispin), 0.0_dp)
    2432          82 :                CALL dbcsr_set_diag(m_sig_sqrti_ii(ispin), diagonal)
    2433          82 :                DEALLOCATE (diagonal)
    2434             : 
    2435             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2436             :                                    m_theta(ispin), &
    2437             :                                    m_sig_sqrti_ii(ispin), &
    2438             :                                    0.0_dp, m_theta_normalized(ispin), &
    2439          82 :                                    filter_eps=eps_filter)
    2440             : 
    2441             :                ! compute new orbitals
    2442             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2443             :                                    m_t_mo_local(ispin), &
    2444             :                                    m_theta_normalized(ispin), &
    2445             :                                    0.0_dp, matrix_mo_out(ispin), &
    2446         246 :                                    filter_eps=eps_filter)
    2447             : 
    2448             :             END DO
    2449             : 
    2450             :             ! compute objective function
    2451          82 :             localization_obj_function = 0.0_dp
    2452          82 :             penalty_func_new = 0.0_dp
    2453         164 :             DO ispin = 1, nspins
    2454             : 
    2455             :                CALL compute_obj_nlmos( &
    2456             :                   !obj_function_ispin=obj_function_ispin, &
    2457             :                   localization_obj_function_ispin=localization_obj_function_ispin, &
    2458             :                   penalty_func_ispin=penalty_func_ispin, &
    2459             :                   overlap_determinant=overlap_determinant, &
    2460             :                   m_sigma=m_sigma(ispin), &
    2461             :                   nocc=nocc(ispin), &
    2462             :                   m_B0=m_B0(:, :, ispin), &
    2463             :                   m_theta_normalized=m_theta_normalized(ispin), &
    2464             :                   template_matrix_mo=matrix_mo_out(ispin), &
    2465             :                   weights=weights, &
    2466             :                   m_S0=m_S0(ispin), &
    2467             :                   just_started=just_started, &
    2468             :                   penalty_vol_prefactor=penalty_vol_prefactor(ispin), &
    2469             :                   penalty_amplitude=penalty_amplitude, &
    2470          82 :                   eps_filter=eps_filter)
    2471             : 
    2472          82 :                localization_obj_function = localization_obj_function + localization_obj_function_ispin
    2473         164 :                penalty_func_new = penalty_func_new + penalty_func_ispin
    2474             : 
    2475             :             END DO ! ispin
    2476          82 :             objf_new = penalty_func_new + localization_obj_function
    2477             : 
    2478         164 :             DO ispin = 1, nspins
    2479             :                ! save the previous gradient to compute beta
    2480             :                ! do it only if the previous grad was computed
    2481             :                ! for .NOT.line_search
    2482         164 :                IF (line_search_iteration .EQ. 0 .AND. iteration .NE. 0) THEN
    2483          30 :                   CALL dbcsr_copy(prev_grad(ispin), grad(ispin))
    2484             :                END IF
    2485             : 
    2486             :             END DO ! ispin
    2487             : 
    2488             :             ! compute the gradient
    2489         164 :             DO ispin = 1, nspins
    2490             : 
    2491             :                CALL invert_Hotelling( &
    2492             :                   matrix_inverse=m_siginv(ispin), &
    2493             :                   matrix=m_sigma(ispin), &
    2494             :                   threshold=eps_filter*10.0_dp, &
    2495             :                   filter_eps=eps_filter, &
    2496          82 :                   silent=.FALSE.)
    2497             : 
    2498             :                CALL compute_gradient_nlmos( &
    2499             :                   m_grad_out=grad(ispin), &
    2500             :                   m_B0=m_B0(:, :, ispin), &
    2501             :                   weights=weights, &
    2502             :                   m_S0=m_S0(ispin), &
    2503             :                   m_theta_normalized=m_theta_normalized(ispin), &
    2504             :                   m_siginv=m_siginv(ispin), &
    2505             :                   m_sig_sqrti_ii=m_sig_sqrti_ii(ispin), &
    2506             :                   penalty_vol_prefactor=penalty_vol_prefactor(ispin), &
    2507             :                   eps_filter=eps_filter, &
    2508         164 :                   suggested_vol_penalty=suggested_vol_penalty(ispin))
    2509             : 
    2510             :             END DO ! ispin
    2511             : 
    2512             :             ! check convergence and other exit criteria
    2513         164 :             DO ispin = 1, nspins
    2514         164 :                grad_norm_spin(ispin) = dbcsr_maxabs(grad(ispin))
    2515             :             END DO ! ispin
    2516         246 :             grad_norm = MAXVAL(grad_norm_spin)
    2517             : 
    2518          82 :             converged = (grad_norm .LE. optimizer%eps_error)
    2519          82 :             IF (converged .OR. (iteration .GE. max_iter)) THEN
    2520             :                prepare_to_exit = .TRUE.
    2521             :             END IF
    2522             : 
    2523             :             ! it is not time to exit just yet
    2524          74 :             IF (.NOT. prepare_to_exit) THEN
    2525             : 
    2526             :                ! check the gradient along the step direction
    2527             :                ! and decide whether to switch to the line-search mode
    2528             :                ! do not do this in the first iteration
    2529          74 :                IF (iteration .NE. 0) THEN
    2530             : 
    2531             :                   ! enforce at least one line search
    2532             :                   ! without even checking the error
    2533          68 :                   IF (.NOT. line_search) THEN
    2534             : 
    2535          30 :                      line_search = .TRUE.
    2536          30 :                      line_search_iteration = line_search_iteration + 1
    2537             : 
    2538             :                   ELSE
    2539             : 
    2540             :                      ! check the line-search error and decide whether to
    2541             :                      ! change the direction
    2542             :                      line_search_error = 0.0_dp
    2543             :                      denom = 0.0_dp
    2544             :                      denom2 = 0.0_dp
    2545             : 
    2546          76 :                      DO ispin = 1, nspins
    2547             : 
    2548          38 :                         CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    2549          38 :                         line_search_error = line_search_error + tempreal
    2550          38 :                         CALL dbcsr_dot(grad(ispin), grad(ispin), tempreal)
    2551          38 :                         denom = denom + tempreal
    2552          38 :                         CALL dbcsr_dot(step(ispin), step(ispin), tempreal)
    2553          76 :                         denom2 = denom2 + tempreal
    2554             : 
    2555             :                      END DO ! ispin
    2556             : 
    2557             :                      ! cosine of the angle between the step and grad
    2558             :                      ! (must be close to zero at convergence)
    2559          38 :                      line_search_error = line_search_error/SQRT(denom)/SQRT(denom2)
    2560             : 
    2561          38 :                      IF (ABS(line_search_error) .GT. optimizer%lin_search_eps_error) THEN
    2562          14 :                         line_search = .TRUE.
    2563          14 :                         line_search_iteration = line_search_iteration + 1
    2564             :                      ELSE
    2565             :                         line_search = .FALSE.
    2566             :                         line_search_iteration = 0
    2567             :                      END IF
    2568             : 
    2569             :                   END IF
    2570             : 
    2571             :                END IF ! iteration.ne.0
    2572             : 
    2573           6 :                IF (line_search) THEN
    2574          44 :                   objf_diff = 0.0_dp
    2575             :                ELSE
    2576          30 :                   objf_diff = objf_new - objf_old
    2577          30 :                   objf_old = objf_new
    2578             :                END IF
    2579             : 
    2580             :                ! update the step direction
    2581          74 :                IF (.NOT. line_search) THEN
    2582             : 
    2583          60 :                   cg_iteration = cg_iteration + 1
    2584             : 
    2585             :                   ! save the previous step
    2586          60 :                   DO ispin = 1, nspins
    2587          60 :                      CALL dbcsr_copy(prev_step(ispin), step(ispin))
    2588             :                   END DO ! ispin
    2589             : 
    2590             :                   ! compute the new step:
    2591             :                   ! if available use second derivative info - bfgs, hessian, preconditioner
    2592          30 :                   IF (prec_type .EQ. xalmo_prec_zero) THEN ! no second derivatives
    2593             : 
    2594             :                      ! no preconditioner
    2595           0 :                      DO ispin = 1, nspins
    2596             : 
    2597           0 :                         CALL dbcsr_copy(step(ispin), grad(ispin))
    2598           0 :                         CALL dbcsr_scale(step(ispin), -1.0_dp)
    2599             : 
    2600             :                      END DO ! ispin
    2601             : 
    2602             :                   ELSE ! use second derivatives
    2603             : 
    2604             :                      ! compute and invert hessian/precond?
    2605          30 :                      IF (iteration .EQ. 0) THEN
    2606             : 
    2607             :                         IF (d_bfgs) THEN
    2608             : 
    2609             :                            ! create matrix filled with 1.0 here
    2610             :                            CALL fill_matrix_with_ones(approx_inv_hessian(1))
    2611             :                            IF (nspins .GT. 1) THEN
    2612             :                               DO ispin = 2, nspins
    2613             :                                  CALL dbcsr_copy(approx_inv_hessian(ispin), approx_inv_hessian(1))
    2614             :                               END DO
    2615             :                            END IF
    2616             : 
    2617           6 :                         ELSE IF (l_bfgs) THEN
    2618             : 
    2619           6 :                            CALL lbfgs_seed(nlmo_lbfgs_history, m_theta, grad)
    2620          12 :                            DO ispin = 1, nspins
    2621           6 :                               CALL dbcsr_copy(step(ispin), grad(ispin))
    2622          12 :                               CALL dbcsr_scale(step(ispin), -1.0_dp)
    2623             :                            END DO ! ispin
    2624             : 
    2625             :                         ELSE
    2626             : 
    2627             :                            ! computing preconditioner
    2628           0 :                            DO ispin = 1, nspins
    2629             : 
    2630             :                               ! TODO: write preconditioner code later
    2631             :                               ! For now, create matrix filled with 1.0 here
    2632           0 :                               CALL fill_matrix_with_ones(approx_inv_hessian(ispin))
    2633             :                               !CALL compute_preconditioner(&
    2634             :                               !       m_prec_out=approx_hessian(ispin),&
    2635             :                               !       m_ks=almo_scf_env%matrix_ks(ispin),&
    2636             :                               !       m_s=matrix_s,&
    2637             :                               !       m_siginv=almo_scf_env%template_matrix_sigma(ispin),&
    2638             :                               !       m_quench_t=quench_t(ispin),&
    2639             :                               !       m_FTsiginv=FTsiginv(ispin),&
    2640             :                               !       m_siginvTFTsiginv=siginvTFTsiginv(ispin),&
    2641             :                               !       m_ST=ST(ispin),&
    2642             :                               !       para_env=almo_scf_env%para_env,&
    2643             :                               !       blacs_env=almo_scf_env%blacs_env,&
    2644             :                               !       nocc_of_domain=almo_scf_env%nocc_of_domain(:,ispin),&
    2645             :                               !       domain_s_inv=almo_scf_env%domain_s_inv(:,ispin),&
    2646             :                               !       domain_r_down=domain_r_down(:,ispin),&
    2647             :                               !       cpu_of_domain=almo_scf_env%cpu_of_domain,&
    2648             :                               !       domain_map=almo_scf_env%domain_map(ispin),&
    2649             :                               !       assume_t0_q0x=assume_t0_q0x,&
    2650             :                               !       penalty_occ_vol=penalty_occ_vol,&
    2651             :                               !       penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin),&
    2652             :                               !       eps_filter=eps_filter,&
    2653             :                               !       neg_thr=0.5_dp,&
    2654             :                               !       spin_factor=spin_factor,&
    2655             :                               !       special_case=my_special_case)
    2656             :                               !CALL invert hessian
    2657             :                            END DO ! ispin
    2658             : 
    2659             :                         END IF
    2660             : 
    2661             :                      ELSE ! not iteration zero
    2662             : 
    2663             :                         ! update approx inverse hessian
    2664             :                         IF (d_bfgs) THEN ! diagonal BFGS
    2665             : 
    2666             :                            DO ispin = 1, nspins
    2667             : 
    2668             :                               ! compute s and y
    2669             :                               CALL dbcsr_copy(bfgs_y(ispin), grad(ispin))
    2670             :                               CALL dbcsr_add(bfgs_y(ispin), prev_grad(ispin), 1.0_dp, -1.0_dp)
    2671             :                               CALL dbcsr_copy(bfgs_s(ispin), m_theta(ispin))
    2672             :                               CALL dbcsr_add(bfgs_s(ispin), prev_m_theta(ispin), 1.0_dp, -1.0_dp)
    2673             : 
    2674             :                               ! compute rho
    2675             :                               CALL dbcsr_dot(grad(ispin), step(ispin), bfgs_rho)
    2676             :                               bfgs_rho = 1.0_dp/bfgs_rho
    2677             : 
    2678             :                               ! compute the sum of the squared elements of bfgs_y
    2679             :                               CALL dbcsr_dot(bfgs_y(ispin), bfgs_y(ispin), bfgs_sum)
    2680             : 
    2681             :                               ! first term: start collecting new inv hessian in this temp matrix
    2682             :                               CALL dbcsr_copy(tempOccOcc2(ispin), approx_inv_hessian(ispin))
    2683             : 
    2684             :                               ! second term: + rho * s * s
    2685             :                               CALL dbcsr_hadamard_product(bfgs_s(ispin), bfgs_s(ispin), tempOccOcc1(ispin))
    2686             :                               CALL dbcsr_add(tempOccOcc2(ispin), tempOccOcc1(ispin), 1.0_dp, bfgs_rho)
    2687             : 
    2688             :                               ! third term: + rho^2 * s * s * H * sum_(y * y)
    2689             :                               CALL dbcsr_hadamard_product(tempOccOcc1(ispin), &
    2690             :                                                           approx_inv_hessian(ispin), tempOccOcc3(ispin))
    2691             :                               CALL dbcsr_add(tempOccOcc2(ispin), tempOccOcc3(ispin), &
    2692             :                                              1.0_dp, bfgs_rho*bfgs_rho*bfgs_sum)
    2693             : 
    2694             :                               ! fourth term: - 2 * rho * s * y * H
    2695             :                               CALL dbcsr_hadamard_product(bfgs_y(ispin), &
    2696             :                                                           approx_inv_hessian(ispin), tempOccOcc1(ispin))
    2697             :                               CALL dbcsr_hadamard_product(bfgs_s(ispin), tempOccOcc1(ispin), tempOccOcc3(ispin))
    2698             :                               CALL dbcsr_add(tempOccOcc2(ispin), tempOccOcc3(ispin), &
    2699             :                                              1.0_dp, -2.0_dp*bfgs_rho)
    2700             : 
    2701             :                               CALL dbcsr_copy(approx_inv_hessian(ispin), tempOccOcc2(ispin))
    2702             : 
    2703             :                            END DO
    2704             : 
    2705          24 :                         ELSE IF (l_bfgs) THEN
    2706             : 
    2707          24 :                            CALL lbfgs_get_direction(nlmo_lbfgs_history, m_theta, grad, step)
    2708             : 
    2709             :                         END IF ! which method?
    2710             : 
    2711             :                      END IF ! compute approximate inverse hessian
    2712             : 
    2713          30 :                      IF (.NOT. l_bfgs) THEN
    2714             : 
    2715           0 :                         DO ispin = 1, nspins
    2716             : 
    2717             :                            CALL dbcsr_hadamard_product(approx_inv_hessian(ispin), &
    2718           0 :                                                        grad(ispin), step(ispin))
    2719           0 :                            CALL dbcsr_scale(step(ispin), -1.0_dp)
    2720             : 
    2721             :                         END DO ! ispin
    2722             : 
    2723             :                      END IF
    2724             : 
    2725             :                   END IF ! second derivative type fork
    2726             : 
    2727             :                   ! check whether we need to reset conjugate directions
    2728          30 :                   IF (iteration .EQ. 0) THEN
    2729           6 :                      reset_conjugator = .TRUE.
    2730             :                   END IF
    2731             : 
    2732             :                   ! compute the conjugation coefficient - beta
    2733          30 :                   IF (.NOT. reset_conjugator) THEN
    2734             :                      CALL compute_cg_beta( &
    2735             :                         beta=beta, &
    2736             :                         reset_conjugator=reset_conjugator, &
    2737             :                         conjugator=optimizer%conjugator, &
    2738             :                         grad=grad(:), &
    2739             :                         prev_grad=prev_grad(:), &
    2740             :                         step=step(:), &
    2741             :                         prev_step=prev_step(:), &
    2742             :                         prev_minus_prec_grad=prev_minus_prec_grad(:) &
    2743          24 :                         )
    2744             : 
    2745             :                   END IF
    2746             : 
    2747          30 :                   IF (reset_conjugator) THEN
    2748             : 
    2749           6 :                      beta = 0.0_dp
    2750           6 :                      IF (unit_nr > 0 .AND. (.NOT. just_started)) THEN
    2751           0 :                         WRITE (unit_nr, '(T2,A35)') "Re-setting conjugator to zero"
    2752             :                      END IF
    2753           6 :                      reset_conjugator = .FALSE.
    2754             : 
    2755             :                   END IF
    2756             : 
    2757             :                   ! save the preconditioned gradient (useful for beta)
    2758          60 :                   DO ispin = 1, nspins
    2759             : 
    2760          30 :                      CALL dbcsr_copy(prev_minus_prec_grad(ispin), step(ispin))
    2761             : 
    2762             :                      ! conjugate the step direction
    2763          60 :                      CALL dbcsr_add(step(ispin), prev_step(ispin), 1.0_dp, beta)
    2764             : 
    2765             :                   END DO ! ispin
    2766             : 
    2767             :                END IF ! update the step direction
    2768             : 
    2769             :                ! estimate the step size
    2770          74 :                IF (.NOT. line_search) THEN
    2771             :                   ! we just changed the direction and
    2772             :                   ! we have only E and grad from the current step
    2773             :                   ! it is not enough to compute step_size - just guess it
    2774          30 :                   e0 = objf_new
    2775          30 :                   g0 = 0.0_dp
    2776          60 :                   DO ispin = 1, nspins
    2777          30 :                      CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    2778          60 :                      g0 = g0 + tempreal
    2779             :                   END DO ! ispin
    2780             :                   g0sign = SIGN(1.0_dp, g0) ! sign of g0
    2781             :                   IF (linear_search_type .EQ. 1) THEN ! this is quadratic LS
    2782          30 :                      IF (iteration .EQ. 0) THEN
    2783           6 :                         step_size = optimizer%lin_search_step_size_guess
    2784             :                      ELSE
    2785          24 :                         IF (next_step_size_guess .LE. 0.0_dp) THEN
    2786           0 :                            step_size = optimizer%lin_search_step_size_guess
    2787             :                         ELSE
    2788             :                            ! take the last value
    2789          24 :                            step_size = optimizer%lin_search_step_size_guess
    2790             :                            !step_size = next_step_size_guess*1.05_dp
    2791             :                         END IF
    2792             :                      END IF
    2793             :                   ELSE IF (linear_search_type .EQ. 2) THEN ! this is cautious LS
    2794             :                      ! this LS type is designed not to trust quadratic appr
    2795             :                      ! so it always restarts from a safe step size
    2796             :                      step_size = optimizer%lin_search_step_size_guess
    2797             :                   END IF
    2798          30 :                   IF (unit_nr > 0) THEN
    2799          15 :                      WRITE (unit_nr, '(T21,3A19)') "Line position", "Line grad", "Next line step"
    2800          15 :                      WRITE (unit_nr, '(T2,A19,3F19.5)') "Line search", 0.0_dp, g0, step_size
    2801             :                   END IF
    2802          30 :                   next_step_size_guess = step_size
    2803             :                ELSE ! this is not the first line search
    2804          44 :                   e1 = objf_new
    2805          44 :                   g1 = 0.0_dp
    2806          88 :                   DO ispin = 1, nspins
    2807          44 :                      CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    2808          88 :                      g1 = g1 + tempreal
    2809             :                   END DO ! ispin
    2810          44 :                   g1sign = SIGN(1.0_dp, g1) ! sign of g1
    2811             :                   IF (linear_search_type .EQ. 1) THEN
    2812             :                      ! we have accumulated some points along this direction
    2813             :                      ! use only the most recent g0 (quadratic approximation)
    2814          44 :                      appr_sec_der = (g1 - g0)/step_size
    2815             :                      !IF (unit_nr > 0) THEN
    2816             :                      !   WRITE (unit_nr, '(A2,7F12.5)') &
    2817             :                      !      "DT", e0, e1, g0, g1, appr_sec_der, step_size, -g1/appr_sec_der
    2818             :                      !ENDIF
    2819          44 :                      step_size = -g1/appr_sec_der
    2820             :                   ELSE IF (linear_search_type .EQ. 2) THEN
    2821             :                      ! alternative method for finding step size
    2822             :                      ! do not use quadratic approximation, only gradient signs
    2823             :                      IF (g1sign .NE. g0sign) THEN
    2824             :                         step_size = -step_size/2.0; 
    2825             :                      ELSE
    2826             :                         step_size = step_size*1.5; 
    2827             :                      END IF
    2828             :                   END IF
    2829             :                   ! end alternative LS types
    2830          44 :                   IF (unit_nr > 0) THEN
    2831          22 :                      WRITE (unit_nr, '(T21,3A19)') "Line position", "Line grad", "Next line step"
    2832          22 :                      WRITE (unit_nr, '(T2,A19,3F19.5)') "Line search", next_step_size_guess, g1, step_size
    2833             :                   END IF
    2834          44 :                   e0 = e1
    2835          44 :                   g0 = g1
    2836             :                   g0sign = g1sign
    2837          44 :                   next_step_size_guess = next_step_size_guess + step_size
    2838             :                END IF
    2839             : 
    2840             :                ! update theta
    2841         148 :                DO ispin = 1, nspins
    2842          74 :                   IF (.NOT. line_search) THEN ! we prepared to perform the first line search
    2843             :                      ! "previous" refers to the previous CG step, not the previous LS step
    2844          30 :                      CALL dbcsr_copy(prev_m_theta(ispin), m_theta(ispin))
    2845             :                   END IF
    2846         148 :                   CALL dbcsr_add(m_theta(ispin), step(ispin), 1.0_dp, step_size)
    2847             :                END DO ! ispin
    2848             : 
    2849             :             END IF ! not.prepare_to_exit
    2850             : 
    2851          82 :             IF (line_search) THEN
    2852          50 :                iter_type = "LS"
    2853             :             ELSE
    2854          32 :                iter_type = "CG"
    2855             :             END IF
    2856             : 
    2857          82 :             t2 = m_walltime()
    2858          82 :             IF (unit_nr > 0) THEN
    2859          41 :                iter_type = TRIM("NLMO OPT "//iter_type)
    2860             :                WRITE (unit_nr, '(T2,A13,I6,F23.10,E14.5,F14.9,F9.2)') &
    2861          41 :                   iter_type, iteration, &
    2862          41 :                   objf_new, objf_diff, grad_norm, &
    2863          82 :                   t2 - t1
    2864             :                WRITE (unit_nr, '(T2,A19,F23.10)') &
    2865          41 :                   "Localization:", localization_obj_function
    2866             :                WRITE (unit_nr, '(T2,A19,F23.10)') &
    2867          41 :                   "Orthogonalization:", penalty_func_new
    2868             :             END IF
    2869          82 :             t1 = m_walltime()
    2870             : 
    2871          82 :             iteration = iteration + 1
    2872          82 :             IF (prepare_to_exit) EXIT
    2873             : 
    2874             :          END DO ! inner loop
    2875             : 
    2876           8 :          IF (converged .OR. (outer_iteration .GE. outer_max_iter)) THEN
    2877           8 :             outer_prepare_to_exit = .TRUE.
    2878             :          END IF
    2879             : 
    2880           8 :          outer_iteration = outer_iteration + 1
    2881           8 :          IF (outer_prepare_to_exit) EXIT
    2882             : 
    2883             :       END DO ! outer loop
    2884             : 
    2885             :       ! return the optimal determinant penalty
    2886           8 :       optimizer%opt_penalty%penalty_strength = 0.0_dp
    2887          16 :       DO ispin = 1, nspins
    2888             :          optimizer%opt_penalty%penalty_strength = optimizer%opt_penalty%penalty_strength + &
    2889          16 :                                                   (-1.0_dp)*penalty_vol_prefactor(ispin)
    2890             :       END DO
    2891           8 :       optimizer%opt_penalty%penalty_strength = optimizer%opt_penalty%penalty_strength/nspins
    2892             : 
    2893           8 :       IF (converged) THEN
    2894           8 :          iter_type = "Final"
    2895             :       ELSE
    2896           0 :          iter_type = "Unconverged"
    2897             :       END IF
    2898             : 
    2899           8 :       IF (unit_nr > 0) THEN
    2900           4 :          WRITE (unit_nr, '()')
    2901           4 :          print_string = TRIM(iter_type)//" localization:"
    2902             :          WRITE (unit_nr, '(T2,A29,F30.10)') &
    2903           4 :             print_string, localization_obj_function
    2904           4 :          print_string = TRIM(iter_type)//" determinant:"
    2905             :          WRITE (unit_nr, '(T2,A29,F30.10)') &
    2906           4 :             print_string, overlap_determinant
    2907           4 :          print_string = TRIM(iter_type)//" penalty strength:"
    2908             :          WRITE (unit_nr, '(T2,A29,F30.10)') &
    2909           4 :             print_string, optimizer%opt_penalty%penalty_strength
    2910             :       END IF
    2911             : 
    2912             :       ! clean up
    2913           8 :       IF (l_bfgs) THEN
    2914           8 :          CALL lbfgs_release(nlmo_lbfgs_history)
    2915             :       END IF
    2916          16 :       DO ispin = 1, nspins
    2917          80 :          DO idim0 = 1, SIZE(m_B0, 2)
    2918         152 :             DO reim = 1, SIZE(m_B0, 1)
    2919         144 :                CALL dbcsr_release(m_B0(reim, idim0, ispin))
    2920             :             END DO
    2921             :          END DO
    2922           8 :          CALL dbcsr_release(m_theta(ispin))
    2923           8 :          CALL dbcsr_release(m_t_mo_local(ispin))
    2924           8 :          CALL dbcsr_release(tempNOcc1(ispin))
    2925           8 :          CALL dbcsr_release(approx_inv_hessian(ispin))
    2926           8 :          CALL dbcsr_release(prev_m_theta(ispin))
    2927           8 :          CALL dbcsr_release(m_theta_normalized(ispin))
    2928           8 :          CALL dbcsr_release(m_S0(ispin))
    2929           8 :          CALL dbcsr_release(prev_grad(ispin))
    2930           8 :          CALL dbcsr_release(grad(ispin))
    2931           8 :          CALL dbcsr_release(prev_step(ispin))
    2932           8 :          CALL dbcsr_release(step(ispin))
    2933           8 :          CALL dbcsr_release(prev_minus_prec_grad(ispin))
    2934           8 :          CALL dbcsr_release(m_sig_sqrti_ii(ispin))
    2935           8 :          CALL dbcsr_release(m_sigma(ispin))
    2936           8 :          CALL dbcsr_release(m_siginv(ispin))
    2937           8 :          CALL dbcsr_release(tempOccOcc1(ispin))
    2938           8 :          CALL dbcsr_release(tempOccOcc2(ispin))
    2939           8 :          CALL dbcsr_release(tempOccOcc3(ispin))
    2940           8 :          CALL dbcsr_release(bfgs_y(ispin))
    2941          16 :          CALL dbcsr_release(bfgs_s(ispin))
    2942             :       END DO ! ispin
    2943             : 
    2944           8 :       DEALLOCATE (grad_norm_spin)
    2945           8 :       DEALLOCATE (nocc)
    2946           8 :       DEALLOCATE (penalty_vol_prefactor)
    2947           8 :       DEALLOCATE (suggested_vol_penalty)
    2948             : 
    2949           8 :       DEALLOCATE (approx_inv_hessian)
    2950           8 :       DEALLOCATE (prev_m_theta)
    2951           8 :       DEALLOCATE (m_theta_normalized)
    2952           8 :       DEALLOCATE (m_S0)
    2953           8 :       DEALLOCATE (prev_grad)
    2954           8 :       DEALLOCATE (grad)
    2955           8 :       DEALLOCATE (prev_step)
    2956           8 :       DEALLOCATE (step)
    2957           8 :       DEALLOCATE (prev_minus_prec_grad)
    2958           8 :       DEALLOCATE (m_sig_sqrti_ii)
    2959           8 :       DEALLOCATE (m_sigma)
    2960           8 :       DEALLOCATE (m_siginv)
    2961           8 :       DEALLOCATE (tempNOcc1)
    2962           8 :       DEALLOCATE (tempOccOcc1)
    2963           8 :       DEALLOCATE (tempOccOcc2)
    2964           8 :       DEALLOCATE (tempOccOcc3)
    2965           8 :       DEALLOCATE (bfgs_y)
    2966           8 :       DEALLOCATE (bfgs_s)
    2967             : 
    2968           8 :       DEALLOCATE (m_theta, m_t_mo_local)
    2969           8 :       DEALLOCATE (m_B0)
    2970           8 :       DEALLOCATE (weights)
    2971           8 :       DEALLOCATE (first_sgf, last_sgf, nsgf)
    2972             : 
    2973           8 :       IF (.NOT. converged) THEN
    2974           0 :          CPABORT("Optimization not converged! ")
    2975             :       END IF
    2976             : 
    2977           8 :       CALL timestop(handle)
    2978             : 
    2979          24 :    END SUBROUTINE almo_scf_construct_nlmos
    2980             : 
    2981             : ! **************************************************************************************************
    2982             : !> \brief Analysis of the orbitals
    2983             : !> \param detailed_analysis ...
    2984             : !> \param eps_filter ...
    2985             : !> \param m_T_in ...
    2986             : !> \param m_T0_in ...
    2987             : !> \param m_siginv_in ...
    2988             : !> \param m_siginv0_in ...
    2989             : !> \param m_S_in ...
    2990             : !> \param m_KS0_in ...
    2991             : !> \param m_quench_t_in ...
    2992             : !> \param energy_out ...
    2993             : !> \param m_eda_out ...
    2994             : !> \param m_cta_out ...
    2995             : !> \par History
    2996             : !>       2017.07 created [Rustam Z Khaliullin]
    2997             : !> \author Rustam Z Khaliullin
    2998             : ! **************************************************************************************************
    2999          24 :    SUBROUTINE xalmo_analysis(detailed_analysis, eps_filter, m_T_in, m_T0_in, &
    3000          24 :                              m_siginv_in, m_siginv0_in, m_S_in, m_KS0_in, m_quench_t_in, energy_out, &
    3001          24 :                              m_eda_out, m_cta_out)
    3002             : 
    3003             :       LOGICAL, INTENT(IN)                                :: detailed_analysis
    3004             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    3005             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_T_in, m_T0_in, m_siginv_in, &
    3006             :                                                             m_siginv0_in, m_S_in, m_KS0_in, &
    3007             :                                                             m_quench_t_in
    3008             :       REAL(KIND=dp), INTENT(INOUT)                       :: energy_out
    3009             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_eda_out, m_cta_out
    3010             : 
    3011             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'xalmo_analysis'
    3012             : 
    3013             :       INTEGER                                            :: handle, ispin, nspins
    3014             :       REAL(KIND=dp)                                      :: energy_ispin, spin_factor
    3015             :       TYPE(dbcsr_type)                                   :: FTsiginv0, Fvo0, m_X, siginvTFTsiginv0, &
    3016             :                                                             ST0
    3017             : 
    3018          24 :       CALL timeset(routineN, handle)
    3019             : 
    3020          24 :       nspins = SIZE(m_T_in)
    3021             : 
    3022          24 :       IF (nspins == 1) THEN
    3023          24 :          spin_factor = 2.0_dp
    3024             :       ELSE
    3025           0 :          spin_factor = 1.0_dp
    3026             :       END IF
    3027             : 
    3028          24 :       energy_out = 0.0_dp
    3029          48 :       DO ispin = 1, nspins
    3030             : 
    3031             :          ! create temporary matrices
    3032             :          CALL dbcsr_create(Fvo0, &
    3033             :                            template=m_T_in(ispin), &
    3034          24 :                            matrix_type=dbcsr_type_no_symmetry)
    3035             :          CALL dbcsr_create(FTsiginv0, &
    3036             :                            template=m_T_in(ispin), &
    3037          24 :                            matrix_type=dbcsr_type_no_symmetry)
    3038             :          CALL dbcsr_create(ST0, &
    3039             :                            template=m_T_in(ispin), &
    3040          24 :                            matrix_type=dbcsr_type_no_symmetry)
    3041             :          CALL dbcsr_create(m_X, &
    3042             :                            template=m_T_in(ispin), &
    3043          24 :                            matrix_type=dbcsr_type_no_symmetry)
    3044             :          CALL dbcsr_create(siginvTFTsiginv0, &
    3045             :                            template=m_siginv0_in(ispin), &
    3046          24 :                            matrix_type=dbcsr_type_no_symmetry)
    3047             : 
    3048             :          ! compute F_{virt,occ} for the zero-delocalization state
    3049             :          CALL compute_frequently_used_matrices( &
    3050             :             filter_eps=eps_filter, &
    3051             :             m_T_in=m_T0_in(ispin), &
    3052             :             m_siginv_in=m_siginv0_in(ispin), &
    3053             :             m_S_in=m_S_in(1), &
    3054             :             m_F_in=m_KS0_in(ispin), &
    3055             :             m_FTsiginv_out=FTsiginv0, &
    3056             :             m_siginvTFTsiginv_out=siginvTFTsiginv0, &
    3057          24 :             m_ST_out=ST0)
    3058          24 :          CALL dbcsr_copy(Fvo0, m_quench_t_in(ispin))
    3059          24 :          CALL dbcsr_copy(Fvo0, FTsiginv0, keep_sparsity=.TRUE.)
    3060             :          CALL dbcsr_multiply("N", "N", -1.0_dp, &
    3061             :                              ST0, &
    3062             :                              siginvTFTsiginv0, &
    3063             :                              1.0_dp, Fvo0, &
    3064          24 :                              retain_sparsity=.TRUE.)
    3065             : 
    3066             :          ! get single excitation amplitudes
    3067          24 :          CALL dbcsr_copy(m_X, m_T0_in(ispin))
    3068          24 :          CALL dbcsr_add(m_X, m_T_in(ispin), -1.0_dp, 1.0_dp)
    3069             : 
    3070          24 :          CALL dbcsr_dot(m_X, Fvo0, energy_ispin)
    3071          24 :          energy_out = energy_out + energy_ispin*spin_factor
    3072             : 
    3073          24 :          IF (detailed_analysis) THEN
    3074             : 
    3075           2 :             CALL dbcsr_hadamard_product(m_X, Fvo0, m_eda_out(ispin))
    3076           2 :             CALL dbcsr_scale(m_eda_out(ispin), spin_factor)
    3077           2 :             CALL dbcsr_filter(m_eda_out(ispin), eps_filter)
    3078             : 
    3079             :             ! first, compute [QR'R]_mu^i = [(S-SRS).X.siginv']_mu^i
    3080             :             ! a. FTsiginv0 = S.T0*siginv0
    3081             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3082             :                                 ST0, &
    3083             :                                 m_siginv0_in(ispin), &
    3084             :                                 0.0_dp, FTsiginv0, &
    3085           2 :                                 filter_eps=eps_filter)
    3086             :             ! c. tmp1(use ST0) = S.X
    3087             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3088             :                                 m_S_in(1), &
    3089             :                                 m_X, &
    3090             :                                 0.0_dp, ST0, &
    3091           2 :                                 filter_eps=eps_filter)
    3092             :             ! d. tmp2 = tr(T0).tmp1 = tr(T0).S.X
    3093             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    3094             :                                 m_T0_in(ispin), &
    3095             :                                 ST0, &
    3096             :                                 0.0_dp, siginvTFTsiginv0, &
    3097           2 :                                 filter_eps=eps_filter)
    3098             :             ! e. tmp1 = tmp1 - tmp3.tmp2 = S.X - S.T0.siginv0*tr(T0).S.X
    3099             :             !         = (1-S.R0).S.X
    3100             :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    3101             :                                 FTsiginv0, &
    3102             :                                 siginvTFTsiginv0, &
    3103             :                                 1.0_dp, ST0, &
    3104           2 :                                 filter_eps=eps_filter)
    3105             :             ! f. tmp2(use FTsiginv0) = tmp1*siginv
    3106             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3107             :                                 ST0, &
    3108             :                                 m_siginv_in(ispin), &
    3109             :                                 0.0_dp, FTsiginv0, &
    3110           2 :                                 filter_eps=eps_filter)
    3111             :             ! second, compute traces of blocks [RR'Q]^x_y * [X]^y_x
    3112             :             CALL dbcsr_hadamard_product(m_X, &
    3113           2 :                                         FTsiginv0, m_cta_out(ispin))
    3114           2 :             CALL dbcsr_scale(m_cta_out(ispin), spin_factor)
    3115           2 :             CALL dbcsr_filter(m_cta_out(ispin), eps_filter)
    3116             : 
    3117             :          END IF ! do ALMO EDA/CTA
    3118             : 
    3119          24 :          CALL dbcsr_release(Fvo0)
    3120          24 :          CALL dbcsr_release(FTsiginv0)
    3121          24 :          CALL dbcsr_release(ST0)
    3122          24 :          CALL dbcsr_release(m_X)
    3123          72 :          CALL dbcsr_release(siginvTFTsiginv0)
    3124             : 
    3125             :       END DO ! ispin
    3126             : 
    3127          24 :       CALL timestop(handle)
    3128             : 
    3129          24 :    END SUBROUTINE xalmo_analysis
    3130             : 
    3131             : ! **************************************************************************************************
    3132             : !> \brief Compute matrices that are used often in various parts of the
    3133             : !>        optimization procedure
    3134             : !> \param filter_eps ...
    3135             : !> \param m_T_in ...
    3136             : !> \param m_siginv_in ...
    3137             : !> \param m_S_in ...
    3138             : !> \param m_F_in ...
    3139             : !> \param m_FTsiginv_out ...
    3140             : !> \param m_siginvTFTsiginv_out ...
    3141             : !> \param m_ST_out ...
    3142             : !> \par History
    3143             : !>       2016.12 created [Rustam Z Khaliullin]
    3144             : !> \author Rustam Z Khaliullin
    3145             : ! **************************************************************************************************
    3146        1498 :    SUBROUTINE compute_frequently_used_matrices(filter_eps, &
    3147             :                                                m_T_in, m_siginv_in, m_S_in, m_F_in, m_FTsiginv_out, &
    3148             :                                                m_siginvTFTsiginv_out, m_ST_out)
    3149             : 
    3150             :       REAL(KIND=dp), INTENT(IN)                          :: filter_eps
    3151             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_T_in, m_siginv_in, m_S_in, m_F_in
    3152             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_FTsiginv_out, m_siginvTFTsiginv_out, &
    3153             :                                                             m_ST_out
    3154             : 
    3155             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_frequently_used_matrices'
    3156             : 
    3157             :       INTEGER                                            :: handle
    3158             :       TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_oo_1
    3159             : 
    3160        1498 :       CALL timeset(routineN, handle)
    3161             : 
    3162             :       CALL dbcsr_create(m_tmp_no_1, &
    3163             :                         template=m_T_in, &
    3164        1498 :                         matrix_type=dbcsr_type_no_symmetry)
    3165             :       CALL dbcsr_create(m_tmp_oo_1, &
    3166             :                         template=m_siginv_in, &
    3167        1498 :                         matrix_type=dbcsr_type_no_symmetry)
    3168             : 
    3169             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3170             :                           m_F_in, &
    3171             :                           m_T_in, &
    3172             :                           0.0_dp, m_tmp_no_1, &
    3173        1498 :                           filter_eps=filter_eps)
    3174             : 
    3175             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3176             :                           m_tmp_no_1, &
    3177             :                           m_siginv_in, &
    3178             :                           0.0_dp, m_FTsiginv_out, &
    3179        1498 :                           filter_eps=filter_eps)
    3180             : 
    3181             :       CALL dbcsr_multiply("T", "N", 1.0_dp, &
    3182             :                           m_T_in, &
    3183             :                           m_FTsiginv_out, &
    3184             :                           0.0_dp, m_tmp_oo_1, &
    3185        1498 :                           filter_eps=filter_eps)
    3186             : 
    3187             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3188             :                           m_siginv_in, &
    3189             :                           m_tmp_oo_1, &
    3190             :                           0.0_dp, m_siginvTFTsiginv_out, &
    3191        1498 :                           filter_eps=filter_eps)
    3192             : 
    3193             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3194             :                           m_S_in, &
    3195             :                           m_T_in, &
    3196             :                           0.0_dp, m_ST_out, &
    3197        1498 :                           filter_eps=filter_eps)
    3198             : 
    3199        1498 :       CALL dbcsr_release(m_tmp_no_1)
    3200        1498 :       CALL dbcsr_release(m_tmp_oo_1)
    3201             : 
    3202        1498 :       CALL timestop(handle)
    3203             : 
    3204        1498 :    END SUBROUTINE compute_frequently_used_matrices
    3205             : 
    3206             : ! **************************************************************************************************
    3207             : !> \brief Split the matrix of virtual orbitals into two:
    3208             : !>        retained orbs and discarded
    3209             : !> \param almo_scf_env ...
    3210             : !> \par History
    3211             : !>       2011.09 created [Rustam Z Khaliullin]
    3212             : !> \author Rustam Z Khaliullin
    3213             : ! **************************************************************************************************
    3214           0 :    SUBROUTINE split_v_blk(almo_scf_env)
    3215             : 
    3216             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    3217             : 
    3218             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'split_v_blk'
    3219             : 
    3220             :       INTEGER                                            :: discarded_v, handle, iblock_col, &
    3221             :                                                             iblock_col_size, iblock_row, &
    3222             :                                                             iblock_row_size, ispin, retained_v
    3223           0 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: data_p
    3224             :       TYPE(dbcsr_iterator_type)                          :: iter
    3225             : 
    3226           0 :       CALL timeset(routineN, handle)
    3227             : 
    3228           0 :       DO ispin = 1, almo_scf_env%nspins
    3229             : 
    3230             :          CALL dbcsr_work_create(almo_scf_env%matrix_v_blk(ispin), &
    3231           0 :                                 work_mutable=.TRUE.)
    3232             :          CALL dbcsr_work_create(almo_scf_env%matrix_v_disc_blk(ispin), &
    3233           0 :                                 work_mutable=.TRUE.)
    3234             : 
    3235           0 :          CALL dbcsr_iterator_start(iter, almo_scf_env%matrix_v_full_blk(ispin))
    3236             : 
    3237           0 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
    3238             : 
    3239             :             CALL dbcsr_iterator_next_block(iter, iblock_row, iblock_col, data_p, &
    3240           0 :                                            row_size=iblock_row_size, col_size=iblock_col_size)
    3241             : 
    3242           0 :             IF (iblock_row .NE. iblock_col) THEN
    3243           0 :                CPABORT("off-diagonal block found")
    3244             :             END IF
    3245             : 
    3246           0 :             retained_v = almo_scf_env%nvirt_of_domain(iblock_col, ispin)
    3247           0 :             discarded_v = almo_scf_env%nvirt_disc_of_domain(iblock_col, ispin)
    3248           0 :             CPASSERT(retained_v .GT. 0)
    3249           0 :             CPASSERT(discarded_v .GT. 0)
    3250             :             CALL dbcsr_put_block(almo_scf_env%matrix_v_disc_blk(ispin), iblock_row, iblock_col, &
    3251           0 :                                  block=data_p(:, (retained_v + 1):iblock_col_size))
    3252             :             CALL dbcsr_put_block(almo_scf_env%matrix_v_blk(ispin), iblock_row, iblock_col, &
    3253           0 :                                  block=data_p(:, 1:retained_v))
    3254             : 
    3255             :          END DO ! iterator
    3256           0 :          CALL dbcsr_iterator_stop(iter)
    3257             : 
    3258           0 :          CALL dbcsr_finalize(almo_scf_env%matrix_v_blk(ispin))
    3259           0 :          CALL dbcsr_finalize(almo_scf_env%matrix_v_disc_blk(ispin))
    3260             : 
    3261             :       END DO ! ispin
    3262             : 
    3263           0 :       CALL timestop(handle)
    3264             : 
    3265           0 :    END SUBROUTINE split_v_blk
    3266             : 
    3267             : ! **************************************************************************************************
    3268             : !> \brief various methods for calculating the Harris-Foulkes correction
    3269             : !> \param almo_scf_env ...
    3270             : !> \par History
    3271             : !>       2011.06 created [Rustam Z Khaliullin]
    3272             : !> \author Rustam Z Khaliullin
    3273             : ! **************************************************************************************************
    3274           0 :    SUBROUTINE harris_foulkes_correction(almo_scf_env)
    3275             : 
    3276             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    3277             : 
    3278             :       CHARACTER(len=*), PARAMETER :: routineN = 'harris_foulkes_correction'
    3279             :       INTEGER, PARAMETER                                 :: cayley_transform = 1, dm_ls_step = 2
    3280             : 
    3281             :       INTEGER :: algorithm_id, handle, handle1, handle2, handle3, handle4, handle5, handle6, &
    3282             :          handle7, handle8, ispin, iteration, n, nmins, nspin, opt_k_max_iter, &
    3283             :          outer_opt_k_iteration, outer_opt_k_max_iter, unit_nr
    3284             :       INTEGER, DIMENSION(1)                              :: fake, nelectron_spin_real
    3285             :       LOGICAL :: converged, line_search, md_in_k_space, outer_opt_k_prepare_to_exit, &
    3286             :          prepare_to_exit, reset_conjugator, reset_step_size, use_cubic_approximation, &
    3287             :          use_quadratic_approximation
    3288             :       REAL(KIND=dp) :: aa, bb, beta, conjugacy_error, conjugacy_error_threshold, &
    3289             :          delta_obj_function, denom, energy_correction_final, frob_matrix, frob_matrix_base, fun0, &
    3290             :          fun1, gfun0, gfun1, grad_norm, grad_norm_frob, kappa, kin_energy, line_search_error, &
    3291             :          line_search_error_threshold, num_threshold, numer, obj_function, quadratic_approx_error, &
    3292             :          quadratic_approx_error_threshold, safety_multiplier, spin_factor, step_size, &
    3293             :          step_size_quadratic_approx, step_size_quadratic_approx2, t1, t1a, t1cholesky, t2, t2a, &
    3294             :          t2cholesky, tau, time_step, x_opt_eps_adaptive, x_opt_eps_adaptive_factor
    3295             :       REAL(KIND=dp), DIMENSION(1)                        :: local_mu
    3296             :       REAL(KIND=dp), DIMENSION(2)                        :: energy_correction
    3297             :       REAL(KIND=dp), DIMENSION(3)                        :: minima
    3298             :       TYPE(cp_logger_type), POINTER                      :: logger
    3299             :       TYPE(ct_step_env_type)                             :: ct_step_env
    3300             :       TYPE(dbcsr_type) :: grad, k_vd_index_down, k_vr_index_down, matrix_k_central, matrix_tmp1, &
    3301             :          matrix_tmp2, prec, prev_grad, prev_minus_prec_grad, prev_step, sigma_oo_curr, &
    3302             :          sigma_oo_curr_inv, sigma_vv_sqrt, sigma_vv_sqrt_guess, sigma_vv_sqrt_inv, &
    3303             :          sigma_vv_sqrt_inv_guess, step, t_curr, tmp1_n_vr, tmp2_n_o, tmp3_vd_vr, tmp4_o_vr, &
    3304             :          tmp_k_blk, vd_fixed, vd_index_sqrt, vd_index_sqrt_inv, velocity, vr_fixed, vr_index_sqrt, &
    3305             :          vr_index_sqrt_inv
    3306           0 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: matrix_p_almo_scf_converged
    3307             : 
    3308           0 :       CALL timeset(routineN, handle)
    3309             : 
    3310             :       ! get a useful output_unit
    3311           0 :       logger => cp_get_default_logger()
    3312           0 :       IF (logger%para_env%is_source()) THEN
    3313           0 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    3314             :       ELSE
    3315           0 :          unit_nr = -1
    3316             :       END IF
    3317             : 
    3318           0 :       nspin = almo_scf_env%nspins
    3319           0 :       energy_correction_final = 0.0_dp
    3320           0 :       IF (nspin .EQ. 1) THEN
    3321           0 :          spin_factor = 2.0_dp
    3322             :       ELSE
    3323           0 :          spin_factor = 1.0_dp
    3324             :       END IF
    3325             : 
    3326           0 :       IF (almo_scf_env%deloc_use_occ_orbs) THEN
    3327             :          algorithm_id = cayley_transform
    3328             :       ELSE
    3329           0 :          algorithm_id = dm_ls_step
    3330             :       END IF
    3331             : 
    3332           0 :       t1 = m_walltime()
    3333             : 
    3334           0 :       SELECT CASE (algorithm_id)
    3335             :       CASE (cayley_transform)
    3336             : 
    3337             :          ! rescale density matrix by spin factor
    3338             :          ! so the orbitals and density are consistent with each other
    3339           0 :          IF (almo_scf_env%nspins == 1) THEN
    3340           0 :             CALL dbcsr_scale(almo_scf_env%matrix_p(1), 1.0_dp/spin_factor)
    3341             :          END IF
    3342             : 
    3343             :          ! transform matrix_t not matrix_t_blk (we might need ALMOs later)
    3344           0 :          DO ispin = 1, nspin
    3345             : 
    3346             :             CALL dbcsr_copy(almo_scf_env%matrix_t(ispin), &
    3347           0 :                             almo_scf_env%matrix_t_blk(ispin))
    3348             : 
    3349             :             ! obtain orthogonalization matrices for ALMOs
    3350             :             ! RZK-warning - remove this sqrt(sigma) and inv(sqrt(sigma))
    3351             :             ! ideally ALMO scf should use sigma and sigma_inv in
    3352             :             ! the tensor_up_down representation
    3353             : 
    3354           0 :             IF (unit_nr > 0) THEN
    3355           0 :                WRITE (unit_nr, *) "sqrt and inv(sqrt) of MO overlap matrix"
    3356             :             END IF
    3357             :             CALL dbcsr_create(almo_scf_env%matrix_sigma_sqrt(ispin), &
    3358             :                               template=almo_scf_env%matrix_sigma(ispin), &
    3359           0 :                               matrix_type=dbcsr_type_no_symmetry)
    3360             :             CALL dbcsr_create(almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3361             :                               template=almo_scf_env%matrix_sigma(ispin), &
    3362           0 :                               matrix_type=dbcsr_type_no_symmetry)
    3363             : 
    3364             :             CALL matrix_sqrt_Newton_Schulz(almo_scf_env%matrix_sigma_sqrt(ispin), &
    3365             :                                            almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3366             :                                            almo_scf_env%matrix_sigma(ispin), &
    3367             :                                            threshold=almo_scf_env%eps_filter, &
    3368             :                                            order=almo_scf_env%order_lanczos, &
    3369             :                                            eps_lanczos=almo_scf_env%eps_lanczos, &
    3370           0 :                                            max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    3371             : 
    3372           0 :             IF (safe_mode) THEN
    3373             :                CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma(ispin), &
    3374             :                                  matrix_type=dbcsr_type_no_symmetry)
    3375             :                CALL dbcsr_create(matrix_tmp2, template=almo_scf_env%matrix_sigma(ispin), &
    3376             :                                  matrix_type=dbcsr_type_no_symmetry)
    3377             : 
    3378             :                CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3379             :                                    almo_scf_env%matrix_sigma(ispin), &
    3380             :                                    0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    3381             :                CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
    3382             :                                    almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3383             :                                    0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    3384             : 
    3385             :                frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    3386             :                CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    3387             :                frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    3388             :                IF (unit_nr > 0) THEN
    3389             :                   WRITE (unit_nr, *) "Error for (inv(sqrt(SIG))*SIG*inv(sqrt(SIG))-I)", frob_matrix/frob_matrix_base
    3390             :                END IF
    3391             : 
    3392             :                CALL dbcsr_release(matrix_tmp1)
    3393             :                CALL dbcsr_release(matrix_tmp2)
    3394             :             END IF
    3395             :          END DO
    3396             : 
    3397           0 :          IF (almo_scf_env%almo_update_algorithm .EQ. almo_scf_diag) THEN
    3398             : 
    3399           0 :             DO ispin = 1, nspin
    3400             : 
    3401           0 :                t1a = m_walltime()
    3402             : 
    3403           0 :                line_search_error_threshold = almo_scf_env%real01
    3404           0 :                conjugacy_error_threshold = almo_scf_env%real02
    3405           0 :                quadratic_approx_error_threshold = almo_scf_env%real03
    3406           0 :                x_opt_eps_adaptive_factor = almo_scf_env%real04
    3407             : 
    3408             :                !! the outer loop for k optimization
    3409           0 :                outer_opt_k_max_iter = almo_scf_env%opt_k_outer_max_iter
    3410           0 :                outer_opt_k_prepare_to_exit = .FALSE.
    3411           0 :                outer_opt_k_iteration = 0
    3412           0 :                grad_norm = 0.0_dp
    3413           0 :                grad_norm_frob = 0.0_dp
    3414           0 :                CALL dbcsr_set(almo_scf_env%matrix_x(ispin), 0.0_dp)
    3415           0 :                IF (almo_scf_env%deloc_truncate_virt .EQ. virt_full) outer_opt_k_max_iter = 0
    3416             : 
    3417           0 :                DO
    3418             : 
    3419             :                   ! obtain proper retained virtuals (1-R)|ALMO_vr>
    3420             :                   CALL apply_projector(psi_in=almo_scf_env%matrix_v_blk(ispin), &
    3421             :                                        psi_out=almo_scf_env%matrix_v(ispin), &
    3422             :                                        psi_projector=almo_scf_env%matrix_t_blk(ispin), &
    3423             :                                        metric=almo_scf_env%matrix_s(1), &
    3424             :                                        project_out=.TRUE., &
    3425             :                                        psi_projector_orthogonal=.FALSE., &
    3426             :                                        proj_in_template=almo_scf_env%matrix_ov(ispin), &
    3427             :                                        eps_filter=almo_scf_env%eps_filter, &
    3428           0 :                                        sig_inv_projector=almo_scf_env%matrix_sigma_inv(ispin))
    3429             :                   !sig_inv_template=almo_scf_env%matrix_sigma_inv(ispin),&
    3430             : 
    3431             :                   ! save initial retained virtuals
    3432             :                   CALL dbcsr_create(vr_fixed, &
    3433           0 :                                     template=almo_scf_env%matrix_v(ispin))
    3434           0 :                   CALL dbcsr_copy(vr_fixed, almo_scf_env%matrix_v(ispin))
    3435             : 
    3436             :                   ! init matrices common for optimized and non-optimized virts
    3437             :                   CALL dbcsr_create(sigma_vv_sqrt, &
    3438             :                                     template=almo_scf_env%matrix_sigma_vv(ispin), &
    3439           0 :                                     matrix_type=dbcsr_type_no_symmetry)
    3440             :                   CALL dbcsr_create(sigma_vv_sqrt_inv, &
    3441             :                                     template=almo_scf_env%matrix_sigma_vv(ispin), &
    3442           0 :                                     matrix_type=dbcsr_type_no_symmetry)
    3443             :                   CALL dbcsr_create(sigma_vv_sqrt_inv_guess, &
    3444             :                                     template=almo_scf_env%matrix_sigma_vv(ispin), &
    3445           0 :                                     matrix_type=dbcsr_type_no_symmetry)
    3446             :                   CALL dbcsr_create(sigma_vv_sqrt_guess, &
    3447             :                                     template=almo_scf_env%matrix_sigma_vv(ispin), &
    3448           0 :                                     matrix_type=dbcsr_type_no_symmetry)
    3449           0 :                   CALL dbcsr_set(sigma_vv_sqrt_guess, 0.0_dp)
    3450           0 :                   CALL dbcsr_add_on_diag(sigma_vv_sqrt_guess, 1.0_dp)
    3451           0 :                   CALL dbcsr_filter(sigma_vv_sqrt_guess, almo_scf_env%eps_filter)
    3452           0 :                   CALL dbcsr_set(sigma_vv_sqrt_inv_guess, 0.0_dp)
    3453           0 :                   CALL dbcsr_add_on_diag(sigma_vv_sqrt_inv_guess, 1.0_dp)
    3454           0 :                   CALL dbcsr_filter(sigma_vv_sqrt_inv_guess, almo_scf_env%eps_filter)
    3455             : 
    3456             :                   ! do things required to optimize virtuals
    3457           0 :                   IF (almo_scf_env%deloc_truncate_virt .NE. virt_full) THEN
    3458             : 
    3459             :                      ! project retained virtuals out of discarded block-by-block
    3460             :                      ! (1-Q^VR_ALMO)|ALMO_vd>
    3461             :                      ! this is probably not necessary, do it just to be safe
    3462             :                      !CALL apply_projector(psi_in=almo_scf_env%matrix_v_disc_blk(ispin),&
    3463             :                      !        psi_out=almo_scf_env%matrix_v_disc(ispin),&
    3464             :                      !        psi_projector=almo_scf_env%matrix_v_blk(ispin),&
    3465             :                      !        metric=almo_scf_env%matrix_s_blk(1),&
    3466             :                      !        project_out=.TRUE.,&
    3467             :                      !        psi_projector_orthogonal=.FALSE.,&
    3468             :                      !        proj_in_template=almo_scf_env%matrix_k_tr(ispin),&
    3469             :                      !        eps_filter=almo_scf_env%eps_filter,&
    3470             :                      !        sig_inv_template=almo_scf_env%matrix_sigma_vv(ispin))
    3471             :                      !CALL dbcsr_copy(almo_scf_env%matrix_v_disc_blk(ispin),&
    3472             :                      !        almo_scf_env%matrix_v_disc(ispin))
    3473             : 
    3474             :                      ! construct discarded virtuals (1-R)|ALMO_vd>
    3475             :                      CALL apply_projector(psi_in=almo_scf_env%matrix_v_disc_blk(ispin), &
    3476             :                                           psi_out=almo_scf_env%matrix_v_disc(ispin), &
    3477             :                                           psi_projector=almo_scf_env%matrix_t_blk(ispin), &
    3478             :                                           metric=almo_scf_env%matrix_s(1), &
    3479             :                                           project_out=.TRUE., &
    3480             :                                           psi_projector_orthogonal=.FALSE., &
    3481             :                                           proj_in_template=almo_scf_env%matrix_ov_disc(ispin), &
    3482             :                                           eps_filter=almo_scf_env%eps_filter, &
    3483           0 :                                           sig_inv_projector=almo_scf_env%matrix_sigma_inv(ispin))
    3484             :                      !sig_inv_template=almo_scf_env%matrix_sigma_inv(ispin),&
    3485             : 
    3486             :                      ! save initial discarded
    3487             :                      CALL dbcsr_create(vd_fixed, &
    3488           0 :                                        template=almo_scf_env%matrix_v_disc(ispin))
    3489           0 :                      CALL dbcsr_copy(vd_fixed, almo_scf_env%matrix_v_disc(ispin))
    3490             : 
    3491             :                      !! create the down metric in the retained k-subspace
    3492             :                      CALL dbcsr_create(k_vr_index_down, &
    3493             :                                        template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
    3494           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    3495             :                      !CALL dbcsr_copy(k_vr_index_down,&
    3496             :                      !        almo_scf_env%matrix_sigma_vv_blk(ispin))
    3497             : 
    3498             :                      !CALL get_overlap(bra=almo_scf_env%matrix_v_blk(ispin),&
    3499             :                      !        ket=almo_scf_env%matrix_v_blk(ispin),&
    3500             :                      !        overlap=k_vr_index_down,&
    3501             :                      !        metric=almo_scf_env%matrix_s_blk(1),&
    3502             :                      !        retain_overlap_sparsity=.FALSE.,&
    3503             :                      !        eps_filter=almo_scf_env%eps_filter)
    3504             : 
    3505             :                      !! create the up metric in the discarded k-subspace
    3506             :                      CALL dbcsr_create(k_vd_index_down, &
    3507             :                                        template=almo_scf_env%matrix_vv_disc_blk(ispin), &
    3508           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    3509             :                      !CALL dbcsr_init(k_vd_index_up)
    3510             :                      !CALL dbcsr_create(k_vd_index_up,&
    3511             :                      !        template=almo_scf_env%matrix_vv_disc_blk(ispin),&
    3512             :                      !        matrix_type=dbcsr_type_no_symmetry)
    3513             :                      !CALL dbcsr_copy(k_vd_index_down,&
    3514             :                      !        almo_scf_env%matrix_vv_disc_blk(ispin))
    3515             : 
    3516             :                      !CALL get_overlap(bra=almo_scf_env%matrix_v_disc_blk(ispin),&
    3517             :                      !        ket=almo_scf_env%matrix_v_disc_blk(ispin),&
    3518             :                      !        overlap=k_vd_index_down,&
    3519             :                      !        metric=almo_scf_env%matrix_s_blk(1),&
    3520             :                      !        retain_overlap_sparsity=.FALSE.,&
    3521             :                      !        eps_filter=almo_scf_env%eps_filter)
    3522             : 
    3523             :                      !IF (unit_nr>0) THEN
    3524             :                      !   WRITE(unit_nr,*) "Inverting blocked overlap matrix of discarded virtuals"
    3525             :                      !ENDIF
    3526             :                      !CALL invert_Hotelling(k_vd_index_up,&
    3527             :                      !        k_vd_index_down,&
    3528             :                      !        almo_scf_env%eps_filter)
    3529             :                      !IF (safe_mode) THEN
    3530             :                      !   CALL dbcsr_init(matrix_tmp1)
    3531             :                      !   CALL dbcsr_create(matrix_tmp1,template=k_vd_index_down,&
    3532             :                      !                        matrix_type=dbcsr_type_no_symmetry)
    3533             :                      !   CALL dbcsr_multiply("N","N",1.0_dp,k_vd_index_up,&
    3534             :                      !                          k_vd_index_down,&
    3535             :                      !                          0.0_dp, matrix_tmp1,&
    3536             :                      !                          filter_eps=almo_scf_env%eps_filter)
    3537             :                      !   frob_matrix_base=dbcsr_frobenius_norm(matrix_tmp1)
    3538             :                      !   CALL dbcsr_add_on_diag(matrix_tmp1,-1.0_dp)
    3539             :                      !   frob_matrix=dbcsr_frobenius_norm(matrix_tmp1)
    3540             :                      !   IF (unit_nr>0) THEN
    3541             :                      !      WRITE(unit_nr,*) "Error for (inv(SIG)*SIG-I)",&
    3542             :                      !            frob_matrix/frob_matrix_base
    3543             :                      !   ENDIF
    3544             :                      !   CALL dbcsr_release(matrix_tmp1)
    3545             :                      !ENDIF
    3546             : 
    3547             :                      ! init matrices necessary for optimization of truncated virts
    3548             :                      ! init blocked gradient before setting K to zero
    3549             :                      ! otherwise the block structure might be lost
    3550             :                      CALL dbcsr_create(grad, &
    3551           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3552           0 :                      CALL dbcsr_copy(grad, almo_scf_env%matrix_k_blk(ispin))
    3553             : 
    3554             :                      ! init MD in the k-space
    3555           0 :                      md_in_k_space = almo_scf_env%logical01
    3556           0 :                      IF (md_in_k_space) THEN
    3557             :                         CALL dbcsr_create(velocity, &
    3558           0 :                                           template=almo_scf_env%matrix_k_blk(ispin))
    3559           0 :                         CALL dbcsr_copy(velocity, almo_scf_env%matrix_k_blk(ispin))
    3560           0 :                         CALL dbcsr_set(velocity, 0.0_dp)
    3561           0 :                         time_step = almo_scf_env%opt_k_trial_step_size
    3562             :                      END IF
    3563             : 
    3564             :                      CALL dbcsr_create(prev_step, &
    3565           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3566             : 
    3567             :                      CALL dbcsr_create(prev_minus_prec_grad, &
    3568           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3569             : 
    3570             :                      ! initialize diagonal blocks of the preconditioner to 1.0_dp
    3571             :                      CALL dbcsr_create(prec, &
    3572           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3573           0 :                      CALL dbcsr_copy(prec, almo_scf_env%matrix_k_blk(ispin))
    3574           0 :                      CALL dbcsr_set(prec, 1.0_dp)
    3575             : 
    3576             :                      ! generate initial K (extrapolate if previous values are available)
    3577           0 :                      CALL dbcsr_set(almo_scf_env%matrix_k_blk(ispin), 0.0_dp)
    3578             :                      ! matrix_k_central stores current k because matrix_k_blk is updated
    3579             :                      ! during linear search
    3580             :                      CALL dbcsr_create(matrix_k_central, &
    3581           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3582             :                      CALL dbcsr_copy(matrix_k_central, &
    3583           0 :                                      almo_scf_env%matrix_k_blk(ispin))
    3584             :                      CALL dbcsr_create(tmp_k_blk, &
    3585           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3586             :                      CALL dbcsr_create(step, &
    3587           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3588           0 :                      CALL dbcsr_set(step, 0.0_dp)
    3589             :                      CALL dbcsr_create(t_curr, &
    3590           0 :                                        template=almo_scf_env%matrix_t(ispin))
    3591             :                      CALL dbcsr_create(sigma_oo_curr, &
    3592             :                                        template=almo_scf_env%matrix_sigma(ispin), &
    3593           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    3594             :                      CALL dbcsr_create(sigma_oo_curr_inv, &
    3595             :                                        template=almo_scf_env%matrix_sigma(ispin), &
    3596           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    3597             :                      CALL dbcsr_create(tmp1_n_vr, &
    3598           0 :                                        template=almo_scf_env%matrix_v(ispin))
    3599             :                      CALL dbcsr_create(tmp3_vd_vr, &
    3600           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3601             :                      CALL dbcsr_create(tmp2_n_o, &
    3602           0 :                                        template=almo_scf_env%matrix_t(ispin))
    3603             :                      CALL dbcsr_create(tmp4_o_vr, &
    3604           0 :                                        template=almo_scf_env%matrix_ov(ispin))
    3605             :                      CALL dbcsr_create(prev_grad, &
    3606           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3607           0 :                      CALL dbcsr_set(prev_grad, 0.0_dp)
    3608             : 
    3609             :                      !CALL dbcsr_init(sigma_oo_guess)
    3610             :                      !CALL dbcsr_create(sigma_oo_guess,&
    3611             :                      !        template=almo_scf_env%matrix_sigma(ispin),&
    3612             :                      !        matrix_type=dbcsr_type_no_symmetry)
    3613             :                      !CALL dbcsr_set(sigma_oo_guess,0.0_dp)
    3614             :                      !CALL dbcsr_add_on_diag(sigma_oo_guess,1.0_dp)
    3615             :                      !CALL dbcsr_filter(sigma_oo_guess,almo_scf_env%eps_filter)
    3616             :                      !CALL dbcsr_print(sigma_oo_guess)
    3617             : 
    3618             :                   END IF ! done constructing discarded virtuals
    3619             : 
    3620             :                   ! init variables
    3621           0 :                   opt_k_max_iter = almo_scf_env%opt_k_max_iter
    3622           0 :                   iteration = 0
    3623           0 :                   converged = .FALSE.
    3624           0 :                   prepare_to_exit = .FALSE.
    3625           0 :                   beta = 0.0_dp
    3626           0 :                   line_search = .FALSE.
    3627           0 :                   obj_function = 0.0_dp
    3628           0 :                   conjugacy_error = 0.0_dp
    3629           0 :                   line_search_error = 0.0_dp
    3630           0 :                   fun0 = 0.0_dp
    3631           0 :                   fun1 = 0.0_dp
    3632           0 :                   gfun0 = 0.0_dp
    3633           0 :                   gfun1 = 0.0_dp
    3634           0 :                   step_size_quadratic_approx = 0.0_dp
    3635           0 :                   reset_step_size = .TRUE.
    3636           0 :                   IF (almo_scf_env%deloc_truncate_virt .EQ. virt_full) opt_k_max_iter = 0
    3637             : 
    3638             :                   ! start cg iterations to optimize matrix_k_blk
    3639           0 :                   DO
    3640             : 
    3641           0 :                      CALL timeset('k_opt_vr', handle1)
    3642             : 
    3643           0 :                      IF (almo_scf_env%deloc_truncate_virt .NE. virt_full) THEN
    3644             : 
    3645             :                         ! construct k-excited virtuals
    3646             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, vd_fixed, &
    3647             :                                             almo_scf_env%matrix_k_blk(ispin), &
    3648             :                                             0.0_dp, almo_scf_env%matrix_v(ispin), &
    3649           0 :                                             filter_eps=almo_scf_env%eps_filter)
    3650             :                         CALL dbcsr_add(almo_scf_env%matrix_v(ispin), vr_fixed, &
    3651           0 :                                        +1.0_dp, +1.0_dp)
    3652             :                      END IF
    3653             : 
    3654             :                      ! decompose the overlap matrix of the current retained orbitals
    3655             :                      !IF (unit_nr>0) THEN
    3656             :                      !   WRITE(unit_nr,*) "decompose the active VV overlap matrix"
    3657             :                      !ENDIF
    3658             :                      CALL get_overlap(bra=almo_scf_env%matrix_v(ispin), &
    3659             :                                       ket=almo_scf_env%matrix_v(ispin), &
    3660             :                                       overlap=almo_scf_env%matrix_sigma_vv(ispin), &
    3661             :                                       metric=almo_scf_env%matrix_s(1), &
    3662             :                                       retain_overlap_sparsity=.FALSE., &
    3663           0 :                                       eps_filter=almo_scf_env%eps_filter)
    3664             :                      ! use either cholesky or sqrt
    3665             :                      !! RZK-warning: strangely, cholesky does not work with k-optimization
    3666           0 :                      IF (almo_scf_env%deloc_truncate_virt .EQ. virt_full) THEN
    3667           0 :                         CALL timeset('cholesky', handle2)
    3668           0 :                         t1cholesky = m_walltime()
    3669             : 
    3670             :                         ! re-create sigma_vv_sqrt because desymmetrize is buggy -
    3671             :                         ! it will create multiple copies of blocks
    3672             :                         CALL dbcsr_create(sigma_vv_sqrt, &
    3673             :                                           template=almo_scf_env%matrix_sigma_vv(ispin), &
    3674           0 :                                           matrix_type=dbcsr_type_no_symmetry)
    3675             :                         CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma_vv(ispin), &
    3676           0 :                                                 sigma_vv_sqrt)
    3677             :                         CALL cp_dbcsr_cholesky_decompose(sigma_vv_sqrt, &
    3678             :                                                          para_env=almo_scf_env%para_env, &
    3679           0 :                                                          blacs_env=almo_scf_env%blacs_env)
    3680           0 :                         CALL make_triu(sigma_vv_sqrt)
    3681           0 :                         CALL dbcsr_filter(sigma_vv_sqrt, almo_scf_env%eps_filter)
    3682             :                         ! apply SOLVE to compute U^(-1) : U*U^(-1)=I
    3683           0 :                         CALL dbcsr_get_info(sigma_vv_sqrt, nfullrows_total=n)
    3684             :                         CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma_vv(ispin), &
    3685           0 :                                           matrix_type=dbcsr_type_no_symmetry)
    3686           0 :                         CALL dbcsr_set(matrix_tmp1, 0.0_dp)
    3687           0 :                         CALL dbcsr_add_on_diag(matrix_tmp1, 1.0_dp)
    3688             :                         CALL cp_dbcsr_cholesky_restore(matrix_tmp1, n, sigma_vv_sqrt, &
    3689             :                                                        sigma_vv_sqrt_inv, op="SOLVE", pos="RIGHT", &
    3690             :                                                        para_env=almo_scf_env%para_env, &
    3691           0 :                                                        blacs_env=almo_scf_env%blacs_env)
    3692           0 :                         CALL dbcsr_filter(sigma_vv_sqrt_inv, almo_scf_env%eps_filter)
    3693           0 :                         CALL dbcsr_release(matrix_tmp1)
    3694             :                         IF (safe_mode) THEN
    3695             :                            CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma_vv(ispin), &
    3696             :                                              matrix_type=dbcsr_type_no_symmetry)
    3697             :                            CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma_vv(ispin), &
    3698             :                                                    matrix_tmp1)
    3699             :                            CALL dbcsr_multiply("T", "N", 1.0_dp, sigma_vv_sqrt, &
    3700             :                                                sigma_vv_sqrt, &
    3701             :                                                -1.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    3702             :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    3703             :                            CALL dbcsr_add_on_diag(matrix_tmp1, 1.0_dp)
    3704             :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    3705             :                            IF (unit_nr > 0) THEN
    3706             :                               WRITE (unit_nr, *) "Error for ( U^T * U - Sig )", &
    3707             :                                  frob_matrix/frob_matrix_base
    3708             :                            END IF
    3709             :                            CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_vv_sqrt_inv, &
    3710             :                                                sigma_vv_sqrt, &
    3711             :                                                0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    3712             :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    3713             :                            CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
    3714             :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    3715             :                            IF (unit_nr > 0) THEN
    3716             :                               WRITE (unit_nr, *) "Error for ( inv(U) * U - I )", &
    3717             :                                  frob_matrix/frob_matrix_base
    3718             :                            END IF
    3719             :                            CALL dbcsr_release(matrix_tmp1)
    3720             :                         END IF ! safe_mode
    3721           0 :                         t2cholesky = m_walltime()
    3722           0 :                         IF (unit_nr > 0) THEN
    3723           0 :                            WRITE (unit_nr, *) "Cholesky+inverse wall-time: ", t2cholesky - t1cholesky
    3724             :                         END IF
    3725           0 :                         CALL timestop(handle2)
    3726             :                      ELSE
    3727             :                         CALL matrix_sqrt_Newton_Schulz(sigma_vv_sqrt, &
    3728             :                                                        sigma_vv_sqrt_inv, &
    3729             :                                                        almo_scf_env%matrix_sigma_vv(ispin), &
    3730             :                                                        !matrix_sqrt_inv_guess=sigma_vv_sqrt_inv_guess,&
    3731             :                                                        !matrix_sqrt_guess=sigma_vv_sqrt_guess,&
    3732             :                                                        threshold=almo_scf_env%eps_filter, &
    3733             :                                                        order=almo_scf_env%order_lanczos, &
    3734             :                                                        eps_lanczos=almo_scf_env%eps_lanczos, &
    3735           0 :                                                        max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    3736           0 :                         CALL dbcsr_copy(sigma_vv_sqrt_inv_guess, sigma_vv_sqrt_inv)
    3737           0 :                         CALL dbcsr_copy(sigma_vv_sqrt_guess, sigma_vv_sqrt)
    3738             :                         IF (safe_mode) THEN
    3739             :                            CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma_vv(ispin), &
    3740             :                                              matrix_type=dbcsr_type_no_symmetry)
    3741             :                            CALL dbcsr_create(matrix_tmp2, template=almo_scf_env%matrix_sigma_vv(ispin), &
    3742             :                                              matrix_type=dbcsr_type_no_symmetry)
    3743             : 
    3744             :                            CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_vv_sqrt_inv, &
    3745             :                                                almo_scf_env%matrix_sigma_vv(ispin), &
    3746             :                                                0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    3747             :                            CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
    3748             :                                                sigma_vv_sqrt_inv, &
    3749             :                                                0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    3750             : 
    3751             :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    3752             :                            CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    3753             :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    3754             :                            IF (unit_nr > 0) THEN
    3755             :                               WRITE (unit_nr, *) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)", &
    3756             :                                  frob_matrix/frob_matrix_base
    3757             :                            END IF
    3758             : 
    3759             :                            CALL dbcsr_release(matrix_tmp1)
    3760             :                            CALL dbcsr_release(matrix_tmp2)
    3761             :                         END IF
    3762             :                      END IF
    3763           0 :                      CALL timestop(handle1)
    3764             : 
    3765             :                      ! compute excitation amplitudes (to the current set of retained virtuals)
    3766             :                      ! set convergence criterion for x-optimization
    3767           0 :                      IF ((iteration .EQ. 0) .AND. (.NOT. line_search) .AND. &
    3768             :                          (outer_opt_k_iteration .EQ. 0)) THEN
    3769             :                         x_opt_eps_adaptive = &
    3770           0 :                            almo_scf_env%deloc_cayley_eps_convergence
    3771             :                      ELSE
    3772             :                         x_opt_eps_adaptive = &
    3773             :                            MAX(ABS(almo_scf_env%deloc_cayley_eps_convergence), &
    3774           0 :                                ABS(x_opt_eps_adaptive_factor*grad_norm))
    3775             :                      END IF
    3776           0 :                      CALL ct_step_env_init(ct_step_env)
    3777             :                      CALL ct_step_env_set(ct_step_env, &
    3778             :                                           para_env=almo_scf_env%para_env, &
    3779             :                                           blacs_env=almo_scf_env%blacs_env, &
    3780             :                                           use_occ_orbs=.TRUE., &
    3781             :                                           use_virt_orbs=.TRUE., &
    3782             :                                           occ_orbs_orthogonal=.FALSE., &
    3783             :                                           virt_orbs_orthogonal=.FALSE., &
    3784             :                                           pp_preconditioner_full=almo_scf_env%deloc_cayley_occ_precond, &
    3785             :                                           qq_preconditioner_full=almo_scf_env%deloc_cayley_vir_precond, &
    3786             :                                           tensor_type=almo_scf_env%deloc_cayley_tensor_type, &
    3787             :                                           neglect_quadratic_term=almo_scf_env%deloc_cayley_linear, &
    3788             :                                           conjugator=almo_scf_env%deloc_cayley_conjugator, &
    3789             :                                           max_iter=almo_scf_env%deloc_cayley_max_iter, &
    3790             :                                           calculate_energy_corr=.TRUE., &
    3791             :                                           update_p=.FALSE., &
    3792             :                                           update_q=.FALSE., &
    3793             :                                           eps_convergence=x_opt_eps_adaptive, &
    3794             :                                           eps_filter=almo_scf_env%eps_filter, &
    3795             :                                           !nspins=1,&
    3796             :                                           q_index_up=sigma_vv_sqrt_inv, &
    3797             :                                           q_index_down=sigma_vv_sqrt, &
    3798             :                                           p_index_up=almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3799             :                                           p_index_down=almo_scf_env%matrix_sigma_sqrt(ispin), &
    3800             :                                           matrix_ks=almo_scf_env%matrix_ks_0deloc(ispin), &
    3801             :                                           matrix_t=almo_scf_env%matrix_t(ispin), &
    3802             :                                           matrix_qp_template=almo_scf_env%matrix_vo(ispin), &
    3803             :                                           matrix_pq_template=almo_scf_env%matrix_ov(ispin), &
    3804             :                                           matrix_v=almo_scf_env%matrix_v(ispin), &
    3805           0 :                                           matrix_x_guess=almo_scf_env%matrix_x(ispin))
    3806             :                      ! perform calculations
    3807           0 :                      CALL ct_step_execute(ct_step_env)
    3808             :                      ! get the energy correction
    3809             :                      CALL ct_step_env_get(ct_step_env, &
    3810             :                                           energy_correction=energy_correction(ispin), &
    3811           0 :                                           copy_matrix_x=almo_scf_env%matrix_x(ispin))
    3812           0 :                      CALL ct_step_env_clean(ct_step_env)
    3813             :                      ! RZK-warning matrix_x is being transformed
    3814             :                      ! back and forth between orth and up_down representations
    3815           0 :                      energy_correction(1) = energy_correction(1)*spin_factor
    3816             : 
    3817           0 :                      IF (opt_k_max_iter .NE. 0) THEN
    3818             : 
    3819           0 :                         CALL timeset('k_opt_t_curr', handle3)
    3820             : 
    3821             :                         ! construct current occupied orbitals T_blk + V_r*X
    3822             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3823             :                                             almo_scf_env%matrix_v(ispin), &
    3824             :                                             almo_scf_env%matrix_x(ispin), &
    3825             :                                             0.0_dp, t_curr, &
    3826           0 :                                             filter_eps=almo_scf_env%eps_filter)
    3827             :                         CALL dbcsr_add(t_curr, almo_scf_env%matrix_t_blk(ispin), &
    3828           0 :                                        +1.0_dp, +1.0_dp)
    3829             : 
    3830             :                         ! calculate current occupied overlap
    3831             :                         !IF (unit_nr>0) THEN
    3832             :                         !   WRITE(unit_nr,*) "Inverting current occ overlap matrix"
    3833             :                         !ENDIF
    3834             :                         CALL get_overlap(bra=t_curr, &
    3835             :                                          ket=t_curr, &
    3836             :                                          overlap=sigma_oo_curr, &
    3837             :                                          metric=almo_scf_env%matrix_s(1), &
    3838             :                                          retain_overlap_sparsity=.FALSE., &
    3839           0 :                                          eps_filter=almo_scf_env%eps_filter)
    3840           0 :                         IF (iteration .EQ. 0) THEN
    3841             :                            CALL invert_Hotelling(sigma_oo_curr_inv, &
    3842             :                                                  sigma_oo_curr, &
    3843             :                                                  threshold=almo_scf_env%eps_filter, &
    3844           0 :                                                  use_inv_as_guess=.FALSE.)
    3845             :                         ELSE
    3846             :                            CALL invert_Hotelling(sigma_oo_curr_inv, &
    3847             :                                                  sigma_oo_curr, &
    3848             :                                                  threshold=almo_scf_env%eps_filter, &
    3849           0 :                                                  use_inv_as_guess=.TRUE.)
    3850             :                            !CALL dbcsr_copy(sigma_oo_guess,sigma_oo_curr_inv)
    3851             :                         END IF
    3852             :                         IF (safe_mode) THEN
    3853             :                            CALL dbcsr_create(matrix_tmp1, template=sigma_oo_curr, &
    3854             :                                              matrix_type=dbcsr_type_no_symmetry)
    3855             :                            CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_oo_curr, &
    3856             :                                                sigma_oo_curr_inv, &
    3857             :                                                0.0_dp, matrix_tmp1, &
    3858             :                                                filter_eps=almo_scf_env%eps_filter)
    3859             :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    3860             :                            CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
    3861             :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    3862             :                            !CALL dbcsr_filter(matrix_tmp1,almo_scf_env%eps_filter)
    3863             :                            !CALL dbcsr_print(matrix_tmp1)
    3864             :                            IF (unit_nr > 0) THEN
    3865             :                               WRITE (unit_nr, *) "Error for (SIG*inv(SIG)-I)", &
    3866             :                                  frob_matrix/frob_matrix_base, frob_matrix_base
    3867             :                            END IF
    3868             :                            CALL dbcsr_release(matrix_tmp1)
    3869             :                         END IF
    3870             :                         IF (safe_mode) THEN
    3871             :                            CALL dbcsr_create(matrix_tmp1, template=sigma_oo_curr, &
    3872             :                                              matrix_type=dbcsr_type_no_symmetry)
    3873             :                            CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_oo_curr_inv, &
    3874             :                                                sigma_oo_curr, &
    3875             :                                                0.0_dp, matrix_tmp1, &
    3876             :                                                filter_eps=almo_scf_env%eps_filter)
    3877             :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    3878             :                            CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
    3879             :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    3880             :                            !CALL dbcsr_filter(matrix_tmp1,almo_scf_env%eps_filter)
    3881             :                            !CALL dbcsr_print(matrix_tmp1)
    3882             :                            IF (unit_nr > 0) THEN
    3883             :                               WRITE (unit_nr, *) "Error for (inv(SIG)*SIG-I)", &
    3884             :                                  frob_matrix/frob_matrix_base, frob_matrix_base
    3885             :                            END IF
    3886             :                            CALL dbcsr_release(matrix_tmp1)
    3887             :                         END IF
    3888             : 
    3889           0 :                         CALL timestop(handle3)
    3890           0 :                         CALL timeset('k_opt_vd', handle4)
    3891             : 
    3892             :                         ! construct current discarded virtuals:
    3893             :                         ! (1-R_curr)(1-Q^VR_curr)|ALMO_vd_basis> =
    3894             :                         ! = (1-Q^VR_curr)|ALMO_vd_basis>
    3895             :                         ! use sigma_vv_sqrt to store the inverse of the overlap
    3896             :                         ! sigma_vv_inv is computed from sqrt/cholesky
    3897             :                         CALL dbcsr_multiply("N", "T", 1.0_dp, &
    3898             :                                             sigma_vv_sqrt_inv, &
    3899             :                                             sigma_vv_sqrt_inv, &
    3900             :                                             0.0_dp, sigma_vv_sqrt, &
    3901           0 :                                             filter_eps=almo_scf_env%eps_filter)
    3902             :                         CALL apply_projector(psi_in=almo_scf_env%matrix_v_disc_blk(ispin), &
    3903             :                                              psi_out=almo_scf_env%matrix_v_disc(ispin), &
    3904             :                                              psi_projector=almo_scf_env%matrix_v(ispin), &
    3905             :                                              metric=almo_scf_env%matrix_s(1), &
    3906             :                                              project_out=.FALSE., &
    3907             :                                              psi_projector_orthogonal=.FALSE., &
    3908             :                                              proj_in_template=almo_scf_env%matrix_k_tr(ispin), &
    3909             :                                              eps_filter=almo_scf_env%eps_filter, &
    3910           0 :                                              sig_inv_projector=sigma_vv_sqrt)
    3911             :                         !sig_inv_template=almo_scf_env%matrix_sigma_vv(ispin),&
    3912             :                         CALL dbcsr_add(almo_scf_env%matrix_v_disc(ispin), &
    3913           0 :                                        vd_fixed, -1.0_dp, +1.0_dp)
    3914             : 
    3915           0 :                         CALL timestop(handle4)
    3916           0 :                         CALL timeset('k_opt_grad', handle5)
    3917             : 
    3918             :                         ! evaluate the gradient from the assembled components
    3919             :                         ! grad_xx = c0 [ (Vd_curr^tr)*F*T_curr*sigma_oo_curr_inv*(X^tr)]_xx
    3920             :                         ! save previous gradient to calculate conjugation coef
    3921           0 :                         IF (line_search) THEN
    3922           0 :                            CALL dbcsr_copy(prev_grad, grad)
    3923             :                         END IF
    3924             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3925             :                                             almo_scf_env%matrix_ks_0deloc(ispin), &
    3926             :                                             t_curr, &
    3927             :                                             0.0_dp, tmp2_n_o, &
    3928           0 :                                             filter_eps=almo_scf_env%eps_filter)
    3929             :                         CALL dbcsr_multiply("N", "T", 1.0_dp, &
    3930             :                                             sigma_oo_curr_inv, &
    3931             :                                             almo_scf_env%matrix_x(ispin), &
    3932             :                                             0.0_dp, tmp4_o_vr, &
    3933           0 :                                             filter_eps=almo_scf_env%eps_filter)
    3934             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3935             :                                             tmp2_n_o, &
    3936             :                                             tmp4_o_vr, &
    3937             :                                             0.0_dp, tmp1_n_vr, &
    3938           0 :                                             filter_eps=almo_scf_env%eps_filter)
    3939             :                         CALL dbcsr_multiply("T", "N", 2.0_dp*spin_factor, &
    3940             :                                             almo_scf_env%matrix_v_disc(ispin), &
    3941             :                                             tmp1_n_vr, &
    3942             :                                             0.0_dp, grad, &
    3943           0 :                                             retain_sparsity=.TRUE.)
    3944             :                         !filter_eps=almo_scf_env%eps_filter,&
    3945             :                         ! keep tmp2_n_o for the next step
    3946             :                         ! keep tmp4_o_vr for the preconditioner
    3947             : 
    3948             :                         ! check convergence and other exit criteria
    3949           0 :                         grad_norm_frob = dbcsr_frobenius_norm(grad)
    3950           0 :                         grad_norm = dbcsr_maxabs(grad)
    3951           0 :                         converged = (grad_norm .LT. almo_scf_env%opt_k_eps_convergence)
    3952           0 :                         IF (converged .OR. (iteration .GE. opt_k_max_iter)) THEN
    3953           0 :                            prepare_to_exit = .TRUE.
    3954             :                         END IF
    3955           0 :                         CALL timestop(handle5)
    3956             : 
    3957           0 :                         IF (.NOT. prepare_to_exit) THEN
    3958             : 
    3959           0 :                            CALL timeset('k_opt_energy', handle6)
    3960             : 
    3961             :                            ! compute "energy" c0*Tr[sig_inv_oo*t*F*t]
    3962             :                            CALL dbcsr_multiply("T", "N", spin_factor, &
    3963             :                                                t_curr, &
    3964             :                                                tmp2_n_o, &
    3965             :                                                0.0_dp, sigma_oo_curr, &
    3966           0 :                                                filter_eps=almo_scf_env%eps_filter)
    3967             :                            delta_obj_function = fun0
    3968           0 :                            CALL dbcsr_dot(sigma_oo_curr_inv, sigma_oo_curr, obj_function)
    3969           0 :                            delta_obj_function = obj_function - delta_obj_function
    3970           0 :                            IF (line_search) THEN
    3971             :                               fun1 = obj_function
    3972             :                            ELSE
    3973           0 :                               fun0 = obj_function
    3974             :                            END IF
    3975             : 
    3976           0 :                            CALL timestop(handle6)
    3977             : 
    3978             :                            ! update the step direction
    3979           0 :                            IF (.NOT. line_search) THEN
    3980             : 
    3981           0 :                               CALL timeset('k_opt_step', handle7)
    3982             : 
    3983           0 :                               IF ((.NOT. md_in_k_space) .AND. &
    3984             :                                   (iteration .GE. MAX(0, almo_scf_env%opt_k_prec_iter_start) .AND. &
    3985             :                                    MOD(iteration - almo_scf_env%opt_k_prec_iter_start, &
    3986             :                                        almo_scf_env%opt_k_prec_iter_freq) .EQ. 0)) THEN
    3987             : 
    3988             :                                  !IF ((iteration.eq.0).AND.(.NOT.md_in_k_space)) THEN
    3989             : 
    3990             :                                  ! compute the preconditioner
    3991           0 :                                  IF (unit_nr > 0) THEN
    3992           0 :                                     WRITE (unit_nr, *) "Computing preconditioner"
    3993             :                                  END IF
    3994             :                                  !CALL opt_k_create_preconditioner(prec,&
    3995             :                                  !        almo_scf_env%matrix_v_disc(ispin),&
    3996             :                                  !        almo_scf_env%matrix_ks_0deloc(ispin),&
    3997             :                                  !        almo_scf_env%matrix_x(ispin),&
    3998             :                                  !        tmp4_o_vr,&
    3999             :                                  !        almo_scf_env%matrix_s(1),&
    4000             :                                  !        grad,&
    4001             :                                  !        !almo_scf_env%matrix_v_disc_blk(ispin),&
    4002             :                                  !        vd_fixed,&
    4003             :                                  !        t_curr,&
    4004             :                                  !        k_vd_index_up,&
    4005             :                                  !        k_vr_index_down,&
    4006             :                                  !        tmp1_n_vr,&
    4007             :                                  !        spin_factor,&
    4008             :                                  !        almo_scf_env%eps_filter)
    4009             :                                  CALL opt_k_create_preconditioner_blk(almo_scf_env, &
    4010             :                                                                       almo_scf_env%matrix_v_disc(ispin), &
    4011             :                                                                       tmp4_o_vr, &
    4012             :                                                                       t_curr, &
    4013             :                                                                       ispin, &
    4014           0 :                                                                       spin_factor)
    4015             : 
    4016             :                               END IF
    4017             : 
    4018             :                               ! save the previous step
    4019           0 :                               CALL dbcsr_copy(prev_step, step)
    4020             : 
    4021             :                               ! compute the new step
    4022             :                               CALL opt_k_apply_preconditioner_blk(almo_scf_env, &
    4023           0 :                                                                   step, grad, ispin)
    4024             :                               !CALL dbcsr_hadamard_product(prec,grad,step)
    4025           0 :                               CALL dbcsr_scale(step, -1.0_dp)
    4026             : 
    4027             :                               ! check whether we need to reset conjugate directions
    4028           0 :                               reset_conjugator = .FALSE.
    4029             :                               ! first check if manual reset is active
    4030           0 :                               IF (iteration .LT. MAX(almo_scf_env%opt_k_conj_iter_start, 1) .OR. &
    4031             :                                   MOD(iteration - almo_scf_env%opt_k_conj_iter_start, &
    4032             :                                       almo_scf_env%opt_k_conj_iter_freq) .EQ. 0) THEN
    4033             : 
    4034             :                                  reset_conjugator = .TRUE.
    4035             : 
    4036             :                               ELSE
    4037             : 
    4038             :                                  ! check for the errors in the cg algorithm
    4039             :                                  !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4040             :                                  !CALL dbcsr_dot(grad,tmp_k_blk,numer)
    4041             :                                  !CALL dbcsr_dot(prev_grad,tmp_k_blk,denom)
    4042           0 :                                  CALL dbcsr_dot(grad, prev_minus_prec_grad, numer)
    4043           0 :                                  CALL dbcsr_dot(prev_grad, prev_minus_prec_grad, denom)
    4044           0 :                                  conjugacy_error = numer/denom
    4045             : 
    4046           0 :                                  IF (conjugacy_error .GT. MIN(0.5_dp, conjugacy_error_threshold)) THEN
    4047           0 :                                     reset_conjugator = .TRUE.
    4048           0 :                                     IF (unit_nr > 0) THEN
    4049           0 :                                        WRITE (unit_nr, *) "Lack of progress, conjugacy error is ", conjugacy_error
    4050             :                                     END IF
    4051             :                                  END IF
    4052             : 
    4053             :                                  ! check the gradient along the previous direction
    4054           0 :                                  IF ((iteration .NE. 0) .AND. (.NOT. reset_conjugator)) THEN
    4055           0 :                                     CALL dbcsr_dot(grad, prev_step, numer)
    4056           0 :                                     CALL dbcsr_dot(prev_grad, prev_step, denom)
    4057           0 :                                     line_search_error = numer/denom
    4058           0 :                                     IF (line_search_error .GT. line_search_error_threshold) THEN
    4059           0 :                                        reset_conjugator = .TRUE.
    4060           0 :                                        IF (unit_nr > 0) THEN
    4061           0 :                                           WRITE (unit_nr, *) "Bad line search, line search error is ", line_search_error
    4062             :                                        END IF
    4063             :                                     END IF
    4064             :                                  END IF
    4065             : 
    4066             :                               END IF
    4067             : 
    4068             :                               ! compute the conjugation coefficient - beta
    4069           0 :                               IF (.NOT. reset_conjugator) THEN
    4070             : 
    4071           0 :                                  SELECT CASE (almo_scf_env%opt_k_conjugator)
    4072             :                                  CASE (cg_hestenes_stiefel)
    4073           0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4074           0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4075           0 :                                     CALL dbcsr_dot(tmp_k_blk, step, numer)
    4076           0 :                                     CALL dbcsr_dot(tmp_k_blk, prev_step, denom)
    4077           0 :                                     beta = -1.0_dp*numer/denom
    4078             :                                  CASE (cg_fletcher_reeves)
    4079             :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4080             :                                     !CALL dbcsr_dot(prev_grad,tmp_k_blk,denom)
    4081             :                                     !CALL dbcsr_hadamard_product(prec,grad,tmp_k_blk)
    4082             :                                     !CALL dbcsr_dot(grad,tmp_k_blk,numer)
    4083             :                                     !beta=numer/denom
    4084           0 :                                     CALL dbcsr_dot(grad, step, numer)
    4085           0 :                                     CALL dbcsr_dot(prev_grad, prev_minus_prec_grad, denom)
    4086           0 :                                     beta = numer/denom
    4087             :                                  CASE (cg_polak_ribiere)
    4088             :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4089             :                                     !CALL dbcsr_dot(prev_grad,tmp_k_blk,denom)
    4090             :                                     !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
    4091             :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4092             :                                     !CALL dbcsr_dot(tmp_k_blk,grad,numer)
    4093           0 :                                     CALL dbcsr_dot(prev_grad, prev_minus_prec_grad, denom)
    4094           0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4095           0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4096           0 :                                     CALL dbcsr_dot(tmp_k_blk, step, numer)
    4097           0 :                                     beta = numer/denom
    4098             :                                  CASE (cg_fletcher)
    4099             :                                     !CALL dbcsr_hadamard_product(prec,grad,tmp_k_blk)
    4100             :                                     !CALL dbcsr_dot(grad,tmp_k_blk,numer)
    4101             :                                     !CALL dbcsr_dot(prev_grad,prev_step,denom)
    4102             :                                     !beta=-1.0_dp*numer/denom
    4103           0 :                                     CALL dbcsr_dot(grad, step, numer)
    4104           0 :                                     CALL dbcsr_dot(prev_grad, prev_step, denom)
    4105           0 :                                     beta = numer/denom
    4106             :                                  CASE (cg_liu_storey)
    4107           0 :                                     CALL dbcsr_dot(prev_grad, prev_step, denom)
    4108             :                                     !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
    4109             :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4110             :                                     !CALL dbcsr_dot(tmp_k_blk,grad,numer)
    4111           0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4112           0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4113           0 :                                     CALL dbcsr_dot(tmp_k_blk, step, numer)
    4114           0 :                                     beta = numer/denom
    4115             :                                  CASE (cg_dai_yuan)
    4116             :                                     !CALL dbcsr_hadamard_product(prec,grad,tmp_k_blk)
    4117             :                                     !CALL dbcsr_dot(grad,tmp_k_blk,numer)
    4118             :                                     !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
    4119             :                                     !CALL dbcsr_dot(prev_grad,prev_step,denom)
    4120             :                                     !beta=numer/denom
    4121           0 :                                     CALL dbcsr_dot(grad, step, numer)
    4122           0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4123           0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4124           0 :                                     CALL dbcsr_dot(tmp_k_blk, prev_step, denom)
    4125           0 :                                     beta = -1.0_dp*numer/denom
    4126             :                                  CASE (cg_hager_zhang)
    4127             :                                     !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
    4128             :                                     !CALL dbcsr_dot(prev_grad,prev_step,denom)
    4129             :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4130             :                                     !CALL dbcsr_dot(tmp_k_blk,prev_grad,numer)
    4131             :                                     !kappa=2.0_dp*numer/denom
    4132             :                                     !CALL dbcsr_dot(tmp_k_blk,grad,numer)
    4133             :                                     !tau=numer/denom
    4134             :                                     !CALL dbcsr_dot(prev_step,grad,numer)
    4135             :                                     !beta=tau-kappa*numer/denom
    4136           0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4137           0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4138           0 :                                     CALL dbcsr_dot(tmp_k_blk, prev_step, denom)
    4139           0 :                                     CALL dbcsr_dot(tmp_k_blk, prev_minus_prec_grad, numer)
    4140           0 :                                     kappa = -2.0_dp*numer/denom
    4141           0 :                                     CALL dbcsr_dot(tmp_k_blk, step, numer)
    4142           0 :                                     tau = -1.0_dp*numer/denom
    4143           0 :                                     CALL dbcsr_dot(prev_step, grad, numer)
    4144           0 :                                     beta = tau - kappa*numer/denom
    4145             :                                  CASE (cg_zero)
    4146           0 :                                     beta = 0.0_dp
    4147             :                                  CASE DEFAULT
    4148           0 :                                     CPABORT("illegal conjugator")
    4149             :                                  END SELECT
    4150             : 
    4151           0 :                                  IF (beta .LT. 0.0_dp) THEN
    4152           0 :                                     IF (unit_nr > 0) THEN
    4153           0 :                                        WRITE (unit_nr, *) "Beta is negative, ", beta
    4154             :                                     END IF
    4155             :                                     reset_conjugator = .TRUE.
    4156             :                                  END IF
    4157             : 
    4158             :                               END IF
    4159             : 
    4160           0 :                               IF (md_in_k_space) THEN
    4161             :                                  reset_conjugator = .TRUE.
    4162             :                               END IF
    4163             : 
    4164           0 :                               IF (reset_conjugator) THEN
    4165             : 
    4166           0 :                                  beta = 0.0_dp
    4167             :                                  !reset_step_size=.TRUE.
    4168             : 
    4169           0 :                                  IF (unit_nr > 0) THEN
    4170           0 :                                     WRITE (unit_nr, *) "(Re)-setting conjugator to zero"
    4171             :                                  END IF
    4172             : 
    4173             :                               END IF
    4174             : 
    4175             :                               ! save the preconditioned gradient
    4176           0 :                               CALL dbcsr_copy(prev_minus_prec_grad, step)
    4177             : 
    4178             :                               ! conjugate the step direction
    4179           0 :                               CALL dbcsr_add(step, prev_step, 1.0_dp, beta)
    4180             : 
    4181           0 :                               CALL timestop(handle7)
    4182             : 
    4183             :                               ! update the step direction
    4184             :                            ELSE ! step update
    4185           0 :                               conjugacy_error = 0.0_dp
    4186             :                            END IF
    4187             : 
    4188             :                            ! compute the gradient with respect to the step size in the curr direction
    4189           0 :                            IF (line_search) THEN
    4190           0 :                               CALL dbcsr_dot(grad, step, gfun1)
    4191           0 :                               line_search_error = gfun1/gfun0
    4192             :                            ELSE
    4193           0 :                               CALL dbcsr_dot(grad, step, gfun0)
    4194             :                            END IF
    4195             : 
    4196             :                            ! make a step - update k
    4197           0 :                            IF (line_search) THEN
    4198             : 
    4199             :                               ! check if the trial step provides enough numerical accuracy
    4200           0 :                               safety_multiplier = 1.0E+1_dp ! must be more than one
    4201             :                               num_threshold = MAX(EPSILON(1.0_dp), &
    4202           0 :                                                   safety_multiplier*(almo_scf_env%eps_filter**2)*almo_scf_env%ndomains)
    4203           0 :                               IF (ABS(fun1 - fun0 - gfun0*step_size) .LT. num_threshold) THEN
    4204           0 :                                  IF (unit_nr > 0) THEN
    4205             :                                     WRITE (unit_nr, '(T3,A,1X,E17.7)') &
    4206           0 :                                        "Numerical accuracy is too low to observe non-linear behavior", &
    4207           0 :                                        ABS(fun1 - fun0 - gfun0*step_size)
    4208           0 :                                     WRITE (unit_nr, '(T3,A,1X,E17.7,A,1X,E12.3)') "Error computing ", &
    4209           0 :                                        ABS(gfun0), &
    4210           0 :                                        " is smaller than the threshold", num_threshold
    4211             :                                  END IF
    4212           0 :                                  CPABORT("")
    4213             :                               END IF
    4214           0 :                               IF (ABS(gfun0) .LT. num_threshold) THEN
    4215           0 :                                  IF (unit_nr > 0) THEN
    4216           0 :                                     WRITE (unit_nr, '(T3,A,1X,E17.7,A,1X,E12.3)') "Linear gradient", &
    4217           0 :                                        ABS(gfun0), &
    4218           0 :                                        " is smaller than the threshold", num_threshold
    4219             :                                  END IF
    4220           0 :                                  CPABORT("")
    4221             :                               END IF
    4222             : 
    4223           0 :                               use_quadratic_approximation = .TRUE.
    4224           0 :                               use_cubic_approximation = .FALSE.
    4225             : 
    4226             :                               ! find the minimum assuming quadratic form
    4227             :                               ! use f0, f1, g0
    4228           0 :                               step_size_quadratic_approx = -(gfun0*step_size*step_size)/(2.0_dp*(fun1 - fun0 - gfun0*step_size))
    4229             :                               ! use f0, f1, g1
    4230           0 :                              step_size_quadratic_approx2 = -(fun1 - fun0 - step_size*gfun1/2.0_dp)/(gfun1 - (fun1 - fun0)/step_size)
    4231             : 
    4232           0 :                               IF ((step_size_quadratic_approx .LT. 0.0_dp) .AND. &
    4233             :                                   (step_size_quadratic_approx2 .LT. 0.0_dp)) THEN
    4234           0 :                                  IF (unit_nr > 0) THEN
    4235             :                                     WRITE (unit_nr, '(T3,A,1X,E17.7,1X,E17.7,1X,A)') &
    4236           0 :                                        "Quadratic approximation gives negative steps", &
    4237           0 :                                        step_size_quadratic_approx, step_size_quadratic_approx2, &
    4238           0 :                                        "trying cubic..."
    4239             :                                  END IF
    4240             :                                  use_cubic_approximation = .TRUE.
    4241             :                                  use_quadratic_approximation = .FALSE.
    4242             :                               ELSE
    4243           0 :                                  IF (step_size_quadratic_approx .LT. 0.0_dp) THEN
    4244           0 :                                     step_size_quadratic_approx = step_size_quadratic_approx2
    4245             :                                  END IF
    4246           0 :                                  IF (step_size_quadratic_approx2 .LT. 0.0_dp) THEN
    4247           0 :                                     step_size_quadratic_approx2 = step_size_quadratic_approx
    4248             :                                  END IF
    4249             :                               END IF
    4250             : 
    4251             :                               ! check accuracy of the quadratic approximation
    4252             :                               IF (use_quadratic_approximation) THEN
    4253             :                                  quadratic_approx_error = ABS(step_size_quadratic_approx - &
    4254           0 :                                                               step_size_quadratic_approx2)/step_size_quadratic_approx
    4255           0 :                                  IF (quadratic_approx_error .GT. quadratic_approx_error_threshold) THEN
    4256           0 :                                     IF (unit_nr > 0) THEN
    4257           0 :                                        WRITE (unit_nr, '(T3,A,1X,E17.7,1X,E17.7,1X,A)') "Quadratic approximation is poor", &
    4258           0 :                                           step_size_quadratic_approx, step_size_quadratic_approx2, &
    4259           0 :                                           "Try cubic approximation"
    4260             :                                     END IF
    4261             :                                     use_cubic_approximation = .TRUE.
    4262             :                                     use_quadratic_approximation = .FALSE.
    4263             :                                  END IF
    4264             :                               END IF
    4265             : 
    4266             :                               ! check if numerics is fine enough to capture the cubic form
    4267           0 :                               IF (use_cubic_approximation) THEN
    4268             : 
    4269             :                                  ! if quadratic approximation is not accurate enough
    4270             :                                  ! try to find the minimum assuming cubic form
    4271             :                                  ! aa*x**3 + bb*x**2 + cc*x + dd = f(x)
    4272           0 :                                  bb = (-step_size*gfun1 + 3.0_dp*(fun1 - fun0) - 2.0_dp*step_size*gfun0)/(step_size*step_size)
    4273           0 :                                  aa = (gfun1 - 2.0_dp*step_size*bb - gfun0)/(3.0_dp*step_size*step_size)
    4274             : 
    4275           0 :                                  IF (ABS(gfun1 - 2.0_dp*step_size*bb - gfun0) .LT. num_threshold) THEN
    4276           0 :                                     IF (unit_nr > 0) THEN
    4277             :                                        WRITE (unit_nr, '(T3,A,1X,E17.7)') &
    4278           0 :                                           "Numerical accuracy is too low to observe cubic behavior", &
    4279           0 :                                           ABS(gfun1 - 2.0_dp*step_size*bb - gfun0)
    4280             :                                     END IF
    4281             :                                     use_cubic_approximation = .FALSE.
    4282             :                                     use_quadratic_approximation = .TRUE.
    4283             :                                  END IF
    4284           0 :                                  IF (ABS(gfun1) .LT. num_threshold) THEN
    4285           0 :                                     IF (unit_nr > 0) THEN
    4286           0 :                                        WRITE (unit_nr, '(T3,A,1X,E17.7,A,1X,E12.3)') "Linear gradient", &
    4287           0 :                                           ABS(gfun1), &
    4288           0 :                                           " is smaller than the threshold", num_threshold
    4289             :                                     END IF
    4290             :                                     use_cubic_approximation = .FALSE.
    4291             :                                     use_quadratic_approximation = .TRUE.
    4292             :                                  END IF
    4293             :                               END IF
    4294             : 
    4295             :                               ! find the step assuming cubic approximation
    4296           0 :                               IF (use_cubic_approximation) THEN
    4297             :                                  ! to obtain the minimum of the cubic function solve the quadratic equation
    4298             :                                  ! 0.0*x**3 + 3.0*aa*x**2 + 2.0*bb*x + cc = 0
    4299           0 :                                  CALL analytic_line_search(0.0_dp, 3.0_dp*aa, 2.0_dp*bb, gfun0, minima, nmins)
    4300           0 :                                  IF (nmins .LT. 1) THEN
    4301           0 :                                     IF (unit_nr > 0) THEN
    4302             :                                        WRITE (unit_nr, '(T3,A)') &
    4303           0 :                                           "Cubic approximation gives zero soultions! Use quadratic approximation"
    4304             :                                     END IF
    4305             :                                     use_quadratic_approximation = .TRUE.
    4306             :                                     use_cubic_approximation = .TRUE.
    4307             :                                  ELSE
    4308           0 :                                     step_size = minima(1)
    4309           0 :                                     IF (nmins .GT. 1) THEN
    4310           0 :                                        IF (unit_nr > 0) THEN
    4311             :                                           WRITE (unit_nr, '(T3,A)') &
    4312           0 :                                              "More than one solution found! Use quadratic approximation"
    4313             :                                        END IF
    4314             :                                        use_quadratic_approximation = .TRUE.
    4315           0 :                                        use_cubic_approximation = .TRUE.
    4316             :                                     END IF
    4317             :                                  END IF
    4318             :                               END IF
    4319             : 
    4320           0 :                               IF (use_quadratic_approximation) THEN ! use quadratic approximation
    4321           0 :                                  IF (unit_nr > 0) THEN
    4322           0 :                                     WRITE (unit_nr, '(T3,A)') "Use quadratic approximation"
    4323             :                                  END IF
    4324           0 :                                  step_size = (step_size_quadratic_approx + step_size_quadratic_approx2)*0.5_dp
    4325             :                               END IF
    4326             : 
    4327             :                               ! one more check on the step size
    4328           0 :                               IF (step_size .LT. 0.0_dp) THEN
    4329           0 :                                  CPABORT("Negative step proposed")
    4330             :                               END IF
    4331             : 
    4332             :                               CALL dbcsr_copy(almo_scf_env%matrix_k_blk(ispin), &
    4333           0 :                                               matrix_k_central)
    4334             :                               CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
    4335           0 :                                              step, 1.0_dp, step_size)
    4336             :                               CALL dbcsr_copy(matrix_k_central, &
    4337           0 :                                               almo_scf_env%matrix_k_blk(ispin))
    4338           0 :                               line_search = .FALSE.
    4339             : 
    4340             :                            ELSE
    4341             : 
    4342           0 :                               IF (md_in_k_space) THEN
    4343             : 
    4344             :                                  ! update velocities v(i) = v(i-1) + 0.5*dT*(a(i-1) + a(i))
    4345           0 :                                  IF (iteration .NE. 0) THEN
    4346             :                                     CALL dbcsr_add(velocity, &
    4347           0 :                                                    step, 1.0_dp, 0.5_dp*time_step)
    4348             :                                     CALL dbcsr_add(velocity, &
    4349           0 :                                                    prev_step, 1.0_dp, 0.5_dp*time_step)
    4350             :                                  END IF
    4351           0 :                                  kin_energy = dbcsr_frobenius_norm(velocity)
    4352           0 :                                  kin_energy = 0.5_dp*kin_energy*kin_energy
    4353             : 
    4354             :                                  ! update positions k(i) = k(i-1) + dT*v(i-1) + 0.5*dT*dT*a(i-1)
    4355             :                                  CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
    4356           0 :                                                 velocity, 1.0_dp, time_step)
    4357             :                                  CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
    4358           0 :                                                 step, 1.0_dp, 0.5_dp*time_step*time_step)
    4359             : 
    4360             :                               ELSE
    4361             : 
    4362           0 :                                  IF (reset_step_size) THEN
    4363           0 :                                     step_size = almo_scf_env%opt_k_trial_step_size
    4364           0 :                                     reset_step_size = .FALSE.
    4365             :                                  ELSE
    4366           0 :                                     step_size = step_size*almo_scf_env%opt_k_trial_step_size_multiplier
    4367             :                                  END IF
    4368             :                                  CALL dbcsr_copy(almo_scf_env%matrix_k_blk(ispin), &
    4369           0 :                                                  matrix_k_central)
    4370             :                                  CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
    4371           0 :                                                 step, 1.0_dp, step_size)
    4372           0 :                                  line_search = .TRUE.
    4373             :                               END IF
    4374             : 
    4375             :                            END IF
    4376             : 
    4377             :                         END IF ! .NOT.prepare_to_exit
    4378             : 
    4379             :                         ! print the status of the optimization
    4380           0 :                         t2a = m_walltime()
    4381           0 :                         IF (unit_nr > 0) THEN
    4382           0 :                            IF (md_in_k_space) THEN
    4383             :                               WRITE (unit_nr, '(T6,A,1X,I5,1X,E12.3,E16.7,F15.9,F15.9,F15.9,E12.3,F15.9,F15.9,F8.3)') &
    4384           0 :                                  "K iter CG", iteration, time_step, time_step*iteration, &
    4385           0 :                                  energy_correction(ispin), obj_function, delta_obj_function, grad_norm, &
    4386           0 :                                  kin_energy, kin_energy + obj_function, beta
    4387             :                            ELSE
    4388           0 :                               IF (line_search .OR. prepare_to_exit) THEN
    4389             :                                  WRITE (unit_nr, '(T6,A,1X,I3,1X,E12.3,F16.10,F16.10,E12.3,E12.3,E12.3,F8.3,F8.3,F10.3)') &
    4390           0 :                                     "K iter CG", iteration, step_size, &
    4391           0 :                                     energy_correction(ispin), delta_obj_function, grad_norm, &
    4392           0 :                                     gfun0, line_search_error, beta, conjugacy_error, t2a - t1a
    4393             :                                  !(flop1+flop2)/(1.0E6_dp*(t2-t1))
    4394             :                               ELSE
    4395             :                                  WRITE (unit_nr, '(T6,A,1X,I3,1X,E12.3,F16.10,F16.10,E12.3,E12.3,E12.3,F8.3,F8.3,F10.3)') &
    4396           0 :                                     "K iter LS", iteration, step_size, &
    4397           0 :                                     energy_correction(ispin), delta_obj_function, grad_norm, &
    4398           0 :                                     gfun1, line_search_error, beta, conjugacy_error, t2a - t1a
    4399             :                                  !(flop1+flop2)/(1.0E6_dp*(t2-t1))
    4400             :                               END IF
    4401             :                            END IF
    4402           0 :                            CALL m_flush(unit_nr)
    4403             :                         END IF
    4404           0 :                         t1a = m_walltime()
    4405             : 
    4406             :                      ELSE ! opt_k_max_iter .eq. 0
    4407             :                         prepare_to_exit = .TRUE.
    4408             :                      END IF ! opt_k_max_iter .ne. 0
    4409             : 
    4410           0 :                      IF (.NOT. line_search) iteration = iteration + 1
    4411             : 
    4412           0 :                      IF (prepare_to_exit) EXIT
    4413             : 
    4414             :                   END DO ! end iterations on K
    4415             : 
    4416           0 :                   IF (converged .OR. (outer_opt_k_iteration .GE. outer_opt_k_max_iter)) THEN
    4417           0 :                      outer_opt_k_prepare_to_exit = .TRUE.
    4418             :                   END IF
    4419             : 
    4420           0 :                   IF (almo_scf_env%deloc_truncate_virt .NE. virt_full) THEN
    4421             : 
    4422           0 :                      IF (unit_nr > 0) THEN
    4423           0 :                         WRITE (unit_nr, *) "Updating ALMO virtuals"
    4424             :                      END IF
    4425             : 
    4426           0 :                      CALL timeset('k_opt_v0_update', handle8)
    4427             : 
    4428             :                      ! update retained ALMO virtuals to restart the cg iterations
    4429             :                      CALL dbcsr_multiply("N", "N", 1.0_dp, &
    4430             :                                          almo_scf_env%matrix_v_disc_blk(ispin), &
    4431             :                                          almo_scf_env%matrix_k_blk(ispin), &
    4432             :                                          0.0_dp, vr_fixed, &
    4433           0 :                                          filter_eps=almo_scf_env%eps_filter)
    4434             :                      CALL dbcsr_add(vr_fixed, almo_scf_env%matrix_v_blk(ispin), &
    4435           0 :                                     +1.0_dp, +1.0_dp)
    4436             : 
    4437             :                      ! update discarded ALMO virtuals to restart the cg iterations
    4438             :                      CALL dbcsr_multiply("N", "T", 1.0_dp, &
    4439             :                                          almo_scf_env%matrix_v_blk(ispin), &
    4440             :                                          almo_scf_env%matrix_k_blk(ispin), &
    4441             :                                          0.0_dp, vd_fixed, &
    4442           0 :                                          filter_eps=almo_scf_env%eps_filter)
    4443             :                      CALL dbcsr_add(vd_fixed, almo_scf_env%matrix_v_disc_blk(ispin), &
    4444           0 :                                     -1.0_dp, +1.0_dp)
    4445             : 
    4446             :                      ! orthogonalize new orbitals on fragments
    4447             :                      CALL get_overlap(bra=vr_fixed, &
    4448             :                                       ket=vr_fixed, &
    4449             :                                       overlap=k_vr_index_down, &
    4450             :                                       metric=almo_scf_env%matrix_s_blk(1), &
    4451             :                                       retain_overlap_sparsity=.FALSE., &
    4452           0 :                                       eps_filter=almo_scf_env%eps_filter)
    4453             :                      CALL dbcsr_create(vr_index_sqrt_inv, template=k_vr_index_down, &
    4454           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    4455             :                      CALL dbcsr_create(vr_index_sqrt, template=k_vr_index_down, &
    4456           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    4457             :                      CALL matrix_sqrt_Newton_Schulz(vr_index_sqrt, &
    4458             :                                                     vr_index_sqrt_inv, &
    4459             :                                                     k_vr_index_down, &
    4460             :                                                     threshold=almo_scf_env%eps_filter, &
    4461             :                                                     order=almo_scf_env%order_lanczos, &
    4462             :                                                     eps_lanczos=almo_scf_env%eps_lanczos, &
    4463           0 :                                                     max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    4464             :                      IF (safe_mode) THEN
    4465             :                         CALL dbcsr_create(matrix_tmp1, template=k_vr_index_down, &
    4466             :                                           matrix_type=dbcsr_type_no_symmetry)
    4467             :                         CALL dbcsr_create(matrix_tmp2, template=k_vr_index_down, &
    4468             :                                           matrix_type=dbcsr_type_no_symmetry)
    4469             : 
    4470             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, vr_index_sqrt_inv, &
    4471             :                                             k_vr_index_down, &
    4472             :                                             0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    4473             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
    4474             :                                             vr_index_sqrt_inv, &
    4475             :                                             0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    4476             : 
    4477             :                         frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    4478             :                         CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    4479             :                         frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    4480             :                         IF (unit_nr > 0) THEN
    4481             :                            WRITE (unit_nr, *) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)", &
    4482             :                               frob_matrix/frob_matrix_base
    4483             :                         END IF
    4484             : 
    4485             :                         CALL dbcsr_release(matrix_tmp1)
    4486             :                         CALL dbcsr_release(matrix_tmp2)
    4487             :                      END IF
    4488             :                      CALL dbcsr_multiply("N", "N", 1.0_dp, &
    4489             :                                          vr_fixed, &
    4490             :                                          vr_index_sqrt_inv, &
    4491             :                                          0.0_dp, almo_scf_env%matrix_v_blk(ispin), &
    4492           0 :                                          filter_eps=almo_scf_env%eps_filter)
    4493             : 
    4494             :                      CALL get_overlap(bra=vd_fixed, &
    4495             :                                       ket=vd_fixed, &
    4496             :                                       overlap=k_vd_index_down, &
    4497             :                                       metric=almo_scf_env%matrix_s_blk(1), &
    4498             :                                       retain_overlap_sparsity=.FALSE., &
    4499           0 :                                       eps_filter=almo_scf_env%eps_filter)
    4500             :                      CALL dbcsr_create(vd_index_sqrt_inv, template=k_vd_index_down, &
    4501           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    4502             :                      CALL dbcsr_create(vd_index_sqrt, template=k_vd_index_down, &
    4503           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    4504             :                      CALL matrix_sqrt_Newton_Schulz(vd_index_sqrt, &
    4505             :                                                     vd_index_sqrt_inv, &
    4506             :                                                     k_vd_index_down, &
    4507             :                                                     threshold=almo_scf_env%eps_filter, &
    4508             :                                                     order=almo_scf_env%order_lanczos, &
    4509             :                                                     eps_lanczos=almo_scf_env%eps_lanczos, &
    4510           0 :                                                     max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    4511             :                      IF (safe_mode) THEN
    4512             :                         CALL dbcsr_create(matrix_tmp1, template=k_vd_index_down, &
    4513             :                                           matrix_type=dbcsr_type_no_symmetry)
    4514             :                         CALL dbcsr_create(matrix_tmp2, template=k_vd_index_down, &
    4515             :                                           matrix_type=dbcsr_type_no_symmetry)
    4516             : 
    4517             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, vd_index_sqrt_inv, &
    4518             :                                             k_vd_index_down, &
    4519             :                                             0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    4520             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
    4521             :                                             vd_index_sqrt_inv, &
    4522             :                                             0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    4523             : 
    4524             :                         frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    4525             :                         CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    4526             :                         frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    4527             :                         IF (unit_nr > 0) THEN
    4528             :                            WRITE (unit_nr, *) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)", &
    4529             :                               frob_matrix/frob_matrix_base
    4530             :                         END IF
    4531             : 
    4532             :                         CALL dbcsr_release(matrix_tmp1)
    4533             :                         CALL dbcsr_release(matrix_tmp2)
    4534             :                      END IF
    4535             :                      CALL dbcsr_multiply("N", "N", 1.0_dp, &
    4536             :                                          vd_fixed, &
    4537             :                                          vd_index_sqrt_inv, &
    4538             :                                          0.0_dp, almo_scf_env%matrix_v_disc_blk(ispin), &
    4539           0 :                                          filter_eps=almo_scf_env%eps_filter)
    4540             : 
    4541           0 :                      CALL dbcsr_release(vr_index_sqrt_inv)
    4542           0 :                      CALL dbcsr_release(vr_index_sqrt)
    4543           0 :                      CALL dbcsr_release(vd_index_sqrt_inv)
    4544           0 :                      CALL dbcsr_release(vd_index_sqrt)
    4545             : 
    4546           0 :                      CALL timestop(handle8)
    4547             : 
    4548             :                   END IF ! ne.virt_full
    4549             : 
    4550             :                   ! RZK-warning released outside the outer loop
    4551           0 :                   CALL dbcsr_release(sigma_vv_sqrt)
    4552           0 :                   CALL dbcsr_release(sigma_vv_sqrt_inv)
    4553           0 :                   IF (almo_scf_env%deloc_truncate_virt .NE. virt_full) THEN
    4554           0 :                      CALL dbcsr_release(k_vr_index_down)
    4555           0 :                      CALL dbcsr_release(k_vd_index_down)
    4556             :                      !CALL dbcsr_release(k_vd_index_up)
    4557           0 :                      CALL dbcsr_release(matrix_k_central)
    4558           0 :                      CALL dbcsr_release(vr_fixed)
    4559           0 :                      CALL dbcsr_release(vd_fixed)
    4560           0 :                      CALL dbcsr_release(grad)
    4561           0 :                      CALL dbcsr_release(prec)
    4562           0 :                      CALL dbcsr_release(prev_grad)
    4563           0 :                      CALL dbcsr_release(tmp3_vd_vr)
    4564           0 :                      CALL dbcsr_release(tmp1_n_vr)
    4565           0 :                      CALL dbcsr_release(tmp_k_blk)
    4566           0 :                      CALL dbcsr_release(t_curr)
    4567           0 :                      CALL dbcsr_release(sigma_oo_curr)
    4568           0 :                      CALL dbcsr_release(sigma_oo_curr_inv)
    4569           0 :                      CALL dbcsr_release(step)
    4570           0 :                      CALL dbcsr_release(tmp2_n_o)
    4571           0 :                      CALL dbcsr_release(tmp4_o_vr)
    4572           0 :                      CALL dbcsr_release(prev_step)
    4573           0 :                      CALL dbcsr_release(prev_minus_prec_grad)
    4574           0 :                      IF (md_in_k_space) THEN
    4575           0 :                         CALL dbcsr_release(velocity)
    4576             :                      END IF
    4577             : 
    4578             :                   END IF
    4579             : 
    4580           0 :                   outer_opt_k_iteration = outer_opt_k_iteration + 1
    4581           0 :                   IF (outer_opt_k_prepare_to_exit) EXIT
    4582             : 
    4583             :                END DO ! outer loop for k
    4584             : 
    4585             :             END DO ! ispin
    4586             : 
    4587             :             ! RZK-warning update mo orbitals
    4588             : 
    4589             :          ELSE ! virtual orbitals might not be available use projected AOs
    4590             : 
    4591             :             ! compute sqrt(S) and inv(sqrt(S))
    4592             :             ! RZK-warning - remove this sqrt(S) and inv(sqrt(S))
    4593             :             ! ideally ALMO scf should use sigma and sigma_inv in
    4594             :             ! the tensor_up_down representation
    4595           0 :             IF (.NOT. almo_scf_env%s_sqrt_done) THEN
    4596             : 
    4597           0 :                IF (unit_nr > 0) THEN
    4598           0 :                   WRITE (unit_nr, *) "sqrt and inv(sqrt) of AO overlap matrix"
    4599             :                END IF
    4600             :                CALL dbcsr_create(almo_scf_env%matrix_s_sqrt(1), &
    4601             :                                  template=almo_scf_env%matrix_s(1), &
    4602           0 :                                  matrix_type=dbcsr_type_no_symmetry)
    4603             :                CALL dbcsr_create(almo_scf_env%matrix_s_sqrt_inv(1), &
    4604             :                                  template=almo_scf_env%matrix_s(1), &
    4605           0 :                                  matrix_type=dbcsr_type_no_symmetry)
    4606             : 
    4607             :                CALL matrix_sqrt_Newton_Schulz(almo_scf_env%matrix_s_sqrt(1), &
    4608             :                                               almo_scf_env%matrix_s_sqrt_inv(1), &
    4609             :                                               almo_scf_env%matrix_s(1), &
    4610             :                                               threshold=almo_scf_env%eps_filter, &
    4611             :                                               order=almo_scf_env%order_lanczos, &
    4612             :                                               eps_lanczos=almo_scf_env%eps_lanczos, &
    4613           0 :                                               max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    4614             : 
    4615             :                IF (safe_mode) THEN
    4616             :                   CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_s(1), &
    4617             :                                     matrix_type=dbcsr_type_no_symmetry)
    4618             :                   CALL dbcsr_create(matrix_tmp2, template=almo_scf_env%matrix_s(1), &
    4619             :                                     matrix_type=dbcsr_type_no_symmetry)
    4620             : 
    4621             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_s_sqrt_inv(1), &
    4622             :                                       almo_scf_env%matrix_s(1), &
    4623             :                                       0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    4624             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, almo_scf_env%matrix_s_sqrt_inv(1), &
    4625             :                                       0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    4626             : 
    4627             :                   frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    4628             :                   CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    4629             :                   frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    4630             :                   IF (unit_nr > 0) THEN
    4631             :                      WRITE (unit_nr, *) "Error for (inv(sqrt(S))*S*inv(sqrt(S))-I)", frob_matrix/frob_matrix_base
    4632             :                   END IF
    4633             : 
    4634             :                   CALL dbcsr_release(matrix_tmp1)
    4635             :                   CALL dbcsr_release(matrix_tmp2)
    4636             :                END IF
    4637             : 
    4638           0 :                almo_scf_env%s_sqrt_done = .TRUE.
    4639             : 
    4640             :             END IF
    4641             : 
    4642           0 :             DO ispin = 1, nspin
    4643             : 
    4644           0 :                CALL ct_step_env_init(ct_step_env)
    4645             :                CALL ct_step_env_set(ct_step_env, &
    4646             :                                     para_env=almo_scf_env%para_env, &
    4647             :                                     blacs_env=almo_scf_env%blacs_env, &
    4648             :                                     use_occ_orbs=.TRUE., &
    4649             :                                     use_virt_orbs=almo_scf_env%deloc_cayley_use_virt_orbs, &
    4650             :                                     occ_orbs_orthogonal=.FALSE., &
    4651             :                                     virt_orbs_orthogonal=almo_scf_env%orthogonal_basis, &
    4652             :                                     tensor_type=almo_scf_env%deloc_cayley_tensor_type, &
    4653             :                                     neglect_quadratic_term=almo_scf_env%deloc_cayley_linear, &
    4654             :                                     calculate_energy_corr=.TRUE., &
    4655             :                                     update_p=.TRUE., &
    4656             :                                     update_q=.FALSE., &
    4657             :                                     pp_preconditioner_full=almo_scf_env%deloc_cayley_occ_precond, &
    4658             :                                     qq_preconditioner_full=almo_scf_env%deloc_cayley_vir_precond, &
    4659             :                                     eps_convergence=almo_scf_env%deloc_cayley_eps_convergence, &
    4660             :                                     eps_filter=almo_scf_env%eps_filter, &
    4661             :                                     !nspins=almo_scf_env%nspins,&
    4662             :                                     q_index_up=almo_scf_env%matrix_s_sqrt_inv(1), &
    4663             :                                     q_index_down=almo_scf_env%matrix_s_sqrt(1), &
    4664             :                                     p_index_up=almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    4665             :                                     p_index_down=almo_scf_env%matrix_sigma_sqrt(ispin), &
    4666             :                                     matrix_ks=almo_scf_env%matrix_ks_0deloc(ispin), &
    4667             :                                     matrix_p=almo_scf_env%matrix_p(ispin), &
    4668             :                                     matrix_qp_template=almo_scf_env%matrix_t(ispin), &
    4669             :                                     matrix_pq_template=almo_scf_env%matrix_t_tr(ispin), &
    4670             :                                     matrix_t=almo_scf_env%matrix_t(ispin), &
    4671             :                                     conjugator=almo_scf_env%deloc_cayley_conjugator, &
    4672           0 :                                     max_iter=almo_scf_env%deloc_cayley_max_iter)
    4673             : 
    4674             :                ! perform calculations
    4675           0 :                CALL ct_step_execute(ct_step_env)
    4676             : 
    4677             :                ! for now we do not need the new set of orbitals
    4678             :                ! just get the energy correction
    4679             :                CALL ct_step_env_get(ct_step_env, &
    4680           0 :                                     energy_correction=energy_correction(ispin))
    4681             :                !copy_da_energy_matrix=matrix_eda(ispin),&
    4682             :                !copy_da_charge_matrix=matrix_cta(ispin),&
    4683             : 
    4684           0 :                CALL ct_step_env_clean(ct_step_env)
    4685             : 
    4686             :             END DO
    4687             : 
    4688           0 :             energy_correction(1) = energy_correction(1)*spin_factor
    4689             : 
    4690             :          END IF
    4691             : 
    4692             :          ! print the energy correction and exit
    4693           0 :          DO ispin = 1, nspin
    4694             : 
    4695           0 :             IF (unit_nr > 0) THEN
    4696           0 :                WRITE (unit_nr, *)
    4697           0 :                WRITE (unit_nr, '(T2,A,I6,F20.9)') "ECORR", ispin, &
    4698           0 :                   energy_correction(ispin)
    4699           0 :                WRITE (unit_nr, *)
    4700             :             END IF
    4701           0 :             energy_correction_final = energy_correction_final + energy_correction(ispin)
    4702             : 
    4703             :             !!! print out the results of decomposition analysis
    4704             :             !!IF (unit_nr>0) THEN
    4705             :             !!   WRITE(unit_nr,*)
    4706             :             !!   WRITE(unit_nr,'(T2,A)') "ENERGY DECOMPOSITION"
    4707             :             !!ENDIF
    4708             :             !!CALL print_block_sum(eda_matrix(ispin), unit_nr=6)
    4709             :             !!IF (unit_nr>0) THEN
    4710             :             !!   WRITE(unit_nr,*)
    4711             :             !!   WRITE(unit_nr,'(T2,A)') "CHARGE DECOMPOSITION"
    4712             :             !!ENDIF
    4713             :             !!CALL print_block_sum(cta_matrix(ispin), unit_nr=6)
    4714             : 
    4715             :             ! obtain density matrix from updated MOs
    4716             :             ! RZK-later sigma and sigma_inv are lost here
    4717             :             CALL almo_scf_t_to_proj(t=almo_scf_env%matrix_t(ispin), &
    4718             :                                     p=almo_scf_env%matrix_p(ispin), &
    4719             :                                     eps_filter=almo_scf_env%eps_filter, &
    4720             :                                     orthog_orbs=.FALSE., &
    4721             :                                     nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    4722             :                                     s=almo_scf_env%matrix_s(1), &
    4723             :                                     sigma=almo_scf_env%matrix_sigma(ispin), &
    4724             :                                     sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
    4725             :                                     !use_guess=use_guess, &
    4726             :                                     algorithm=almo_scf_env%sigma_inv_algorithm, &
    4727             :                                     inverse_accelerator=almo_scf_env%order_lanczos, &
    4728             :                                     inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
    4729             :                                     eps_lanczos=almo_scf_env%eps_lanczos, &
    4730             :                                     max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
    4731             :                                     para_env=almo_scf_env%para_env, &
    4732           0 :                                     blacs_env=almo_scf_env%blacs_env)
    4733             : 
    4734           0 :             IF (almo_scf_env%nspins == 1) &
    4735             :                CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), &
    4736           0 :                                 spin_factor)
    4737             : 
    4738             :          END DO
    4739             : 
    4740             :       CASE (dm_ls_step)
    4741             : 
    4742             :          ! compute the inverse of S
    4743           0 :          IF (.NOT. almo_scf_env%s_inv_done) THEN
    4744           0 :             IF (unit_nr > 0) THEN
    4745           0 :                WRITE (unit_nr, *) "Inverting AO overlap matrix"
    4746             :             END IF
    4747             :             CALL dbcsr_create(almo_scf_env%matrix_s_inv(1), &
    4748             :                               template=almo_scf_env%matrix_s(1), &
    4749           0 :                               matrix_type=dbcsr_type_no_symmetry)
    4750           0 :             IF (.NOT. almo_scf_env%s_sqrt_done) THEN
    4751             :                CALL invert_Hotelling(almo_scf_env%matrix_s_inv(1), &
    4752             :                                      almo_scf_env%matrix_s(1), &
    4753           0 :                                      threshold=almo_scf_env%eps_filter)
    4754             :             ELSE
    4755             :                CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_s_sqrt_inv(1), &
    4756             :                                    almo_scf_env%matrix_s_sqrt_inv(1), &
    4757             :                                    0.0_dp, almo_scf_env%matrix_s_inv(1), &
    4758           0 :                                    filter_eps=almo_scf_env%eps_filter)
    4759             :             END IF
    4760             : 
    4761             :             IF (safe_mode) THEN
    4762             :                CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_s(1), &
    4763             :                                  matrix_type=dbcsr_type_no_symmetry)
    4764             :                CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_s_inv(1), &
    4765             :                                    almo_scf_env%matrix_s(1), &
    4766             :                                    0.0_dp, matrix_tmp1, &
    4767             :                                    filter_eps=almo_scf_env%eps_filter)
    4768             :                frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    4769             :                CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
    4770             :                frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    4771             :                IF (unit_nr > 0) THEN
    4772             :                   WRITE (unit_nr, *) "Error for (inv(S)*S-I)", &
    4773             :                      frob_matrix/frob_matrix_base
    4774             :                END IF
    4775             :                CALL dbcsr_release(matrix_tmp1)
    4776             :             END IF
    4777             : 
    4778           0 :             almo_scf_env%s_inv_done = .TRUE.
    4779             : 
    4780             :          END IF
    4781             : 
    4782           0 :          DO ispin = 1, nspin
    4783             :             ! RZK-warning the preconditioner is very important
    4784             :             !       IF (.FALSE.) THEN
    4785             :             !           CALL apply_matrix_preconditioner(almo_scf_env%matrix_ks(ispin),&
    4786             :             !                   "forward",almo_scf_env%matrix_s_blk_sqrt(1),&
    4787             :             !                   almo_scf_env%matrix_s_blk_sqrt_inv(1))
    4788             :             !       ENDIF
    4789             :             !CALL dbcsr_filter(almo_scf_env%matrix_ks(ispin),&
    4790             :             !         almo_scf_env%eps_filter)
    4791             :          END DO
    4792             : 
    4793           0 :          ALLOCATE (matrix_p_almo_scf_converged(nspin))
    4794           0 :          DO ispin = 1, nspin
    4795             :             CALL dbcsr_create(matrix_p_almo_scf_converged(ispin), &
    4796           0 :                               template=almo_scf_env%matrix_p(ispin))
    4797             :             CALL dbcsr_copy(matrix_p_almo_scf_converged(ispin), &
    4798           0 :                             almo_scf_env%matrix_p(ispin))
    4799             :          END DO
    4800             : 
    4801             :          ! update the density matrix
    4802           0 :          DO ispin = 1, nspin
    4803             : 
    4804           0 :             nelectron_spin_real(1) = almo_scf_env%nelectrons_spin(ispin)
    4805           0 :             IF (almo_scf_env%nspins == 1) &
    4806           0 :                nelectron_spin_real(1) = nelectron_spin_real(1)/2
    4807             : 
    4808           0 :             local_mu(1) = SUM(almo_scf_env%mu_of_domain(:, ispin))/almo_scf_env%ndomains
    4809           0 :             fake(1) = 123523
    4810             : 
    4811             :             ! RZK UPDATE! the update algorithm is removed because
    4812             :             ! RZK UPDATE! it requires updating core LS_SCF routines
    4813             :             ! RZK UPDATE! (the code exists in the CVS version)
    4814           0 :             CPABORT("CVS only: density_matrix_sign has not been updated in SVN")
    4815             :             ! RZK UPDATE!CALL density_matrix_sign(almo_scf_env%matrix_p(ispin),&
    4816             :             ! RZK UPDATE!                     local_mu,&
    4817             :             ! RZK UPDATE!                     almo_scf_env%fixed_mu,&
    4818             :             ! RZK UPDATE!                     almo_scf_env%matrix_ks_0deloc(ispin),&
    4819             :             ! RZK UPDATE!                     almo_scf_env%matrix_s(1), &
    4820             :             ! RZK UPDATE!                     almo_scf_env%matrix_s_inv(1), &
    4821             :             ! RZK UPDATE!                     nelectron_spin_real,&
    4822             :             ! RZK UPDATE!                     almo_scf_env%eps_filter,&
    4823             :             ! RZK UPDATE!                     fake)
    4824             :             ! RZK UPDATE!
    4825           0 :             almo_scf_env%mu = local_mu(1)
    4826             : 
    4827             :             !IF (almo_scf_env%has_s_preconditioner) THEN
    4828             :             !    CALL apply_matrix_preconditioner(&
    4829             :             !             almo_scf_env%matrix_p_blk(ispin),&
    4830             :             !             "forward",almo_scf_env%matrix_s_blk_sqrt(1),&
    4831             :             !             almo_scf_env%matrix_s_blk_sqrt_inv(1))
    4832             :             !ENDIF
    4833             :             !CALL dbcsr_filter(almo_scf_env%matrix_p(ispin),&
    4834             :             !        almo_scf_env%eps_filter)
    4835             : 
    4836           0 :             IF (almo_scf_env%nspins == 1) &
    4837             :                CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), &
    4838           0 :                                 spin_factor)
    4839             : 
    4840             :             !CALL dbcsr_dot(almo_scf_env%matrix_ks_0deloc(ispin),&
    4841             :             !  almo_scf_env%matrix_p(ispin),&
    4842             :             !  energy_correction(ispin))
    4843             :             !IF (unit_nr>0) THEN
    4844             :             !   WRITE(unit_nr,*)
    4845             :             !   WRITE(unit_nr,'(T2,A,I6,F20.9)') "EFAKE",ispin,&
    4846             :             !           energy_correction(ispin)
    4847             :             !   WRITE(unit_nr,*)
    4848             :             !ENDIF
    4849             :             CALL dbcsr_add(matrix_p_almo_scf_converged(ispin), &
    4850           0 :                            almo_scf_env%matrix_p(ispin), -1.0_dp, 1.0_dp)
    4851             :             CALL dbcsr_dot(almo_scf_env%matrix_ks_0deloc(ispin), &
    4852             :                            matrix_p_almo_scf_converged(ispin), &
    4853           0 :                            energy_correction(ispin))
    4854             : 
    4855           0 :             energy_correction_final = energy_correction_final + energy_correction(ispin)
    4856             : 
    4857           0 :             IF (unit_nr > 0) THEN
    4858           0 :                WRITE (unit_nr, *)
    4859           0 :                WRITE (unit_nr, '(T2,A,I6,F20.9)') "ECORR", ispin, &
    4860           0 :                   energy_correction(ispin)
    4861           0 :                WRITE (unit_nr, *)
    4862             :             END IF
    4863             : 
    4864             :          END DO
    4865             : 
    4866           0 :          DO ispin = 1, nspin
    4867           0 :             CALL dbcsr_release(matrix_p_almo_scf_converged(ispin))
    4868             :          END DO
    4869           0 :          DEALLOCATE (matrix_p_almo_scf_converged)
    4870             : 
    4871             :       END SELECT ! algorithm selection
    4872             : 
    4873           0 :       t2 = m_walltime()
    4874             : 
    4875           0 :       IF (unit_nr > 0) THEN
    4876           0 :          WRITE (unit_nr, *)
    4877           0 :          WRITE (unit_nr, '(T2,A,F18.9,F18.9,F18.9,F12.6)') "ETOT", &
    4878           0 :             almo_scf_env%almo_scf_energy, &
    4879           0 :             energy_correction_final, &
    4880           0 :             almo_scf_env%almo_scf_energy + energy_correction_final, &
    4881           0 :             t2 - t1
    4882           0 :          WRITE (unit_nr, *)
    4883             :       END IF
    4884             : 
    4885           0 :       CALL timestop(handle)
    4886             : 
    4887           0 :    END SUBROUTINE harris_foulkes_correction
    4888             : 
    4889             : ! **************************************************************************************************
    4890             : !> \brief triu of a dbcsr matrix
    4891             : !> \param matrix ...
    4892             : ! **************************************************************************************************
    4893           0 :    SUBROUTINE make_triu(matrix)
    4894             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
    4895             : 
    4896             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'make_triu'
    4897             : 
    4898             :       INTEGER                                            :: col, handle, i, j, row
    4899           0 :       REAL(dp), DIMENSION(:, :), POINTER                 :: block
    4900             :       TYPE(dbcsr_iterator_type)                          :: iter
    4901             : 
    4902           0 :       CALL timeset(routineN, handle)
    4903             : 
    4904           0 :       CALL dbcsr_iterator_start(iter, matrix)
    4905           0 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
    4906           0 :          CALL dbcsr_iterator_next_block(iter, row, col, block)
    4907           0 :          IF (row > col) block(:, :) = 0.0_dp
    4908           0 :          IF (row == col) THEN
    4909           0 :             DO j = 1, SIZE(block, 2)
    4910           0 :             DO i = j + 1, SIZE(block, 1)
    4911           0 :                block(i, j) = 0.0_dp
    4912             :             END DO
    4913             :             END DO
    4914             :          END IF
    4915             :       END DO
    4916           0 :       CALL dbcsr_iterator_stop(iter)
    4917           0 :       CALL dbcsr_filter(matrix, eps=0.0_dp)
    4918             : 
    4919           0 :       CALL timestop(handle)
    4920           0 :    END SUBROUTINE make_triu
    4921             : 
    4922             : ! **************************************************************************************************
    4923             : !> \brief Computes a diagonal preconditioner for the cg optimization of k matrix
    4924             : !> \param prec ...
    4925             : !> \param vd_prop ...
    4926             : !> \param f ...
    4927             : !> \param x ...
    4928             : !> \param oo_inv_x_tr ...
    4929             : !> \param s ...
    4930             : !> \param grad ...
    4931             : !> \param vd_blk ...
    4932             : !> \param t ...
    4933             : !> \param template_vd_vd_blk ...
    4934             : !> \param template_vr_vr_blk ...
    4935             : !> \param template_n_vr ...
    4936             : !> \param spin_factor ...
    4937             : !> \param eps_filter ...
    4938             : !> \par History
    4939             : !>       2011.09 created [Rustam Z Khaliullin]
    4940             : !> \author Rustam Z Khaliullin
    4941             : ! **************************************************************************************************
    4942           0 :    SUBROUTINE opt_k_create_preconditioner(prec, vd_prop, f, x, oo_inv_x_tr, s, grad, &
    4943             :                                           vd_blk, t, template_vd_vd_blk, template_vr_vr_blk, template_n_vr, &
    4944             :                                           spin_factor, eps_filter)
    4945             : 
    4946             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: prec
    4947             :       TYPE(dbcsr_type), INTENT(IN)                       :: vd_prop, f, x, oo_inv_x_tr, s
    4948             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: grad
    4949             :       TYPE(dbcsr_type), INTENT(IN)                       :: vd_blk, t, template_vd_vd_blk, &
    4950             :                                                             template_vr_vr_blk, template_n_vr
    4951             :       REAL(KIND=dp), INTENT(IN)                          :: spin_factor, eps_filter
    4952             : 
    4953             :       CHARACTER(len=*), PARAMETER :: routineN = 'opt_k_create_preconditioner'
    4954             : 
    4955             :       INTEGER                                            :: handle, p_nrows, q_nrows
    4956           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: p_diagonal, q_diagonal
    4957             :       TYPE(dbcsr_type)                                   :: pp_diag, qq_diag, t1, t2, tmp, &
    4958             :                                                             tmp1_n_vr, tmp2_n_vr, tmp_n_vd, &
    4959             :                                                             tmp_vd_vd_blk, tmp_vr_vr_blk
    4960             : 
    4961             : ! init diag blocks outside
    4962             : ! init diag blocks otside
    4963             : !INTEGER                                  :: iblock_row, iblock_col,&
    4964             : !                                            nblkrows_tot, nblkcols_tot
    4965             : !REAL(KIND=dp), DIMENSION(:, :), POINTER  :: p_new_block
    4966             : !INTEGER                                  :: mynode, hold, row, col
    4967             : 
    4968           0 :       CALL timeset(routineN, handle)
    4969             : 
    4970             :       ! initialize a matrix to 1.0
    4971           0 :       CALL dbcsr_create(tmp, template=prec)
    4972             :       ! in order to use dbcsr_set matrix blocks must exist
    4973           0 :       CALL dbcsr_copy(tmp, prec)
    4974           0 :       CALL dbcsr_set(tmp, 1.0_dp)
    4975             : 
    4976             :       ! compute qq = (Vd^tr)*F*Vd
    4977           0 :       CALL dbcsr_create(tmp_n_vd, template=vd_prop)
    4978             :       CALL dbcsr_multiply("N", "N", 1.0_dp, f, vd_prop, &
    4979           0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    4980             :       CALL dbcsr_create(tmp_vd_vd_blk, &
    4981           0 :                         template=template_vd_vd_blk)
    4982           0 :       CALL dbcsr_copy(tmp_vd_vd_blk, template_vd_vd_blk)
    4983             :       CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
    4984             :                           0.0_dp, tmp_vd_vd_blk, &
    4985             :                           retain_sparsity=.TRUE., &
    4986           0 :                           filter_eps=eps_filter)
    4987             :       ! copy diagonal elements of the result into rows of a matrix
    4988           0 :       CALL dbcsr_get_info(tmp_vd_vd_blk, nfullrows_total=q_nrows)
    4989           0 :       ALLOCATE (q_diagonal(q_nrows))
    4990           0 :       CALL dbcsr_get_diag(tmp_vd_vd_blk, q_diagonal)
    4991             :       CALL dbcsr_create(qq_diag, &
    4992           0 :                         template=template_vd_vd_blk)
    4993           0 :       CALL dbcsr_add_on_diag(qq_diag, 1.0_dp)
    4994           0 :       CALL dbcsr_set_diag(qq_diag, q_diagonal)
    4995           0 :       CALL dbcsr_create(t1, template=prec)
    4996             :       CALL dbcsr_multiply("N", "N", 1.0_dp, qq_diag, tmp, &
    4997           0 :                           0.0_dp, t1, filter_eps=eps_filter)
    4998             : 
    4999             :       ! compute pp = X*sigma_oo_inv*X^tr
    5000           0 :       CALL dbcsr_create(tmp_vr_vr_blk, template=template_vr_vr_blk)
    5001           0 :       CALL dbcsr_copy(tmp_vr_vr_blk, template_vr_vr_blk)
    5002             :       CALL dbcsr_multiply("N", "N", 1.0_dp, x, oo_inv_x_tr, &
    5003             :                           0.0_dp, tmp_vr_vr_blk, &
    5004             :                           retain_sparsity=.TRUE., &
    5005           0 :                           filter_eps=eps_filter)
    5006             :       ! copy diagonal elements of the result into cols of a matrix
    5007           0 :       CALL dbcsr_get_info(tmp_vr_vr_blk, nfullrows_total=p_nrows)
    5008           0 :       ALLOCATE (p_diagonal(p_nrows))
    5009           0 :       CALL dbcsr_get_diag(tmp_vr_vr_blk, p_diagonal)
    5010           0 :       CALL dbcsr_create(pp_diag, template=template_vr_vr_blk)
    5011           0 :       CALL dbcsr_add_on_diag(pp_diag, 1.0_dp)
    5012           0 :       CALL dbcsr_set_diag(pp_diag, p_diagonal)
    5013           0 :       CALL dbcsr_set(tmp, 1.0_dp)
    5014           0 :       CALL dbcsr_create(t2, template=prec)
    5015             :       CALL dbcsr_multiply("N", "N", 1.0_dp, tmp, pp_diag, &
    5016           0 :                           0.0_dp, t2, filter_eps=eps_filter)
    5017             : 
    5018           0 :       CALL dbcsr_hadamard_product(t1, t2, prec)
    5019             : 
    5020             :       ! compute qq = (Vd^tr)*S*Vd
    5021             :       CALL dbcsr_multiply("N", "N", 1.0_dp, s, vd_prop, &
    5022           0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    5023             :       CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
    5024             :                           0.0_dp, tmp_vd_vd_blk, &
    5025             :                           retain_sparsity=.TRUE., &
    5026           0 :                           filter_eps=eps_filter)
    5027             :       ! copy diagonal elements of the result into rows of a matrix
    5028           0 :       CALL dbcsr_get_diag(tmp_vd_vd_blk, q_diagonal)
    5029           0 :       CALL dbcsr_add_on_diag(qq_diag, 1.0_dp)
    5030           0 :       CALL dbcsr_set_diag(qq_diag, q_diagonal)
    5031           0 :       CALL dbcsr_set(tmp, 1.0_dp)
    5032             :       CALL dbcsr_multiply("N", "N", 1.0_dp, qq_diag, tmp, &
    5033           0 :                           0.0_dp, t1, filter_eps=eps_filter)
    5034             : 
    5035             :       ! compute pp = X*sig_oo_inv*(T^tr)*F*T*sig_oo_inv*(X^tr)
    5036           0 :       CALL dbcsr_create(tmp1_n_vr, template=template_n_vr)
    5037           0 :       CALL dbcsr_create(tmp2_n_vr, template=template_n_vr)
    5038             :       CALL dbcsr_multiply("N", "N", 1.0_dp, t, oo_inv_x_tr, &
    5039           0 :                           0.0_dp, tmp1_n_vr, filter_eps=eps_filter)
    5040             :       CALL dbcsr_multiply("N", "N", 1.0_dp, f, tmp1_n_vr, &
    5041           0 :                           0.0_dp, tmp2_n_vr, filter_eps=eps_filter)
    5042             :       CALL dbcsr_multiply("T", "N", 1.0_dp, tmp1_n_vr, tmp2_n_vr, &
    5043             :                           0.0_dp, tmp_vr_vr_blk, &
    5044             :                           retain_sparsity=.TRUE., &
    5045           0 :                           filter_eps=eps_filter)
    5046             :       ! copy diagonal elements of the result into cols of a matrix
    5047           0 :       CALL dbcsr_get_diag(tmp_vr_vr_blk, p_diagonal)
    5048           0 :       CALL dbcsr_add_on_diag(pp_diag, 1.0_dp)
    5049           0 :       CALL dbcsr_set_diag(pp_diag, p_diagonal)
    5050           0 :       CALL dbcsr_set(tmp, 1.0_dp)
    5051             :       CALL dbcsr_multiply("N", "N", 1.0_dp, tmp, pp_diag, &
    5052           0 :                           0.0_dp, t2, filter_eps=eps_filter)
    5053             : 
    5054           0 :       CALL dbcsr_hadamard_product(t1, t2, tmp)
    5055           0 :       CALL dbcsr_add(prec, tmp, 1.0_dp, -1.0_dp)
    5056           0 :       CALL dbcsr_scale(prec, 2.0_dp*spin_factor)
    5057             : 
    5058             :       ! compute qp = X*sig_oo_inv*(T^tr)*S*Vd
    5059             :       CALL dbcsr_multiply("N", "N", 1.0_dp, s, vd_blk, &
    5060           0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    5061             :       CALL dbcsr_multiply("T", "N", 1.0_dp, tmp_n_vd, tmp1_n_vr, &
    5062             :                           0.0_dp, tmp, retain_sparsity=.TRUE., &
    5063           0 :                           filter_eps=eps_filter)
    5064           0 :       CALL dbcsr_hadamard_product(grad, tmp, t1)
    5065             :       ! gradient already contains 2.0*spin_factor
    5066           0 :       CALL dbcsr_scale(t1, -2.0_dp)
    5067             : 
    5068           0 :       CALL dbcsr_add(prec, t1, 1.0_dp, 1.0_dp)
    5069             : 
    5070           0 :       CALL inverse_of_elements(prec)
    5071           0 :       CALL dbcsr_filter(prec, eps_filter)
    5072             : 
    5073           0 :       DEALLOCATE (q_diagonal)
    5074           0 :       DEALLOCATE (p_diagonal)
    5075           0 :       CALL dbcsr_release(tmp)
    5076           0 :       CALL dbcsr_release(qq_diag)
    5077           0 :       CALL dbcsr_release(t1)
    5078           0 :       CALL dbcsr_release(pp_diag)
    5079           0 :       CALL dbcsr_release(t2)
    5080           0 :       CALL dbcsr_release(tmp_n_vd)
    5081           0 :       CALL dbcsr_release(tmp_vd_vd_blk)
    5082           0 :       CALL dbcsr_release(tmp_vr_vr_blk)
    5083           0 :       CALL dbcsr_release(tmp1_n_vr)
    5084           0 :       CALL dbcsr_release(tmp2_n_vr)
    5085             : 
    5086           0 :       CALL timestop(handle)
    5087             : 
    5088           0 :    END SUBROUTINE opt_k_create_preconditioner
    5089             : 
    5090             : ! **************************************************************************************************
    5091             : !> \brief Computes a block-diagonal preconditioner for the optimization of
    5092             : !>        k matrix
    5093             : !> \param almo_scf_env ...
    5094             : !> \param vd_prop ...
    5095             : !> \param oo_inv_x_tr ...
    5096             : !> \param t_curr ...
    5097             : !> \param ispin ...
    5098             : !> \param spin_factor ...
    5099             : !> \par History
    5100             : !>       2011.10 created [Rustam Z Khaliullin]
    5101             : !> \author Rustam Z Khaliullin
    5102             : ! **************************************************************************************************
    5103           0 :    SUBROUTINE opt_k_create_preconditioner_blk(almo_scf_env, vd_prop, oo_inv_x_tr, &
    5104             :                                               t_curr, ispin, spin_factor)
    5105             : 
    5106             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    5107             :       TYPE(dbcsr_type), INTENT(IN)                       :: vd_prop, oo_inv_x_tr, t_curr
    5108             :       INTEGER, INTENT(IN)                                :: ispin
    5109             :       REAL(KIND=dp), INTENT(IN)                          :: spin_factor
    5110             : 
    5111             :       CHARACTER(len=*), PARAMETER :: routineN = 'opt_k_create_preconditioner_blk'
    5112             : 
    5113             :       INTEGER                                            :: handle
    5114             :       REAL(KIND=dp)                                      :: eps_filter
    5115             :       TYPE(dbcsr_type)                                   :: opt_k_e_dd, opt_k_e_rr, s_dd_sqrt, &
    5116             :                                                             s_rr_sqrt, t1, tmp, tmp1_n_vr, &
    5117             :                                                             tmp2_n_vr, tmp_n_vd, tmp_vd_vd_blk, &
    5118             :                                                             tmp_vr_vr_blk
    5119             : 
    5120             : ! matrices that has been computed outside the routine already
    5121             : 
    5122           0 :       CALL timeset(routineN, handle)
    5123             : 
    5124           0 :       eps_filter = almo_scf_env%eps_filter
    5125             : 
    5126             :       ! compute S_qq = (Vd^tr)*S*Vd
    5127           0 :       CALL dbcsr_create(tmp_n_vd, template=almo_scf_env%matrix_v_disc(ispin))
    5128             :       CALL dbcsr_create(tmp_vd_vd_blk, &
    5129             :                         template=almo_scf_env%matrix_vv_disc_blk(ispin), &
    5130           0 :                         matrix_type=dbcsr_type_no_symmetry)
    5131             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5132             :                           almo_scf_env%matrix_s(1), &
    5133             :                           vd_prop, &
    5134           0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    5135             :       CALL dbcsr_copy(tmp_vd_vd_blk, &
    5136           0 :                       almo_scf_env%matrix_vv_disc_blk(ispin))
    5137             :       CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
    5138             :                           0.0_dp, tmp_vd_vd_blk, &
    5139           0 :                           retain_sparsity=.TRUE.)
    5140             : 
    5141             :       CALL dbcsr_create(s_dd_sqrt, &
    5142             :                         template=almo_scf_env%matrix_vv_disc_blk(ispin), &
    5143           0 :                         matrix_type=dbcsr_type_no_symmetry)
    5144             :       CALL matrix_sqrt_Newton_Schulz(s_dd_sqrt, &
    5145             :                                      almo_scf_env%opt_k_t_dd(ispin), &
    5146             :                                      tmp_vd_vd_blk, &
    5147             :                                      threshold=eps_filter, &
    5148             :                                      order=almo_scf_env%order_lanczos, &
    5149             :                                      eps_lanczos=almo_scf_env%eps_lanczos, &
    5150           0 :                                      max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    5151             : 
    5152             :       ! compute F_qq = (Vd^tr)*F*Vd
    5153             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5154             :                           almo_scf_env%matrix_ks_0deloc(ispin), &
    5155             :                           vd_prop, &
    5156           0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    5157             :       CALL dbcsr_copy(tmp_vd_vd_blk, &
    5158           0 :                       almo_scf_env%matrix_vv_disc_blk(ispin))
    5159             :       CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
    5160             :                           0.0_dp, tmp_vd_vd_blk, &
    5161           0 :                           retain_sparsity=.TRUE.)
    5162           0 :       CALL dbcsr_release(tmp_n_vd)
    5163             : 
    5164             :       ! bring to the blocked-orthogonalized basis
    5165             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5166             :                           tmp_vd_vd_blk, &
    5167             :                           almo_scf_env%opt_k_t_dd(ispin), &
    5168           0 :                           0.0_dp, s_dd_sqrt, filter_eps=eps_filter)
    5169             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5170             :                           almo_scf_env%opt_k_t_dd(ispin), &
    5171             :                           s_dd_sqrt, &
    5172           0 :                           0.0_dp, tmp_vd_vd_blk, filter_eps=eps_filter)
    5173             : 
    5174             :       ! diagonalize the matrix
    5175             :       CALL dbcsr_create(opt_k_e_dd, &
    5176           0 :                         template=almo_scf_env%matrix_vv_disc_blk(ispin))
    5177           0 :       CALL dbcsr_release(s_dd_sqrt)
    5178             :       CALL dbcsr_create(s_dd_sqrt, &
    5179             :                         template=almo_scf_env%matrix_vv_disc_blk(ispin), &
    5180           0 :                         matrix_type=dbcsr_type_no_symmetry)
    5181             :       CALL diagonalize_diagonal_blocks(tmp_vd_vd_blk, &
    5182             :                                        s_dd_sqrt, &
    5183           0 :                                        opt_k_e_dd)
    5184             : 
    5185             :       ! obtain the transformation matrix in the discarded subspace
    5186             :       ! T = S^{-1/2}.U
    5187             :       CALL dbcsr_copy(tmp_vd_vd_blk, &
    5188           0 :                       almo_scf_env%opt_k_t_dd(ispin))
    5189             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5190             :                           tmp_vd_vd_blk, &
    5191             :                           s_dd_sqrt, &
    5192             :                           0.0_dp, almo_scf_env%opt_k_t_dd(ispin), &
    5193           0 :                           filter_eps=eps_filter)
    5194           0 :       CALL dbcsr_release(s_dd_sqrt)
    5195           0 :       CALL dbcsr_release(tmp_vd_vd_blk)
    5196             : 
    5197             :       ! copy diagonal elements of the result into rows of a matrix
    5198             :       CALL dbcsr_create(tmp, &
    5199           0 :                         template=almo_scf_env%matrix_k_blk_ones(ispin))
    5200             :       CALL dbcsr_copy(tmp, &
    5201           0 :                       almo_scf_env%matrix_k_blk_ones(ispin))
    5202             :       CALL dbcsr_create(t1, &
    5203           0 :                         template=almo_scf_env%matrix_k_blk_ones(ispin))
    5204             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5205             :                           opt_k_e_dd, tmp, &
    5206           0 :                           0.0_dp, t1, filter_eps=eps_filter)
    5207           0 :       CALL dbcsr_release(opt_k_e_dd)
    5208             : 
    5209             :       ! compute S_pp = X*sigma_oo_inv*X^tr
    5210             :       CALL dbcsr_create(tmp_vr_vr_blk, &
    5211             :                         template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
    5212           0 :                         matrix_type=dbcsr_type_no_symmetry)
    5213             :       CALL dbcsr_copy(tmp_vr_vr_blk, &
    5214           0 :                       almo_scf_env%matrix_sigma_vv_blk(ispin))
    5215             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5216             :                           almo_scf_env%matrix_x(ispin), &
    5217             :                           oo_inv_x_tr, &
    5218             :                           0.0_dp, tmp_vr_vr_blk, &
    5219           0 :                           retain_sparsity=.TRUE.)
    5220             : 
    5221             :       ! obtain the orthogonalization matrix
    5222             :       CALL dbcsr_create(s_rr_sqrt, &
    5223             :                         template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
    5224           0 :                         matrix_type=dbcsr_type_no_symmetry)
    5225             :       CALL matrix_sqrt_Newton_Schulz(s_rr_sqrt, &
    5226             :                                      almo_scf_env%opt_k_t_rr(ispin), &
    5227             :                                      tmp_vr_vr_blk, &
    5228             :                                      threshold=eps_filter, &
    5229             :                                      order=almo_scf_env%order_lanczos, &
    5230             :                                      eps_lanczos=almo_scf_env%eps_lanczos, &
    5231           0 :                                      max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    5232             : 
    5233             :       ! compute F_pp = X*sig_oo_inv*(T^tr)*F*T*sig_oo_inv*(X^tr)
    5234             :       CALL dbcsr_create(tmp1_n_vr, &
    5235           0 :                         template=almo_scf_env%matrix_v(ispin))
    5236             :       CALL dbcsr_create(tmp2_n_vr, &
    5237           0 :                         template=almo_scf_env%matrix_v(ispin))
    5238             :       CALL dbcsr_multiply("N", "N", 1.0_dp, t_curr, oo_inv_x_tr, &
    5239           0 :                           0.0_dp, tmp1_n_vr, filter_eps=eps_filter)
    5240             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5241             :                           almo_scf_env%matrix_ks_0deloc(ispin), &
    5242             :                           tmp1_n_vr, &
    5243           0 :                           0.0_dp, tmp2_n_vr, filter_eps=eps_filter)
    5244             :       CALL dbcsr_multiply("T", "N", 1.0_dp, tmp1_n_vr, tmp2_n_vr, &
    5245             :                           0.0_dp, tmp_vr_vr_blk, &
    5246           0 :                           retain_sparsity=.TRUE.)
    5247           0 :       CALL dbcsr_release(tmp1_n_vr)
    5248           0 :       CALL dbcsr_release(tmp2_n_vr)
    5249             : 
    5250             :       ! bring to the blocked-orthogonalized basis
    5251             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5252             :                           tmp_vr_vr_blk, &
    5253             :                           almo_scf_env%opt_k_t_rr(ispin), &
    5254           0 :                           0.0_dp, s_rr_sqrt, filter_eps=eps_filter)
    5255             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5256             :                           almo_scf_env%opt_k_t_rr(ispin), &
    5257             :                           s_rr_sqrt, &
    5258           0 :                           0.0_dp, tmp_vr_vr_blk, filter_eps=eps_filter)
    5259             : 
    5260             :       ! diagonalize the matrix
    5261             :       CALL dbcsr_create(opt_k_e_rr, &
    5262           0 :                         template=almo_scf_env%matrix_sigma_vv_blk(ispin))
    5263           0 :       CALL dbcsr_release(s_rr_sqrt)
    5264             :       CALL dbcsr_create(s_rr_sqrt, &
    5265             :                         template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
    5266           0 :                         matrix_type=dbcsr_type_no_symmetry)
    5267             :       CALL diagonalize_diagonal_blocks(tmp_vr_vr_blk, &
    5268             :                                        s_rr_sqrt, &
    5269           0 :                                        opt_k_e_rr)
    5270             : 
    5271             :       ! obtain the transformation matrix in the retained subspace
    5272             :       ! T = S^{-1/2}.U
    5273             :       CALL dbcsr_copy(tmp_vr_vr_blk, &
    5274           0 :                       almo_scf_env%opt_k_t_rr(ispin))
    5275             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5276             :                           tmp_vr_vr_blk, &
    5277             :                           s_rr_sqrt, &
    5278             :                           0.0_dp, almo_scf_env%opt_k_t_rr(ispin), &
    5279           0 :                           filter_eps=eps_filter)
    5280           0 :       CALL dbcsr_release(s_rr_sqrt)
    5281           0 :       CALL dbcsr_release(tmp_vr_vr_blk)
    5282             : 
    5283             :       ! copy diagonal elements of the result into cols of a matrix
    5284             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5285             :                           tmp, opt_k_e_rr, &
    5286             :                           0.0_dp, almo_scf_env%opt_k_denom(ispin), &
    5287           0 :                           filter_eps=eps_filter)
    5288           0 :       CALL dbcsr_release(opt_k_e_rr)
    5289           0 :       CALL dbcsr_release(tmp)
    5290             : 
    5291             :       ! form the denominator matrix
    5292             :       CALL dbcsr_add(almo_scf_env%opt_k_denom(ispin), t1, &
    5293           0 :                      -1.0_dp, 1.0_dp)
    5294           0 :       CALL dbcsr_release(t1)
    5295             :       CALL dbcsr_scale(almo_scf_env%opt_k_denom(ispin), &
    5296           0 :                        2.0_dp*spin_factor)
    5297             : 
    5298           0 :       CALL inverse_of_elements(almo_scf_env%opt_k_denom(ispin))
    5299             :       CALL dbcsr_filter(almo_scf_env%opt_k_denom(ispin), &
    5300           0 :                         eps_filter)
    5301             : 
    5302           0 :       CALL timestop(handle)
    5303             : 
    5304           0 :    END SUBROUTINE opt_k_create_preconditioner_blk
    5305             : 
    5306             : ! **************************************************************************************************
    5307             : !> \brief Applies a block-diagonal preconditioner for the optimization of
    5308             : !>        k matrix (preconditioner matrices must be calculated and stored
    5309             : !>        beforehand)
    5310             : !> \param almo_scf_env ...
    5311             : !> \param step ...
    5312             : !> \param grad ...
    5313             : !> \param ispin ...
    5314             : !> \par History
    5315             : !>       2011.10 created [Rustam Z Khaliullin]
    5316             : !> \author Rustam Z Khaliullin
    5317             : ! **************************************************************************************************
    5318           0 :    SUBROUTINE opt_k_apply_preconditioner_blk(almo_scf_env, step, grad, ispin)
    5319             : 
    5320             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    5321             :       TYPE(dbcsr_type), INTENT(OUT)                      :: step
    5322             :       TYPE(dbcsr_type), INTENT(IN)                       :: grad
    5323             :       INTEGER, INTENT(IN)                                :: ispin
    5324             : 
    5325             :       CHARACTER(len=*), PARAMETER :: routineN = 'opt_k_apply_preconditioner_blk'
    5326             : 
    5327             :       INTEGER                                            :: handle
    5328             :       REAL(KIND=dp)                                      :: eps_filter
    5329             :       TYPE(dbcsr_type)                                   :: tmp_k
    5330             : 
    5331           0 :       CALL timeset(routineN, handle)
    5332             : 
    5333           0 :       eps_filter = almo_scf_env%eps_filter
    5334             : 
    5335           0 :       CALL dbcsr_create(tmp_k, template=almo_scf_env%matrix_k_blk(ispin))
    5336             : 
    5337             :       ! transform gradient to the correct "diagonal" basis
    5338             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5339             :                           grad, almo_scf_env%opt_k_t_rr(ispin), &
    5340           0 :                           0.0_dp, tmp_k, filter_eps=eps_filter)
    5341             :       CALL dbcsr_multiply("T", "N", 1.0_dp, &
    5342             :                           almo_scf_env%opt_k_t_dd(ispin), tmp_k, &
    5343           0 :                           0.0_dp, step, filter_eps=eps_filter)
    5344             : 
    5345             :       ! apply diagonal preconditioner
    5346             :       CALL dbcsr_hadamard_product(step, &
    5347           0 :                                   almo_scf_env%opt_k_denom(ispin), tmp_k)
    5348             : 
    5349             :       ! back-transform the result to the initial basis
    5350             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5351             :                           almo_scf_env%opt_k_t_dd(ispin), tmp_k, &
    5352           0 :                           0.0_dp, step, filter_eps=eps_filter)
    5353             :       CALL dbcsr_multiply("N", "T", 1.0_dp, &
    5354             :                           step, almo_scf_env%opt_k_t_rr(ispin), &
    5355           0 :                           0.0_dp, tmp_k, filter_eps=eps_filter)
    5356             : 
    5357           0 :       CALL dbcsr_copy(step, tmp_k)
    5358             : 
    5359           0 :       CALL dbcsr_release(tmp_k)
    5360             : 
    5361           0 :       CALL timestop(handle)
    5362             : 
    5363           0 :    END SUBROUTINE opt_k_apply_preconditioner_blk
    5364             : 
    5365             : !! **************************************************************************************************
    5366             : !!> \brief Reduce the number of virtual orbitals by rotating them within
    5367             : !!>        a domain. The rotation is such that minimizes the frobenius norm of
    5368             : !!>        the Fov domain-blocks of the discarded virtuals
    5369             : !!> \par History
    5370             : !!>       2011.08 created [Rustam Z Khaliullin]
    5371             : !!> \author Rustam Z Khaliullin
    5372             : !! **************************************************************************************************
    5373             : !  SUBROUTINE truncate_subspace_v_blk(qs_env,almo_scf_env)
    5374             : !
    5375             : !    TYPE(qs_environment_type), POINTER       :: qs_env
    5376             : !    TYPE(almo_scf_env_type)                  :: almo_scf_env
    5377             : !
    5378             : !    CHARACTER(len=*), PARAMETER :: routineN = 'truncate_subspace_v_blk', &
    5379             : !      routineP = moduleN//':'//routineN
    5380             : !
    5381             : !    INTEGER                                  :: handle, ispin, iblock_row, &
    5382             : !                                                iblock_col, iblock_row_size, &
    5383             : !                                                iblock_col_size, retained_v, &
    5384             : !                                                iteration, line_search_step, &
    5385             : !                                                unit_nr, line_search_step_last
    5386             : !    REAL(KIND=dp)                            :: t1, obj_function, grad_norm,&
    5387             : !                                                c0, b0, a0, obj_function_new,&
    5388             : !                                                t2, alpha, ff1, ff2, step1,&
    5389             : !                                                step2,&
    5390             : !                                                frob_matrix_base,&
    5391             : !                                                frob_matrix
    5392             : !    LOGICAL                                  :: safe_mode, converged, &
    5393             : !                                                prepare_to_exit, failure
    5394             : !    TYPE(cp_logger_type), POINTER            :: logger
    5395             : !    TYPE(dbcsr_type)                      :: Fon, Fov, Fov_filtered, &
    5396             : !                                                temp1_oo, temp2_oo, Fov_original, &
    5397             : !                                                temp0_ov, U_blk_tot, U_blk, &
    5398             : !                                                grad_blk, step_blk, matrix_filter, &
    5399             : !                                                v_full_new,v_full_tmp,&
    5400             : !                                                matrix_sigma_vv_full,&
    5401             : !                                                matrix_sigma_vv_full_sqrt,&
    5402             : !                                                matrix_sigma_vv_full_sqrt_inv,&
    5403             : !                                                matrix_tmp1,&
    5404             : !                                                matrix_tmp2
    5405             : !
    5406             : !    REAL(kind=dp), DIMENSION(:, :), POINTER  :: data_p, p_new_block
    5407             : !    TYPE(dbcsr_iterator_type)                  :: iter
    5408             : !
    5409             : !
    5410             : !REAL(kind=dp), DIMENSION(:), ALLOCATABLE     :: eigenvalues, WORK
    5411             : !REAL(kind=dp), DIMENSION(:,:), ALLOCATABLE   :: data_copy, left_vectors, right_vectors
    5412             : !INTEGER                                      :: LWORK, INFO
    5413             : !TYPE(dbcsr_type)                          :: temp_u_v_full_blk
    5414             : !
    5415             : !    CALL timeset(routineN,handle)
    5416             : !
    5417             : !    safe_mode=.TRUE.
    5418             : !
    5419             : !    ! get a useful output_unit
    5420             : !    logger => cp_get_default_logger()
    5421             : !    IF (logger%para_env%is_source()) THEN
    5422             : !       unit_nr=cp_logger_get_default_unit_nr(logger,local=.TRUE.)
    5423             : !    ELSE
    5424             : !       unit_nr=-1
    5425             : !    ENDIF
    5426             : !
    5427             : !    DO ispin=1,almo_scf_env%nspins
    5428             : !
    5429             : !       t1 = m_walltime()
    5430             : !
    5431             : !       !!!!!!!!!!!!!!!!!
    5432             : !       ! 0. Orthogonalize virtuals
    5433             : !       !    Unfortunately, we have to do it in the FULL V subspace :(
    5434             : !
    5435             : !       CALL dbcsr_init(v_full_new)
    5436             : !       CALL dbcsr_create(v_full_new,&
    5437             : !               template=almo_scf_env%matrix_v_full_blk(ispin),&
    5438             : !               matrix_type=dbcsr_type_no_symmetry)
    5439             : !
    5440             : !       ! project the occupied subspace out
    5441             : !       CALL almo_scf_p_out_from_v(almo_scf_env%matrix_v_full_blk(ispin),&
    5442             : !              v_full_new,almo_scf_env%matrix_ov_full(ispin),&
    5443             : !              ispin,almo_scf_env)
    5444             : !
    5445             : !       ! init overlap and its functions
    5446             : !       CALL dbcsr_init(matrix_sigma_vv_full)
    5447             : !       CALL dbcsr_init(matrix_sigma_vv_full_sqrt)
    5448             : !       CALL dbcsr_init(matrix_sigma_vv_full_sqrt_inv)
    5449             : !       CALL dbcsr_create(matrix_sigma_vv_full,&
    5450             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5451             : !               matrix_type=dbcsr_type_no_symmetry)
    5452             : !       CALL dbcsr_create(matrix_sigma_vv_full_sqrt,&
    5453             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5454             : !               matrix_type=dbcsr_type_no_symmetry)
    5455             : !       CALL dbcsr_create(matrix_sigma_vv_full_sqrt_inv,&
    5456             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5457             : !               matrix_type=dbcsr_type_no_symmetry)
    5458             : !
    5459             : !       ! construct VV overlap
    5460             : !       CALL almo_scf_mo_to_sigma(v_full_new,&
    5461             : !               matrix_sigma_vv_full,&
    5462             : !               almo_scf_env%matrix_s(1),&
    5463             : !               almo_scf_env%eps_filter)
    5464             : !
    5465             : !       IF (unit_nr>0) THEN
    5466             : !          WRITE(unit_nr,*) "sqrt and inv(sqrt) of the FULL virtual MO overlap"
    5467             : !       ENDIF
    5468             : !
    5469             : !       ! construct orthogonalization matrices
    5470             : !       CALL matrix_sqrt_Newton_Schulz(matrix_sigma_vv_full_sqrt,&
    5471             : !                                      matrix_sigma_vv_full_sqrt_inv,&
    5472             : !                                      matrix_sigma_vv_full,&
    5473             : !                                      threshold=almo_scf_env%eps_filter,&
    5474             : !                                      order=almo_scf_env%order_lanczos,&
    5475             : !                                      eps_lanczos=almo_scf_env%eps_lanczos,&
    5476             : !                                      max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    5477             : !       IF (safe_mode) THEN
    5478             : !          CALL dbcsr_init(matrix_tmp1)
    5479             : !          CALL dbcsr_create(matrix_tmp1,template=matrix_sigma_vv_full,&
    5480             : !                               matrix_type=dbcsr_type_no_symmetry)
    5481             : !          CALL dbcsr_init(matrix_tmp2)
    5482             : !          CALL dbcsr_create(matrix_tmp2,template=matrix_sigma_vv_full,&
    5483             : !                               matrix_type=dbcsr_type_no_symmetry)
    5484             : !
    5485             : !          CALL dbcsr_multiply("N","N",1.0_dp,matrix_sigma_vv_full_sqrt_inv,&
    5486             : !                                 matrix_sigma_vv_full,&
    5487             : !                                 0.0_dp,matrix_tmp1,filter_eps=almo_scf_env%eps_filter)
    5488             : !          CALL dbcsr_multiply("N","N",1.0_dp,matrix_tmp1,&
    5489             : !                                 matrix_sigma_vv_full_sqrt_inv,&
    5490             : !                                 0.0_dp,matrix_tmp2,filter_eps=almo_scf_env%eps_filter)
    5491             : !
    5492             : !          frob_matrix_base=dbcsr_frobenius_norm(matrix_tmp2)
    5493             : !          CALL dbcsr_add_on_diag(matrix_tmp2,-1.0_dp)
    5494             : !          frob_matrix=dbcsr_frobenius_norm(matrix_tmp2)
    5495             : !          IF (unit_nr>0) THEN
    5496             : !             WRITE(unit_nr,*) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)",frob_matrix/frob_matrix_base
    5497             : !          ENDIF
    5498             : !
    5499             : !          CALL dbcsr_release(matrix_tmp1)
    5500             : !          CALL dbcsr_release(matrix_tmp2)
    5501             : !       ENDIF
    5502             : !
    5503             : !       ! discard unnecessary overlap functions
    5504             : !       CALL dbcsr_release(matrix_sigma_vv_full)
    5505             : !       CALL dbcsr_release(matrix_sigma_vv_full_sqrt)
    5506             : !
    5507             : !! this can be re-written because we have (1-P)|v>
    5508             : !
    5509             : !       !!!!!!!!!!!!!!!!!!!
    5510             : !       ! 1. Compute F_ov
    5511             : !       CALL dbcsr_init(Fon)
    5512             : !       CALL dbcsr_create(Fon,&
    5513             : !               template=almo_scf_env%matrix_v_full_blk(ispin))
    5514             : !       CALL dbcsr_init(Fov)
    5515             : !       CALL dbcsr_create(Fov,&
    5516             : !               template=almo_scf_env%matrix_ov_full(ispin))
    5517             : !       CALL dbcsr_init(Fov_filtered)
    5518             : !       CALL dbcsr_create(Fov_filtered,&
    5519             : !               template=almo_scf_env%matrix_ov_full(ispin))
    5520             : !       CALL dbcsr_init(temp1_oo)
    5521             : !       CALL dbcsr_create(temp1_oo,&
    5522             : !               template=almo_scf_env%matrix_sigma(ispin),&
    5523             : !               !matrix_type=dbcsr_type_no_symmetry)
    5524             : !       CALL dbcsr_init(temp2_oo)
    5525             : !       CALL dbcsr_create(temp2_oo,&
    5526             : !               template=almo_scf_env%matrix_sigma(ispin),&
    5527             : !               matrix_type=dbcsr_type_no_symmetry)
    5528             : !
    5529             : !       CALL dbcsr_multiply("T","N",1.0_dp,almo_scf_env%matrix_t_blk(ispin),&
    5530             : !               almo_scf_env%matrix_ks_0deloc(ispin),&
    5531             : !               0.0_dp,Fon,filter_eps=almo_scf_env%eps_filter)
    5532             : !
    5533             : !       CALL dbcsr_multiply("N","N",1.0_dp,Fon,&
    5534             : !               almo_scf_env%matrix_v_full_blk(ispin),&
    5535             : !               0.0_dp,Fov,filter_eps=almo_scf_env%eps_filter)
    5536             : !
    5537             : !       CALL dbcsr_multiply("N","N",1.0_dp,Fon,&
    5538             : !               almo_scf_env%matrix_t_blk(ispin),&
    5539             : !               0.0_dp,temp1_oo,filter_eps=almo_scf_env%eps_filter)
    5540             : !
    5541             : !       CALL dbcsr_multiply("N","N",1.0_dp,temp1_oo,&
    5542             : !               almo_scf_env%matrix_sigma_inv(ispin),&
    5543             : !               0.0_dp,temp2_oo,filter_eps=almo_scf_env%eps_filter)
    5544             : !       CALL dbcsr_release(temp1_oo)
    5545             : !
    5546             : !       CALL dbcsr_multiply("T","N",1.0_dp,almo_scf_env%matrix_t_blk(ispin),&
    5547             : !               almo_scf_env%matrix_s(1),&
    5548             : !               0.0_dp,Fon,filter_eps=almo_scf_env%eps_filter)
    5549             : !
    5550             : !       CALL dbcsr_multiply("N","N",1.0_dp,Fon,&
    5551             : !               almo_scf_env%matrix_v_full_blk(ispin),&
    5552             : !               0.0_dp,Fov_filtered,filter_eps=almo_scf_env%eps_filter)
    5553             : !       CALL dbcsr_release(Fon)
    5554             : !
    5555             : !       CALL dbcsr_multiply("N","N",-1.0_dp,temp2_oo,&
    5556             : !               Fov_filtered,&
    5557             : !               1.0_dp,Fov,filter_eps=almo_scf_env%eps_filter)
    5558             : !       CALL dbcsr_release(temp2_oo)
    5559             : !
    5560             : !       CALL dbcsr_multiply("N","N",1.0_dp,almo_scf_env%matrix_sigma_inv(ispin),&
    5561             : !               Fov,0.0_dp,Fov_filtered,filter_eps=almo_scf_env%eps_filter)
    5562             : !
    5563             : !       CALL dbcsr_multiply("N","N",1.0_dp,Fov_filtered,&
    5564             : !               matrix_sigma_vv_full_sqrt_inv,&
    5565             : !               0.0_dp,Fov,filter_eps=almo_scf_env%eps_filter)
    5566             : !       !CALL dbcsr_copy(Fov,Fov_filtered)
    5567             : !CALL dbcsr_print(Fov)
    5568             : !
    5569             : !       IF (safe_mode) THEN
    5570             : !          CALL dbcsr_init(Fov_original)
    5571             : !          CALL dbcsr_create(Fov_original,template=Fov)
    5572             : !          CALL dbcsr_copy(Fov_original,Fov)
    5573             : !       ENDIF
    5574             : !
    5575             : !!! remove diagonal blocks
    5576             : !!CALL dbcsr_iterator_start(iter,Fov)
    5577             : !!DO WHILE (dbcsr_iterator_blocks_left(iter))
    5578             : !!
    5579             : !!   CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
    5580             : !!           row_size=iblock_row_size,col_size=iblock_col_size)
    5581             : !!
    5582             : !!   IF (iblock_row.eq.iblock_col) data_p(:,:)=0.0_dp
    5583             : !!
    5584             : !!ENDDO
    5585             : !!CALL dbcsr_iterator_stop(iter)
    5586             : !!CALL dbcsr_finalize(Fov)
    5587             : !
    5588             : !!! perform svd of blocks
    5589             : !!!!! THIS ROUTINE WORKS ONLY ON ONE CPU AND ONLY FOR 2 MOLECULES !!!
    5590             : !!CALL dbcsr_init(temp_u_v_full_blk)
    5591             : !!CALL dbcsr_create(temp_u_v_full_blk,&
    5592             : !!        template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5593             : !!        matrix_type=dbcsr_type_no_symmetry)
    5594             : !!
    5595             : !!CALL dbcsr_work_create(temp_u_v_full_blk,&
    5596             : !!        work_mutable=.TRUE.)
    5597             : !!CALL dbcsr_iterator_start(iter,Fov)
    5598             : !!DO WHILE (dbcsr_iterator_blocks_left(iter))
    5599             : !!
    5600             : !!   CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
    5601             : !!           row_size=iblock_row_size,col_size=iblock_col_size)
    5602             : !!
    5603             : !!   IF (iblock_row.ne.iblock_col) THEN
    5604             : !!
    5605             : !!      ! Prepare data
    5606             : !!      allocate(eigenvalues(min(iblock_row_size,iblock_col_size)))
    5607             : !!      allocate(data_copy(iblock_row_size,iblock_col_size))
    5608             : !!      allocate(left_vectors(iblock_row_size,iblock_row_size))
    5609             : !!      allocate(right_vectors(iblock_col_size,iblock_col_size))
    5610             : !!      data_copy(:,:)=data_p(:,:)
    5611             : !!
    5612             : !!      ! Query the optimal workspace for dgesvd
    5613             : !!      LWORK = -1
    5614             : !!      allocate(WORK(MAX(1,LWORK)))
    5615             : !!      CALL DGESVD('N','A',iblock_row_size,iblock_col_size,data_copy,&
    5616             : !!              iblock_row_size,eigenvalues,left_vectors,iblock_row_size,&
    5617             : !!              right_vectors,iblock_col_size,WORK,LWORK,INFO)
    5618             : !!      LWORK = INT(WORK( 1 ))
    5619             : !!      deallocate(WORK)
    5620             : !!
    5621             : !!      ! Allocate the workspace and perform svd
    5622             : !!      allocate(WORK(MAX(1,LWORK)))
    5623             : !!      CALL DGESVD('N','A',iblock_row_size,iblock_col_size,data_copy,&
    5624             : !!              iblock_row_size,eigenvalues,left_vectors,iblock_row_size,&
    5625             : !!              right_vectors,iblock_col_size,WORK,LWORK,INFO)
    5626             : !!      deallocate(WORK)
    5627             : !!      IF( INFO.NE.0 ) THEN
    5628             : !!         CPABORT("DGESVD failed")
    5629             : !!      END IF
    5630             : !!
    5631             : !!      ! copy right singular vectors into a unitary matrix
    5632             : !!      CALL dbcsr_put_block(temp_u_v_full_blk,iblock_col,iblock_col,right_vectors)
    5633             : !!
    5634             : !!      deallocate(eigenvalues)
    5635             : !!      deallocate(data_copy)
    5636             : !!      deallocate(left_vectors)
    5637             : !!      deallocate(right_vectors)
    5638             : !!
    5639             : !!   ENDIF
    5640             : !!ENDDO
    5641             : !!CALL dbcsr_iterator_stop(iter)
    5642             : !!CALL dbcsr_finalize(temp_u_v_full_blk)
    5643             : !!!CALL dbcsr_print(temp_u_v_full_blk)
    5644             : !!CALL dbcsr_multiply("N","T",1.0_dp,Fov,temp_u_v_full_blk,&
    5645             : !!        0.0_dp,Fov_filtered,filter_eps=almo_scf_env%eps_filter)
    5646             : !!
    5647             : !!CALL dbcsr_copy(Fov,Fov_filtered)
    5648             : !!CALL dbcsr_print(Fov)
    5649             : !
    5650             : !       !!!!!!!!!!!!!!!!!!!
    5651             : !       ! 2. Initialize variables
    5652             : !
    5653             : !       ! temp space
    5654             : !       CALL dbcsr_init(temp0_ov)
    5655             : !       CALL dbcsr_create(temp0_ov,&
    5656             : !               template=almo_scf_env%matrix_ov_full(ispin))
    5657             : !
    5658             : !       ! current unitary matrix
    5659             : !       CALL dbcsr_init(U_blk)
    5660             : !       CALL dbcsr_create(U_blk,&
    5661             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5662             : !               matrix_type=dbcsr_type_no_symmetry)
    5663             : !
    5664             : !       ! unitary matrix accumulator
    5665             : !       CALL dbcsr_init(U_blk_tot)
    5666             : !       CALL dbcsr_create(U_blk_tot,&
    5667             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5668             : !               matrix_type=dbcsr_type_no_symmetry)
    5669             : !       CALL dbcsr_add_on_diag(U_blk_tot,1.0_dp)
    5670             : !
    5671             : !!CALL dbcsr_add_on_diag(U_blk,1.0_dp)
    5672             : !!CALL dbcsr_multiply("N","T",1.0_dp,U_blk,temp_u_v_full_blk,&
    5673             : !!        0.0_dp,U_blk_tot,filter_eps=almo_scf_env%eps_filter)
    5674             : !!
    5675             : !!CALL dbcsr_release(temp_u_v_full_blk)
    5676             : !
    5677             : !       ! init gradient
    5678             : !       CALL dbcsr_init(grad_blk)
    5679             : !       CALL dbcsr_create(grad_blk,&
    5680             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5681             : !               matrix_type=dbcsr_type_no_symmetry)
    5682             : !
    5683             : !       ! init step matrix
    5684             : !       CALL dbcsr_init(step_blk)
    5685             : !       CALL dbcsr_create(step_blk,&
    5686             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5687             : !               matrix_type=dbcsr_type_no_symmetry)
    5688             : !
    5689             : !       ! "retain discarded" filter (0.0 - retain, 1.0 - discard)
    5690             : !       CALL dbcsr_init(matrix_filter)
    5691             : !       CALL dbcsr_create(matrix_filter,&
    5692             : !               template=almo_scf_env%matrix_ov_full(ispin))
    5693             : !       ! copy Fov into the filter matrix temporarily
    5694             : !       ! so we know which blocks contain significant elements
    5695             : !       CALL dbcsr_copy(matrix_filter,Fov)
    5696             : !
    5697             : !       ! fill out filter elements block-by-block
    5698             : !       CALL dbcsr_iterator_start(iter,matrix_filter)
    5699             : !       DO WHILE (dbcsr_iterator_blocks_left(iter))
    5700             : !
    5701             : !          CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
    5702             : !                  row_size=iblock_row_size,col_size=iblock_col_size)
    5703             : !
    5704             : !          retained_v=almo_scf_env%nvirt_of_domain(iblock_col,ispin)
    5705             : !
    5706             : !          data_p(:,1:retained_v)=0.0_dp
    5707             : !          data_p(:,(retained_v+1):iblock_col_size)=1.0_dp
    5708             : !
    5709             : !       ENDDO
    5710             : !       CALL dbcsr_iterator_stop(iter)
    5711             : !       CALL dbcsr_finalize(matrix_filter)
    5712             : !
    5713             : !       ! apply the filter
    5714             : !       CALL dbcsr_hadamard_product(Fov,matrix_filter,Fov_filtered)
    5715             : !
    5716             : !       !!!!!!!!!!!!!!!!!!!!!
    5717             : !       ! 3. start iterative minimization of the elements to be discarded
    5718             : !       iteration=0
    5719             : !       converged=.FALSE.
    5720             : !       prepare_to_exit=.FALSE.
    5721             : !       DO
    5722             : !
    5723             : !          iteration=iteration+1
    5724             : !
    5725             : !          !!!!!!!!!!!!!!!!!!!!!!!!!
    5726             : !          ! 4. compute the gradient
    5727             : !          CALL dbcsr_set(grad_blk,0.0_dp)
    5728             : !          ! create the diagonal blocks only
    5729             : !          CALL dbcsr_add_on_diag(grad_blk,1.0_dp)
    5730             : !
    5731             : !          CALL dbcsr_multiply("T","N",2.0_dp,Fov_filtered,Fov,&
    5732             : !                  0.0_dp,grad_blk,retain_sparsity=.TRUE.,&
    5733             : !                  filter_eps=almo_scf_env%eps_filter)
    5734             : !          CALL dbcsr_multiply("T","N",-2.0_dp,Fov,Fov_filtered,&
    5735             : !                  1.0_dp,grad_blk,retain_sparsity=.TRUE.,&
    5736             : !                  filter_eps=almo_scf_env%eps_filter)
    5737             : !
    5738             : !          !!!!!!!!!!!!!!!!!!!!!!!
    5739             : !          ! 5. check convergence
    5740             : !          obj_function = 0.5_dp*(dbcsr_frobenius_norm(Fov_filtered))**2
    5741             : !          grad_norm = dbcsr_frobenius_norm(grad_blk)
    5742             : !          converged=(grad_norm.lt.almo_scf_env%truncate_v_eps_convergence)
    5743             : !          IF (converged.OR.(iteration.ge.almo_scf_env%truncate_v_max_iter)) THEN
    5744             : !             prepare_to_exit=.TRUE.
    5745             : !          ENDIF
    5746             : !
    5747             : !          IF (.NOT.prepare_to_exit) THEN
    5748             : !
    5749             : !             !!!!!!!!!!!!!!!!!!!!!!!
    5750             : !             ! 6. perform steps in the direction of the gradient
    5751             : !             !    a. first, perform a trial step to "see" the parameters
    5752             : !             !       of the parabola along the gradient:
    5753             : !             !       a0 * x^2 + b0 * x + c0
    5754             : !             !    b. then perform the step to the bottom of the parabola
    5755             : !
    5756             : !             ! get c0
    5757             : !             c0 = obj_function
    5758             : !             ! get b0 <= d_f/d_alpha along grad
    5759             : !             !!!CALL dbcsr_multiply("N","N",4.0_dp,Fov,grad_blk,&
    5760             : !             !!!        0.0_dp,temp0_ov,&
    5761             : !             !!!        filter_eps=almo_scf_env%eps_filter)
    5762             : !             !!!CALL dbcsr_dot(Fov_filtered,temp0_ov,b0)
    5763             : !
    5764             : !             alpha=almo_scf_env%truncate_v_trial_step_size
    5765             : !
    5766             : !             line_search_step_last=3
    5767             : !             DO line_search_step=1,line_search_step_last
    5768             : !                CALL dbcsr_copy(step_blk,grad_blk)
    5769             : !                CALL dbcsr_scale(step_blk,-1.0_dp*alpha)
    5770             : !                CALL generator_to_unitary(step_blk,U_blk,&
    5771             : !                        almo_scf_env%eps_filter)
    5772             : !                CALL dbcsr_multiply("N","N",1.0_dp,Fov,U_blk,0.0_dp,temp0_ov,&
    5773             : !                        filter_eps=almo_scf_env%eps_filter)
    5774             : !                CALL dbcsr_hadamard_product(temp0_ov,matrix_filter,&
    5775             : !                        Fov_filtered)
    5776             : !
    5777             : !                obj_function_new = 0.5_dp*(dbcsr_frobenius_norm(Fov_filtered))**2
    5778             : !                IF (line_search_step.eq.1) THEN
    5779             : !                   ff1 = obj_function_new
    5780             : !                   step1 = alpha
    5781             : !                ELSE IF (line_search_step.eq.2) THEN
    5782             : !                   ff2 = obj_function_new
    5783             : !                   step2 = alpha
    5784             : !                ENDIF
    5785             : !
    5786             : !                IF (unit_nr>0.AND.(line_search_step.ne.line_search_step_last)) THEN
    5787             : !                   WRITE(unit_nr,'(T6,A,1X,I3,1X,F10.3,E12.3,E12.3,E12.3)') &
    5788             : !                         "JOINT_SVD_lin",&
    5789             : !                         iteration,&
    5790             : !                         alpha,&
    5791             : !                         obj_function,&
    5792             : !                         obj_function_new,&
    5793             : !                         obj_function_new-obj_function
    5794             : !                ENDIF
    5795             : !
    5796             : !                IF (line_search_step.eq.1) THEN
    5797             : !                   alpha=2.0_dp*alpha
    5798             : !                ENDIF
    5799             : !                IF (line_search_step.eq.2) THEN
    5800             : !                   a0 = ((ff1-c0)/step1 - (ff2-c0)/step2) / (step1 - step2)
    5801             : !                   b0 = (ff1-c0)/step1 - a0*step1
    5802             : !                   ! step size in to the bottom of "the parabola"
    5803             : !                   alpha=-b0/(2.0_dp*a0)
    5804             : !                   ! update the default step size
    5805             : !                   almo_scf_env%truncate_v_trial_step_size=alpha
    5806             : !                ENDIF
    5807             : !                !!!IF (line_search_step.eq.1) THEN
    5808             : !                !!!   a0 = (obj_function_new - b0 * alpha - c0) / (alpha*alpha)
    5809             : !                !!!   ! step size in to the bottom of "the parabola"
    5810             : !                !!!   alpha=-b0/(2.0_dp*a0)
    5811             : !                !!!   !IF (alpha.gt.10.0_dp) alpha=10.0_dp
    5812             : !                !!!ENDIF
    5813             : !
    5814             : !             ENDDO
    5815             : !
    5816             : !             ! update Fov and U_blk_tot (use grad_blk as tmp storage)
    5817             : !             CALL dbcsr_copy(Fov,temp0_ov)
    5818             : !             CALL dbcsr_multiply("N","N",1.0_dp,U_blk_tot,U_blk,&
    5819             : !                     0.0_dp,grad_blk,&
    5820             : !                     filter_eps=almo_scf_env%eps_filter)
    5821             : !             CALL dbcsr_copy(U_blk_tot,grad_blk)
    5822             : !
    5823             : !          ENDIF
    5824             : !
    5825             : !          t2 = m_walltime()
    5826             : !
    5827             : !          IF (unit_nr>0) THEN
    5828             : !             WRITE(unit_nr,'(T6,A,1X,I3,1X,F10.3,E12.3,E12.3,E12.3,E12.3,F10.3)') &
    5829             : !                   "JOINT_SVD_itr",&
    5830             : !                   iteration,&
    5831             : !                   alpha,&
    5832             : !                   obj_function,&
    5833             : !                   obj_function_new,&
    5834             : !                   obj_function_new-obj_function,&
    5835             : !                   grad_norm,&
    5836             : !                   t2-t1
    5837             : !                   !(flop1+flop2)/(1.0E6_dp*(t2-t1))
    5838             : !             CALL m_flush(unit_nr)
    5839             : !          ENDIF
    5840             : !
    5841             : !          t1 = m_walltime()
    5842             : !
    5843             : !          IF (prepare_to_exit) EXIT
    5844             : !
    5845             : !       ENDDO ! stop iterations
    5846             : !
    5847             : !       IF (safe_mode) THEN
    5848             : !          CALL dbcsr_multiply("N","N",1.0_dp,Fov_original,&
    5849             : !                  U_blk_tot,0.0_dp,temp0_ov,&
    5850             : !                  filter_eps=almo_scf_env%eps_filter)
    5851             : !CALL dbcsr_print(temp0_ov)
    5852             : !          CALL dbcsr_hadamard_product(temp0_ov,matrix_filter,&
    5853             : !                  Fov_filtered)
    5854             : !          obj_function_new = 0.5_dp*(dbcsr_frobenius_norm(Fov_filtered))**2
    5855             : !
    5856             : !          IF (unit_nr>0) THEN
    5857             : !             WRITE(unit_nr,'(T6,A,1X,E12.3)') &
    5858             : !                   "SANITY CHECK:",&
    5859             : !                   obj_function_new
    5860             : !             CALL m_flush(unit_nr)
    5861             : !          ENDIF
    5862             : !
    5863             : !          CALL dbcsr_release(Fov_original)
    5864             : !       ENDIF
    5865             : !
    5866             : !       CALL dbcsr_release(temp0_ov)
    5867             : !       CALL dbcsr_release(U_blk)
    5868             : !       CALL dbcsr_release(grad_blk)
    5869             : !       CALL dbcsr_release(step_blk)
    5870             : !       CALL dbcsr_release(matrix_filter)
    5871             : !       CALL dbcsr_release(Fov)
    5872             : !       CALL dbcsr_release(Fov_filtered)
    5873             : !
    5874             : !       ! compute rotated virtual orbitals
    5875             : !       CALL dbcsr_init(v_full_tmp)
    5876             : !       CALL dbcsr_create(v_full_tmp,&
    5877             : !               template=almo_scf_env%matrix_v_full_blk(ispin),&
    5878             : !               matrix_type=dbcsr_type_no_symmetry)
    5879             : !       CALL dbcsr_multiply("N","N",1.0_dp,&
    5880             : !               v_full_new,&
    5881             : !               matrix_sigma_vv_full_sqrt_inv,0.0_dp,v_full_tmp,&
    5882             : !               filter_eps=almo_scf_env%eps_filter)
    5883             : !       CALL dbcsr_multiply("N","N",1.0_dp,&
    5884             : !               v_full_tmp,&
    5885             : !               U_blk_tot,0.0_dp,v_full_new,&
    5886             : !               filter_eps=almo_scf_env%eps_filter)
    5887             : !
    5888             : !       CALL dbcsr_release(matrix_sigma_vv_full_sqrt_inv)
    5889             : !       CALL dbcsr_release(v_full_tmp)
    5890             : !       CALL dbcsr_release(U_blk_tot)
    5891             : !
    5892             : !!!!! orthogonalized virtuals are not blocked
    5893             : !       ! copy new virtuals into the truncated matrix
    5894             : !       !CALL dbcsr_work_create(almo_scf_env%matrix_v_blk(ispin),&
    5895             : !       CALL dbcsr_work_create(almo_scf_env%matrix_v(ispin),&
    5896             : !               work_mutable=.TRUE.)
    5897             : !       CALL dbcsr_iterator_start(iter,v_full_new)
    5898             : !       DO WHILE (dbcsr_iterator_blocks_left(iter))
    5899             : !
    5900             : !          CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
    5901             : !                  row_size=iblock_row_size,col_size=iblock_col_size)
    5902             : !
    5903             : !          retained_v=almo_scf_env%nvirt_of_domain(iblock_col,ispin)
    5904             : !
    5905             : !          CALL dbcsr_put_block(almo_scf_env%matrix_v(ispin), iblock_row,iblock_col,data_p(:,1:retained_v))
    5906             : !          CPASSERT(retained_v.gt.0)
    5907             : !
    5908             : !       ENDDO ! iterator
    5909             : !       CALL dbcsr_iterator_stop(iter)
    5910             : !       !!CALL dbcsr_finalize(almo_scf_env%matrix_v_blk(ispin))
    5911             : !       CALL dbcsr_finalize(almo_scf_env%matrix_v(ispin))
    5912             : !
    5913             : !       CALL dbcsr_release(v_full_new)
    5914             : !
    5915             : !    ENDDO ! ispin
    5916             : !
    5917             : !    CALL timestop(handle)
    5918             : !
    5919             : !  END SUBROUTINE truncate_subspace_v_blk
    5920             : 
    5921             : ! **************************************************************************************************
    5922             : !> \brief Compute the gradient wrt the main variable (e.g. Theta, X)
    5923             : !> \param m_grad_out ...
    5924             : !> \param m_ks ...
    5925             : !> \param m_s ...
    5926             : !> \param m_t ...
    5927             : !> \param m_t0 ...
    5928             : !> \param m_siginv ...
    5929             : !> \param m_quench_t ...
    5930             : !> \param m_FTsiginv ...
    5931             : !> \param m_siginvTFTsiginv ...
    5932             : !> \param m_ST ...
    5933             : !> \param m_STsiginv0 ...
    5934             : !> \param m_theta ...
    5935             : !> \param domain_s_inv ...
    5936             : !> \param domain_r_down ...
    5937             : !> \param cpu_of_domain ...
    5938             : !> \param domain_map ...
    5939             : !> \param assume_t0_q0x ...
    5940             : !> \param optimize_theta ...
    5941             : !> \param normalize_orbitals ...
    5942             : !> \param penalty_occ_vol ...
    5943             : !> \param penalty_occ_local ...
    5944             : !> \param penalty_occ_vol_prefactor ...
    5945             : !> \param envelope_amplitude ...
    5946             : !> \param eps_filter ...
    5947             : !> \param spin_factor ...
    5948             : !> \param special_case ...
    5949             : !> \param m_sig_sqrti_ii ...
    5950             : !> \param op_sm_set ...
    5951             : !> \param weights ...
    5952             : !> \param energy_coeff ...
    5953             : !> \param localiz_coeff ...
    5954             : !> \par History
    5955             : !>       2015.03 created [Rustam Z Khaliullin]
    5956             : !> \author Rustam Z Khaliullin
    5957             : ! **************************************************************************************************
    5958        1474 :    SUBROUTINE compute_gradient(m_grad_out, m_ks, m_s, m_t, m_t0, &
    5959             :                                m_siginv, m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, m_STsiginv0, &
    5960        1474 :                                m_theta, domain_s_inv, domain_r_down, &
    5961        1474 :                                cpu_of_domain, domain_map, assume_t0_q0x, optimize_theta, &
    5962             :                                normalize_orbitals, penalty_occ_vol, penalty_occ_local, &
    5963             :                                penalty_occ_vol_prefactor, envelope_amplitude, eps_filter, spin_factor, &
    5964        1474 :                                special_case, m_sig_sqrti_ii, op_sm_set, weights, energy_coeff, &
    5965             :                                localiz_coeff)
    5966             : 
    5967             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_grad_out, m_ks, m_s, m_t, m_t0, &
    5968             :                                                             m_siginv, m_quench_t, m_FTsiginv, &
    5969             :                                                             m_siginvTFTsiginv, m_ST, m_STsiginv0, &
    5970             :                                                             m_theta
    5971             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    5972             :          INTENT(IN)                                      :: domain_s_inv, domain_r_down
    5973             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
    5974             :       TYPE(domain_map_type), INTENT(IN)                  :: domain_map
    5975             :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, optimize_theta, &
    5976             :                                                             normalize_orbitals, penalty_occ_vol
    5977             :       LOGICAL, INTENT(IN), OPTIONAL                      :: penalty_occ_local
    5978             :       REAL(KIND=dp), INTENT(IN)                          :: penalty_occ_vol_prefactor, &
    5979             :                                                             envelope_amplitude, eps_filter, &
    5980             :                                                             spin_factor
    5981             :       INTEGER, INTENT(IN)                                :: special_case
    5982             :       TYPE(dbcsr_type), INTENT(IN), OPTIONAL             :: m_sig_sqrti_ii
    5983             :       TYPE(dbcsr_p_type), DIMENSION(:, :), OPTIONAL, &
    5984             :          POINTER                                         :: op_sm_set
    5985             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN), OPTIONAL  :: weights
    5986             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: energy_coeff, localiz_coeff
    5987             : 
    5988             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'compute_gradient'
    5989             : 
    5990             :       INTEGER                                            :: dim0, handle, idim0, nao, reim
    5991             :       LOGICAL                                            :: my_penalty_local
    5992             :       REAL(KIND=dp)                                      :: coeff, energy_g_norm, my_energy_coeff, &
    5993             :                                                             my_localiz_coeff, &
    5994             :                                                             penalty_occ_vol_g_norm
    5995        1474 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: tg_diagonal
    5996             :       TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_no_2, m_tmp_no_3, &
    5997             :                                                             m_tmp_oo_1, m_tmp_oo_2, temp1, temp2, &
    5998             :                                                             tempNOcc1, tempOccOcc1
    5999             : 
    6000        1474 :       CALL timeset(routineN, handle)
    6001             : 
    6002        1474 :       IF (normalize_orbitals .AND. (.NOT. PRESENT(m_sig_sqrti_ii))) THEN
    6003           0 :          CPABORT("Normalization matrix is required")
    6004             :       END IF
    6005             : 
    6006        1474 :       my_penalty_local = .FALSE.
    6007        1474 :       my_localiz_coeff = 1.0_dp
    6008        1474 :       my_energy_coeff = 0.0_dp
    6009        1474 :       IF (PRESENT(localiz_coeff)) THEN
    6010        1048 :          my_localiz_coeff = localiz_coeff
    6011             :       END IF
    6012        1474 :       IF (PRESENT(energy_coeff)) THEN
    6013        1048 :          my_energy_coeff = energy_coeff
    6014             :       END IF
    6015        1474 :       IF (PRESENT(penalty_occ_local)) THEN
    6016        1048 :          my_penalty_local = penalty_occ_local
    6017             :       END IF
    6018             : 
    6019             :       ! use this otherways unused variables
    6020        1474 :       CALL dbcsr_get_info(matrix=m_ks, nfullrows_total=nao)
    6021        1474 :       CALL dbcsr_get_info(matrix=m_s, nfullrows_total=nao)
    6022        1474 :       CALL dbcsr_get_info(matrix=m_t, nfullrows_total=nao)
    6023             : 
    6024             :       CALL dbcsr_create(m_tmp_no_1, &
    6025             :                         template=m_quench_t, &
    6026        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6027             :       CALL dbcsr_create(m_tmp_no_2, &
    6028             :                         template=m_quench_t, &
    6029        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6030             :       CALL dbcsr_create(m_tmp_no_3, &
    6031             :                         template=m_quench_t, &
    6032        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6033             :       CALL dbcsr_create(m_tmp_oo_1, &
    6034             :                         template=m_siginv, &
    6035        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6036             :       CALL dbcsr_create(m_tmp_oo_2, &
    6037             :                         template=m_siginv, &
    6038        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6039             :       CALL dbcsr_create(tempNOcc1, &
    6040             :                         template=m_t, &
    6041        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6042             :       CALL dbcsr_create(tempOccOcc1, &
    6043             :                         template=m_siginv, &
    6044        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6045             :       CALL dbcsr_create(temp1, &
    6046             :                         template=m_t, &
    6047        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6048             :       CALL dbcsr_create(temp2, &
    6049             :                         template=m_t, &
    6050        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6051             : 
    6052             :       ! do d_E/d_T first
    6053             :       !IF (.NOT.PRESENT(m_FTsiginv)) THEN
    6054             :       !   CALL dbcsr_multiply("N","N",1.0_dp,&
    6055             :       !           m_ks,&
    6056             :       !           m_t,&
    6057             :       !           0.0_dp,m_tmp_no_1,&
    6058             :       !           filter_eps=eps_filter)
    6059             :       !   CALL dbcsr_multiply("N","N",1.0_dp,&
    6060             :       !           m_tmp_no_1,&
    6061             :       !           m_siginv,&
    6062             :       !           0.0_dp,m_FTsiginv,&
    6063             :       !           filter_eps=eps_filter)
    6064             :       !ENDIF
    6065             : 
    6066        1474 :       CALL dbcsr_copy(m_tmp_no_2, m_quench_t)
    6067        1474 :       CALL dbcsr_copy(m_tmp_no_2, m_FTsiginv, keep_sparsity=.TRUE.)
    6068             : 
    6069             :       !IF (.NOT.PRESENT(m_siginvTFTsiginv)) THEN
    6070             :       !   CALL dbcsr_multiply("T","N",1.0_dp,&
    6071             :       !           m_t,&
    6072             :       !           m_FTsiginv,&
    6073             :       !           0.0_dp,m_tmp_oo_1,&
    6074             :       !           filter_eps=eps_filter)
    6075             :       !   CALL dbcsr_multiply("N","N",1.0_dp,&
    6076             :       !           m_siginv,&
    6077             :       !           m_tmp_oo_1,&
    6078             :       !           0.0_dp,m_siginvTFTsiginv,&
    6079             :       !           filter_eps=eps_filter)
    6080             :       !ENDIF
    6081             : 
    6082             :       !IF (.NOT.PRESENT(m_ST)) THEN
    6083             :       !   CALL dbcsr_multiply("N","N",1.0_dp,&
    6084             :       !           m_s,&
    6085             :       !           m_t,&
    6086             :       !           0.0_dp,m_ST,&
    6087             :       !           filter_eps=eps_filter)
    6088             :       !ENDIF
    6089             : 
    6090             :       CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6091             :                           m_ST, &
    6092             :                           m_siginvTFTsiginv, &
    6093             :                           1.0_dp, m_tmp_no_2, &
    6094        1474 :                           retain_sparsity=.TRUE.)
    6095        1474 :       CALL dbcsr_scale(m_tmp_no_2, 2.0_dp*spin_factor)
    6096             : 
    6097             :       ! LzL Add gradient for Localization
    6098        1474 :       IF (my_penalty_local) THEN
    6099             : 
    6100           0 :          CALL dbcsr_set(temp2, 0.0_dp) ! accumulate the localization gradient here
    6101             : 
    6102           0 :          DO idim0 = 1, SIZE(op_sm_set, 2) ! this loop is over miller ind
    6103             : 
    6104           0 :             DO reim = 1, SIZE(op_sm_set, 1) ! this loop is over Re/Im
    6105             : 
    6106             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6107             :                                    op_sm_set(reim, idim0)%matrix, &
    6108             :                                    m_t, &
    6109             :                                    0.0_dp, tempNOcc1, &
    6110           0 :                                    filter_eps=eps_filter)
    6111             : 
    6112             :                ! warning - save time by computing only the diagonal elements
    6113             :                CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6114             :                                    m_t, &
    6115             :                                    tempNOcc1, &
    6116             :                                    0.0_dp, tempOccOcc1, &
    6117           0 :                                    filter_eps=eps_filter)
    6118             : 
    6119           0 :                CALL dbcsr_get_info(tempOccOcc1, nfullrows_total=dim0)
    6120           0 :                ALLOCATE (tg_diagonal(dim0))
    6121           0 :                CALL dbcsr_get_diag(tempOccOcc1, tg_diagonal)
    6122           0 :                CALL dbcsr_set(tempOccOcc1, 0.0_dp)
    6123           0 :                CALL dbcsr_set_diag(tempOccOcc1, tg_diagonal)
    6124           0 :                DEALLOCATE (tg_diagonal)
    6125             : 
    6126             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6127             :                                    tempNOcc1, &
    6128             :                                    tempOccOcc1, &
    6129             :                                    0.0_dp, temp1, &
    6130           0 :                                    filter_eps=eps_filter)
    6131             : 
    6132             :             END DO
    6133             : 
    6134             :             SELECT CASE (2) ! allows for selection of different spread functionals
    6135             :             CASE (1) ! functional =  -W_I * log( |z_I|^2 )
    6136           0 :                CPABORT("Localization function is not implemented")
    6137             :                !coeff = -(weights(idim0)/z2(ielem))
    6138             :             CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
    6139           0 :                coeff = -weights(idim0)
    6140             :             CASE (3) ! functional =  W_I * ( 1 - |z_I| )
    6141             :                CPABORT("Localization function is not implemented")
    6142             :                !coeff = -(weights(idim0)/(2.0_dp*z2(ielem)))
    6143             :             END SELECT
    6144           0 :             CALL dbcsr_add(temp2, temp1, 1.0_dp, coeff)
    6145             :             !CALL dbcsr_add(grad_loc, temp1, 1.0_dp, 1.0_dp)
    6146             : 
    6147             :          END DO ! end loop over idim0
    6148           0 :          CALL dbcsr_add(m_tmp_no_2, temp2, my_energy_coeff, my_localiz_coeff*4.0_dp)
    6149             :       END IF
    6150             : 
    6151             :       ! add penalty on the occupied volume: det(sigma)
    6152        1474 :       IF (penalty_occ_vol) THEN
    6153             :          !RZK-warning CALL dbcsr_multiply("N","N",&
    6154             :          !RZK-warning         penalty_occ_vol_prefactor,&
    6155             :          !RZK-warning         m_ST,&
    6156             :          !RZK-warning         m_siginv,&
    6157             :          !RZK-warning         1.0_dp,m_tmp_no_2,&
    6158             :          !RZK-warning         retain_sparsity=.TRUE.,&
    6159             :          !RZK-warning         )
    6160           0 :          CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
    6161             :          CALL dbcsr_multiply("N", "N", &
    6162             :                              penalty_occ_vol_prefactor, &
    6163             :                              m_ST, &
    6164             :                              m_siginv, &
    6165             :                              0.0_dp, m_tmp_no_1, &
    6166           0 :                              retain_sparsity=.TRUE.)
    6167             :          ! this norm does not contain the normalization factors
    6168           0 :          penalty_occ_vol_g_norm = dbcsr_maxabs(m_tmp_no_1)
    6169           0 :          energy_g_norm = dbcsr_maxabs(m_tmp_no_2)
    6170             :          !WRITE (*, "(A30,2F20.10)") "Energy/penalty g norms (no norm): ", energy_g_norm, penalty_occ_vol_g_norm
    6171           0 :          CALL dbcsr_add(m_tmp_no_2, m_tmp_no_1, 1.0_dp, 1.0_dp)
    6172             :       END IF
    6173             : 
    6174             :       ! take into account the factor from the normalization constraint
    6175        1474 :       IF (normalize_orbitals) THEN
    6176             : 
    6177             :          ! G = ( G - ST.[tr(T).G]_ii ) . [sig_sqrti]_ii
    6178             :          ! this expression can be simplified to
    6179             :          ! G = ( G - c0*ST ) . [sig_sqrti]_ii
    6180             :          ! where c0 = penalty_occ_vol_prefactor
    6181             :          ! This is because tr(T).G_Energy = 0 and
    6182             :          !                 tr(T).G_Penalty = c0*I
    6183             : 
    6184             :          !! faster way to take the norm into account (tested for vol penalty olny)
    6185             :          !!CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
    6186             :          !!CALL dbcsr_copy(m_tmp_no_1, m_ST, keep_sparsity=.TRUE.)
    6187             :          !!CALL dbcsr_add(m_tmp_no_2, m_tmp_no_1, 1.0_dp, -penalty_occ_vol_prefactor)
    6188             :          !!CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
    6189             :          !!CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6190             :          !!                    m_tmp_no_2, &
    6191             :          !!                    m_sig_sqrti_ii, &
    6192             :          !!                    0.0_dp, m_tmp_no_1, &
    6193             :          !!                    retain_sparsity=.TRUE.)
    6194             : 
    6195             :          ! slower way of taking the norm into account
    6196           0 :          CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
    6197             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6198             :                              m_tmp_no_2, &
    6199             :                              m_sig_sqrti_ii, &
    6200             :                              0.0_dp, m_tmp_no_1, &
    6201           0 :                              retain_sparsity=.TRUE.)
    6202             : 
    6203             :          ! get [tr(T).G]_ii
    6204           0 :          CALL dbcsr_copy(m_tmp_oo_1, m_sig_sqrti_ii)
    6205             :          CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6206             :                              m_t, &
    6207             :                              m_tmp_no_2, &
    6208             :                              0.0_dp, m_tmp_oo_1, &
    6209           0 :                              retain_sparsity=.TRUE.)
    6210             : 
    6211           0 :          CALL dbcsr_get_info(m_sig_sqrti_ii, nfullrows_total=dim0)
    6212           0 :          ALLOCATE (tg_diagonal(dim0))
    6213           0 :          CALL dbcsr_get_diag(m_tmp_oo_1, tg_diagonal)
    6214           0 :          CALL dbcsr_set(m_tmp_oo_1, 0.0_dp)
    6215           0 :          CALL dbcsr_set_diag(m_tmp_oo_1, tg_diagonal)
    6216           0 :          DEALLOCATE (tg_diagonal)
    6217             : 
    6218             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6219             :                              m_sig_sqrti_ii, &
    6220             :                              m_tmp_oo_1, &
    6221             :                              0.0_dp, m_tmp_oo_2, &
    6222           0 :                              filter_eps=eps_filter)
    6223             :          CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6224             :                              m_ST, &
    6225             :                              m_tmp_oo_2, &
    6226             :                              1.0_dp, m_tmp_no_1, &
    6227           0 :                              retain_sparsity=.TRUE.)
    6228             : 
    6229             :       ELSE
    6230             : 
    6231        1474 :          CALL dbcsr_copy(m_tmp_no_1, m_tmp_no_2)
    6232             : 
    6233             :       END IF ! normalize_orbitals
    6234             : 
    6235             :       ! project out the occupied space from the gradient
    6236        1474 :       IF (assume_t0_q0x) THEN
    6237         466 :          IF (special_case .EQ. xalmo_case_fully_deloc) THEN
    6238         160 :             CALL dbcsr_copy(m_grad_out, m_tmp_no_1)
    6239             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6240             :                                 m_t0, &
    6241             :                                 m_grad_out, &
    6242             :                                 0.0_dp, m_tmp_oo_1, &
    6243         160 :                                 filter_eps=eps_filter)
    6244             :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6245             :                                 m_STsiginv0, &
    6246             :                                 m_tmp_oo_1, &
    6247             :                                 1.0_dp, m_grad_out, &
    6248         160 :                                 filter_eps=eps_filter)
    6249         306 :          ELSE IF (special_case .EQ. xalmo_case_block_diag) THEN
    6250           0 :             CPABORT("Cannot project the zero-order space from itself")
    6251             :          ELSE
    6252             :             ! no special case: normal xALMOs
    6253             :             CALL apply_domain_operators( &
    6254             :                matrix_in=m_tmp_no_1, &
    6255             :                matrix_out=m_grad_out, &
    6256             :                operator2=domain_r_down(:), &
    6257             :                operator1=domain_s_inv(:), &
    6258             :                dpattern=m_quench_t, &
    6259             :                map=domain_map, &
    6260             :                node_of_domain=cpu_of_domain, &
    6261             :                my_action=1, &
    6262             :                filter_eps=eps_filter, &
    6263             :                !matrix_trimmer=,&
    6264         306 :                use_trimmer=.FALSE.)
    6265             :          END IF ! my_special_case
    6266         466 :          CALL dbcsr_copy(m_tmp_no_1, m_grad_out)
    6267             :       END IF
    6268             : 
    6269             :       !! check whether the gradient lies entirely in R or Q
    6270             :       !CALL dbcsr_multiply("T","N",1.0_dp,&
    6271             :       !        m_t,&
    6272             :       !        m_tmp_no_1,&
    6273             :       !        0.0_dp,m_tmp_oo_1,&
    6274             :       !        filter_eps=eps_filter,&
    6275             :       !        )
    6276             :       !CALL dbcsr_multiply("N","N",1.0_dp,&
    6277             :       !        m_siginv,&
    6278             :       !        m_tmp_oo_1,&
    6279             :       !        0.0_dp,m_tmp_oo_2,&
    6280             :       !        filter_eps=eps_filter,&
    6281             :       !        )
    6282             :       !CALL dbcsr_copy(m_tmp_no_2,m_tmp_no_1)
    6283             :       !CALL dbcsr_multiply("N","N",-1.0_dp,&
    6284             :       !        m_ST,&
    6285             :       !        m_tmp_oo_2,&
    6286             :       !        1.0_dp,m_tmp_no_2,&
    6287             :       !        retain_sparsity=.TRUE.,&
    6288             :       !        )
    6289             :       !penalty_occ_vol_g_norm = dbcsr_maxabs(m_tmp_no_2)
    6290             :       !WRITE(*,"(A50,2F20.10)") "Virtual-space projection of the gradient", penalty_occ_vol_g_norm
    6291             :       !CALL dbcsr_add(m_tmp_no_2,m_tmp_no_1,1.0_dp,-1.0_dp)
    6292             :       !penalty_occ_vol_g_norm = dbcsr_maxabs(m_tmp_no_2)
    6293             :       !WRITE(*,"(A50,2F20.10)") "Occupied-space projection of the gradient", penalty_occ_vol_g_norm
    6294             :       !penalty_occ_vol_g_norm = dbcsr_maxabs(m_tmp_no_1)
    6295             :       !WRITE(*,"(A50,2F20.10)") "Full gradient", penalty_occ_vol_g_norm
    6296             : 
    6297             :       ! transform d_E/d_T to d_E/d_theta
    6298        1474 :       IF (optimize_theta) THEN
    6299           0 :          CALL dbcsr_copy(m_tmp_no_2, m_theta)
    6300           0 :          CALL dtanh_of_elements(m_tmp_no_2, alpha=1.0_dp/envelope_amplitude)
    6301           0 :          CALL dbcsr_scale(m_tmp_no_2, envelope_amplitude)
    6302           0 :          CALL dbcsr_set(m_tmp_no_3, 0.0_dp)
    6303           0 :          CALL dbcsr_filter(m_tmp_no_3, eps_filter)
    6304             :          CALL dbcsr_hadamard_product(m_tmp_no_1, &
    6305             :                                      m_tmp_no_2, &
    6306           0 :                                      m_tmp_no_3)
    6307             :          CALL dbcsr_hadamard_product(m_tmp_no_3, &
    6308             :                                      m_quench_t, &
    6309           0 :                                      m_grad_out)
    6310             :       ELSE ! simply copy
    6311             :          CALL dbcsr_hadamard_product(m_tmp_no_1, &
    6312             :                                      m_quench_t, &
    6313        1474 :                                      m_grad_out)
    6314             :       END IF
    6315        1474 :       CALL dbcsr_filter(m_grad_out, eps_filter)
    6316             : 
    6317        1474 :       CALL dbcsr_release(m_tmp_no_1)
    6318        1474 :       CALL dbcsr_release(m_tmp_no_2)
    6319        1474 :       CALL dbcsr_release(m_tmp_no_3)
    6320        1474 :       CALL dbcsr_release(m_tmp_oo_1)
    6321        1474 :       CALL dbcsr_release(m_tmp_oo_2)
    6322        1474 :       CALL dbcsr_release(tempNOcc1)
    6323        1474 :       CALL dbcsr_release(tempOccOcc1)
    6324        1474 :       CALL dbcsr_release(temp1)
    6325        1474 :       CALL dbcsr_release(temp2)
    6326             : 
    6327        1474 :       CALL timestop(handle)
    6328             : 
    6329        2948 :    END SUBROUTINE compute_gradient
    6330             : 
    6331             : ! **************************************************************************************************
    6332             : !> \brief Serial code that prints matrices readable by Mathematica
    6333             : !> \param matrix - matrix to print
    6334             : !> \param filename ...
    6335             : !> \par History
    6336             : !>       2015.05 created [Rustam Z. Khaliullin]
    6337             : !> \author Rustam Z. Khaliullin
    6338             : ! **************************************************************************************************
    6339           0 :    SUBROUTINE print_mathematica_matrix(matrix, filename)
    6340             : 
    6341             :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix
    6342             :       CHARACTER(len=*), INTENT(IN)                       :: filename
    6343             : 
    6344             :       CHARACTER(len=*), PARAMETER :: routineN = 'print_mathematica_matrix'
    6345             : 
    6346             :       CHARACTER(LEN=20)                                  :: formatstr, Scols
    6347             :       INTEGER                                            :: col, fiunit, handle, hori_offset, jj, &
    6348             :                                                             nblkcols_tot, nblkrows_tot, Ncols, &
    6349             :                                                             ncores, Nrows, row, unit_nr, &
    6350             :                                                             vert_offset
    6351           0 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ao_block_sizes, mo_block_sizes
    6352           0 :       INTEGER, DIMENSION(:), POINTER                     :: ao_blk_sizes, mo_blk_sizes
    6353             :       LOGICAL                                            :: found
    6354           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: H
    6355           0 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block_p
    6356             :       TYPE(cp_logger_type), POINTER                      :: logger
    6357             :       TYPE(dbcsr_distribution_type)                      :: dist
    6358             :       TYPE(dbcsr_type)                                   :: matrix_asym
    6359             : 
    6360           0 :       CALL timeset(routineN, handle)
    6361             : 
    6362             :       ! get a useful output_unit
    6363           0 :       logger => cp_get_default_logger()
    6364           0 :       IF (logger%para_env%is_source()) THEN
    6365           0 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    6366             :       ELSE
    6367             :          unit_nr = -1
    6368             :       END IF
    6369             : 
    6370             :       ! serial code only
    6371           0 :       CALL dbcsr_get_info(matrix, distribution=dist)
    6372           0 :       CALL dbcsr_distribution_get(dist, numnodes=ncores)
    6373           0 :       IF (ncores .GT. 1) THEN
    6374           0 :          CPABORT("mathematica files: serial code only")
    6375             :       END IF
    6376             : 
    6377             :       CALL dbcsr_get_info(matrix, row_blk_size=ao_blk_sizes, col_blk_size=mo_blk_sizes, &
    6378           0 :                           nblkrows_total=nblkrows_tot, nblkcols_total=nblkcols_tot)
    6379           0 :       CPASSERT(nblkrows_tot == nblkcols_tot)
    6380           0 :       ALLOCATE (mo_block_sizes(nblkcols_tot), ao_block_sizes(nblkcols_tot))
    6381           0 :       mo_block_sizes(:) = mo_blk_sizes(:)
    6382           0 :       ao_block_sizes(:) = ao_blk_sizes(:)
    6383             : 
    6384             :       CALL dbcsr_create(matrix_asym, &
    6385             :                         template=matrix, &
    6386           0 :                         matrix_type=dbcsr_type_no_symmetry)
    6387           0 :       CALL dbcsr_desymmetrize(matrix, matrix_asym)
    6388             : 
    6389           0 :       Ncols = SUM(mo_block_sizes)
    6390           0 :       Nrows = SUM(ao_block_sizes)
    6391           0 :       ALLOCATE (H(Nrows, Ncols))
    6392           0 :       H(:, :) = 0.0_dp
    6393             : 
    6394           0 :       hori_offset = 0
    6395           0 :       DO col = 1, nblkcols_tot
    6396             : 
    6397           0 :          vert_offset = 0
    6398           0 :          DO row = 1, nblkrows_tot
    6399             : 
    6400           0 :             CALL dbcsr_get_block_p(matrix_asym, row, col, block_p, found)
    6401           0 :             IF (found) THEN
    6402             : 
    6403             :                H(vert_offset + 1:vert_offset + ao_block_sizes(row), &
    6404             :                  hori_offset + 1:hori_offset + mo_block_sizes(col)) &
    6405           0 :                   = block_p(:, :)
    6406             : 
    6407             :             END IF
    6408             : 
    6409           0 :             vert_offset = vert_offset + ao_block_sizes(row)
    6410             : 
    6411             :          END DO
    6412             : 
    6413           0 :          hori_offset = hori_offset + mo_block_sizes(col)
    6414             : 
    6415             :       END DO ! loop over electron blocks
    6416             : 
    6417           0 :       CALL dbcsr_release(matrix_asym)
    6418             : 
    6419           0 :       IF (unit_nr > 0) THEN
    6420           0 :          CALL open_file(filename, unit_number=fiunit, file_status='REPLACE')
    6421           0 :          WRITE (Scols, "(I10)") Ncols
    6422           0 :          formatstr = "("//TRIM(Scols)//"E27.17)"
    6423           0 :          DO jj = 1, Nrows
    6424           0 :             WRITE (fiunit, formatstr) H(jj, :)
    6425             :          END DO
    6426           0 :          CALL close_file(fiunit)
    6427             :       END IF
    6428             : 
    6429           0 :       DEALLOCATE (mo_block_sizes)
    6430           0 :       DEALLOCATE (ao_block_sizes)
    6431           0 :       DEALLOCATE (H)
    6432             : 
    6433           0 :       CALL timestop(handle)
    6434             : 
    6435           0 :    END SUBROUTINE print_mathematica_matrix
    6436             : 
    6437             : ! **************************************************************************************************
    6438             : !> \brief Compute the objective functional of NLMOs
    6439             : !> \param localization_obj_function_ispin ...
    6440             : !> \param penalty_func_ispin ...
    6441             : !> \param penalty_vol_prefactor ...
    6442             : !> \param overlap_determinant ...
    6443             : !> \param m_sigma ...
    6444             : !> \param nocc ...
    6445             : !> \param m_B0 ...
    6446             : !> \param m_theta_normalized ...
    6447             : !> \param template_matrix_mo ...
    6448             : !> \param weights ...
    6449             : !> \param m_S0 ...
    6450             : !> \param just_started ...
    6451             : !> \param penalty_amplitude ...
    6452             : !> \param eps_filter ...
    6453             : !> \par History
    6454             : !>       2020.01 created [Ziling Luo]
    6455             : !> \author Ziling Luo
    6456             : ! **************************************************************************************************
    6457          82 :    SUBROUTINE compute_obj_nlmos(localization_obj_function_ispin, penalty_func_ispin, &
    6458          82 :                                 penalty_vol_prefactor, overlap_determinant, m_sigma, nocc, m_B0, &
    6459          82 :                                 m_theta_normalized, template_matrix_mo, weights, m_S0, just_started, &
    6460             :                                 penalty_amplitude, eps_filter)
    6461             : 
    6462             :       REAL(KIND=dp), INTENT(INOUT) :: localization_obj_function_ispin, penalty_func_ispin, &
    6463             :          penalty_vol_prefactor, overlap_determinant
    6464             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_sigma
    6465             :       INTEGER, INTENT(IN)                                :: nocc
    6466             :       TYPE(dbcsr_type), DIMENSION(:, :), INTENT(IN)      :: m_B0
    6467             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_theta_normalized, template_matrix_mo
    6468             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: weights
    6469             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_S0
    6470             :       LOGICAL, INTENT(IN)                                :: just_started
    6471             :       REAL(KIND=dp), INTENT(IN)                          :: penalty_amplitude, eps_filter
    6472             : 
    6473             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'compute_obj_nlmos'
    6474             : 
    6475             :       INTEGER                                            :: handle, idim0, ielem, reim
    6476             :       REAL(KIND=dp)                                      :: det1, fval
    6477          82 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: reim_diag, z2
    6478             :       TYPE(dbcsr_type)                                   :: tempNOcc1, tempOccOcc1, tempOccOcc2
    6479             :       TYPE(mp_comm_type)                                 :: group
    6480             : 
    6481          82 :       CALL timeset(routineN, handle)
    6482             : 
    6483             :       CALL dbcsr_create(tempNOcc1, &
    6484             :                         template=template_matrix_mo, &
    6485          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6486             :       CALL dbcsr_create(tempOccOcc1, &
    6487             :                         template=m_theta_normalized, &
    6488          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6489             :       CALL dbcsr_create(tempOccOcc2, &
    6490             :                         template=m_theta_normalized, &
    6491          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6492             : 
    6493          82 :       localization_obj_function_ispin = 0.0_dp
    6494          82 :       penalty_func_ispin = 0.0_dp
    6495         246 :       ALLOCATE (z2(nocc))
    6496         164 :       ALLOCATE (reim_diag(nocc))
    6497             : 
    6498          82 :       CALL dbcsr_get_info(tempOccOcc2, group=group)
    6499             : 
    6500         842 :       DO idim0 = 1, SIZE(m_B0, 2) ! this loop is over miller ind
    6501             : 
    6502       12608 :          z2(:) = 0.0_dp
    6503             : 
    6504        1520 :          DO reim = 1, SIZE(m_B0, 1) ! this loop is over Re/Im
    6505             : 
    6506             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6507             :                                 m_B0(reim, idim0), &
    6508             :                                 m_theta_normalized, &
    6509             :                                 0.0_dp, tempOccOcc1, &
    6510         760 :                                 filter_eps=eps_filter)
    6511         760 :             CALL dbcsr_set(tempOccOcc2, 0.0_dp)
    6512         760 :             CALL dbcsr_add_on_diag(tempOccOcc2, 1.0_dp)
    6513             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6514             :                                 m_theta_normalized, &
    6515             :                                 tempOccOcc1, &
    6516             :                                 0.0_dp, tempOccOcc2, &
    6517         760 :                                 retain_sparsity=.TRUE.)
    6518             : 
    6519       12608 :             reim_diag = 0.0_dp
    6520         760 :             CALL dbcsr_get_diag(tempOccOcc2, reim_diag)
    6521         760 :             CALL group%sum(reim_diag)
    6522       13368 :             z2(:) = z2(:) + reim_diag(:)*reim_diag(:)
    6523             : 
    6524             :          END DO
    6525             : 
    6526       12690 :          DO ielem = 1, nocc
    6527             :             SELECT CASE (2) ! allows for selection of different spread functionals
    6528             :             CASE (1) ! functional =  -W_I * log( |z_I|^2 )
    6529       11848 :                fval = -weights(idim0)*LOG(ABS(z2(ielem)))
    6530             :             CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
    6531       11848 :                fval = weights(idim0) - weights(idim0)*ABS(z2(ielem))
    6532             :             CASE (3) ! functional =  W_I * ( 1 - |z_I| )
    6533             :                fval = weights(idim0) - weights(idim0)*SQRT(ABS(z2(ielem)))
    6534             :             END SELECT
    6535       12608 :             localization_obj_function_ispin = localization_obj_function_ispin + fval
    6536             :          END DO
    6537             : 
    6538             :       END DO ! end loop over idim0
    6539             : 
    6540          82 :       DEALLOCATE (z2)
    6541          82 :       DEALLOCATE (reim_diag)
    6542             : 
    6543             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6544             :                           m_S0, &
    6545             :                           m_theta_normalized, &
    6546             :                           0.0_dp, tempOccOcc1, &
    6547          82 :                           filter_eps=eps_filter)
    6548             :       ! compute current sigma
    6549             :       CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6550             :                           m_theta_normalized, &
    6551             :                           tempOccOcc1, &
    6552             :                           0.0_dp, m_sigma, &
    6553          82 :                           filter_eps=eps_filter)
    6554             : 
    6555             :       CALL determinant(m_sigma, det1, &
    6556          82 :                        eps_filter)
    6557             :       ! save the current determinant
    6558          82 :       overlap_determinant = det1
    6559             : 
    6560          82 :       IF (just_started .AND. penalty_amplitude .LT. 0.0_dp) THEN
    6561           4 :          penalty_vol_prefactor = -(-penalty_amplitude)*localization_obj_function_ispin
    6562             :       END IF
    6563          82 :       penalty_func_ispin = penalty_func_ispin + penalty_vol_prefactor*LOG(det1)
    6564             : 
    6565          82 :       CALL dbcsr_release(tempNOcc1)
    6566          82 :       CALL dbcsr_release(tempOccOcc1)
    6567          82 :       CALL dbcsr_release(tempOccOcc2)
    6568             : 
    6569          82 :       CALL timestop(handle)
    6570             : 
    6571         164 :    END SUBROUTINE compute_obj_nlmos
    6572             : 
    6573             : ! **************************************************************************************************
    6574             : !> \brief Compute the gradient wrt the main variable
    6575             : !> \param m_grad_out ...
    6576             : !> \param m_B0 ...
    6577             : !> \param weights ...
    6578             : !> \param m_S0 ...
    6579             : !> \param m_theta_normalized ...
    6580             : !> \param m_siginv ...
    6581             : !> \param m_sig_sqrti_ii ...
    6582             : !> \param penalty_vol_prefactor ...
    6583             : !> \param eps_filter ...
    6584             : !> \param suggested_vol_penalty ...
    6585             : !> \par History
    6586             : !>       2018.10 created [Ziling Luo]
    6587             : !> \author Ziling Luo
    6588             : ! **************************************************************************************************
    6589          82 :    SUBROUTINE compute_gradient_nlmos(m_grad_out, m_B0, weights, &
    6590             :                                      m_S0, m_theta_normalized, m_siginv, m_sig_sqrti_ii, &
    6591             :                                      penalty_vol_prefactor, eps_filter, suggested_vol_penalty)
    6592             : 
    6593             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_grad_out
    6594             :       TYPE(dbcsr_type), DIMENSION(:, :), INTENT(IN)      :: m_B0
    6595             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: weights
    6596             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_S0, m_theta_normalized, m_siginv, &
    6597             :                                                             m_sig_sqrti_ii
    6598             :       REAL(KIND=dp), INTENT(IN)                          :: penalty_vol_prefactor, eps_filter
    6599             :       REAL(KIND=dp), INTENT(INOUT)                       :: suggested_vol_penalty
    6600             : 
    6601             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_gradient_nlmos'
    6602             : 
    6603             :       INTEGER                                            :: dim0, handle, idim0, reim
    6604             :       REAL(KIND=dp)                                      :: norm_loc, norm_vol
    6605             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: tg_diagonal, z2
    6606             :       TYPE(dbcsr_type)                                   :: m_temp_oo_1, m_temp_oo_2, m_temp_oo_3, &
    6607             :                                                             m_temp_oo_4
    6608             : 
    6609          82 :       CALL timeset(routineN, handle)
    6610             : 
    6611             :       CALL dbcsr_create(m_temp_oo_1, &
    6612             :                         template=m_theta_normalized, &
    6613          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6614             :       CALL dbcsr_create(m_temp_oo_2, &
    6615             :                         template=m_theta_normalized, &
    6616          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6617             :       CALL dbcsr_create(m_temp_oo_3, &
    6618             :                         template=m_theta_normalized, &
    6619          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6620             :       CALL dbcsr_create(m_temp_oo_4, &
    6621             :                         template=m_theta_normalized, &
    6622          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6623             : 
    6624          82 :       CALL dbcsr_get_info(m_siginv, nfullrows_total=dim0)
    6625         246 :       ALLOCATE (tg_diagonal(dim0))
    6626         164 :       ALLOCATE (z2(dim0))
    6627          82 :       CALL dbcsr_set(m_temp_oo_1, 0.0_dp) ! accumulate the gradient wrt a_norm here
    6628             : 
    6629             :       ! do d_Omega/d_a_normalized first
    6630         842 :       DO idim0 = 1, SIZE(m_B0, 2) ! this loop is over miller ind
    6631             : 
    6632       12608 :          z2(:) = 0.0_dp
    6633         760 :          CALL dbcsr_set(m_temp_oo_2, 0.0_dp) ! accumulate index gradient here
    6634        1520 :          DO reim = 1, SIZE(m_B0, 1) ! this loop is over Re/Im
    6635             : 
    6636             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6637             :                                 m_B0(reim, idim0), &
    6638             :                                 m_theta_normalized, &
    6639             :                                 0.0_dp, m_temp_oo_3, &
    6640         760 :                                 filter_eps=eps_filter)
    6641             : 
    6642             :             ! result contain Re/Im part of Z for the current Miller index
    6643             :             ! warning - save time by computing only the diagonal elements
    6644             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6645             :                                 m_theta_normalized, &
    6646             :                                 m_temp_oo_3, &
    6647             :                                 0.0_dp, m_temp_oo_4, &
    6648         760 :                                 filter_eps=eps_filter)
    6649             : 
    6650       12608 :             tg_diagonal(:) = 0.0_dp
    6651         760 :             CALL dbcsr_get_diag(m_temp_oo_4, tg_diagonal)
    6652         760 :             CALL dbcsr_set(m_temp_oo_4, 0.0_dp)
    6653         760 :             CALL dbcsr_set_diag(m_temp_oo_4, tg_diagonal)
    6654             :             !CALL para_group%sum(tg_diagonal)
    6655       12608 :             z2(:) = z2(:) + tg_diagonal(:)*tg_diagonal(:)
    6656             : 
    6657             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6658             :                                 m_temp_oo_3, &
    6659             :                                 m_temp_oo_4, &
    6660             :                                 1.0_dp, m_temp_oo_2, &
    6661        1520 :                                 filter_eps=eps_filter)
    6662             : 
    6663             :          END DO
    6664             : 
    6665             :          ! TODO: because some elements are zeros on some MPI tasks the
    6666             :          ! gradient evaluation will fail for CASE 1 and 3
    6667             :          SELECT CASE (2) ! allows for selection of different spread functionals
    6668             :          CASE (1) ! functional =  -W_I * log( |z_I|^2 )
    6669             :             z2(:) = -weights(idim0)/z2(:)
    6670             :          CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
    6671       12608 :             z2(:) = -weights(idim0)
    6672             :          CASE (3) ! functional =  W_I * ( 1 - |z_I| )
    6673             :             z2(:) = -weights(idim0)/(2*SQRT(z2(:)))
    6674             :          END SELECT
    6675         760 :          CALL dbcsr_set(m_temp_oo_3, 0.0_dp)
    6676         760 :          CALL dbcsr_set_diag(m_temp_oo_3, z2)
    6677             :          ! TODO: print this matrix to make sure its block structure is fine
    6678             :          ! and there are no unecessary elements
    6679             : 
    6680             :          CALL dbcsr_multiply("N", "N", 4.0_dp, &
    6681             :                              m_temp_oo_2, &
    6682             :                              m_temp_oo_3, &
    6683             :                              1.0_dp, m_temp_oo_1, &
    6684         842 :                              filter_eps=eps_filter)
    6685             : 
    6686             :       END DO ! end loop over idim0
    6687          82 :       DEALLOCATE (z2)
    6688             : 
    6689             :       ! sigma0.a_norm is necessary for the volume penalty and normalization
    6690             :       CALL dbcsr_multiply("N", "N", &
    6691             :                           1.0_dp, &
    6692             :                           m_S0, &
    6693             :                           m_theta_normalized, &
    6694             :                           0.0_dp, m_temp_oo_2, &
    6695          82 :                           filter_eps=eps_filter)
    6696             : 
    6697             :       ! add gradient of the penalty functional log[det(sigma)]
    6698             :       ! G = 2*prefactor*sigma0.a_norm.sigma_inv
    6699             :       CALL dbcsr_multiply("N", "N", &
    6700             :                           1.0_dp, &
    6701             :                           m_temp_oo_2, &
    6702             :                           m_siginv, &
    6703             :                           0.0_dp, m_temp_oo_3, &
    6704          82 :                           filter_eps=eps_filter)
    6705          82 :       norm_vol = dbcsr_maxabs(m_temp_oo_3)
    6706          82 :       norm_loc = dbcsr_maxabs(m_temp_oo_1)
    6707          82 :       suggested_vol_penalty = norm_loc/norm_vol
    6708             :       CALL dbcsr_add(m_temp_oo_1, m_temp_oo_3, &
    6709          82 :                      1.0_dp, 2.0_dp*penalty_vol_prefactor)
    6710             : 
    6711             :       ! take into account the factor from the normalization constraint
    6712             :       ! G = ( G - sigma0.a_norm.[tr(a_norm).G]_ii ) . [sig_sqrti]_ii
    6713             :       ! 1. get G.[sig_sqrti]_ii
    6714             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6715             :                           m_temp_oo_1, &
    6716             :                           m_sig_sqrti_ii, &
    6717             :                           0.0_dp, m_grad_out, &
    6718          82 :                           filter_eps=eps_filter)
    6719             : 
    6720             :       ! 2. get [tr(a_norm).G]_ii
    6721             :       ! it is possible to save time by computing only the diagonal elements
    6722             :       CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6723             :                           m_theta_normalized, &
    6724             :                           m_temp_oo_1, &
    6725             :                           0.0_dp, m_temp_oo_3, &
    6726          82 :                           filter_eps=eps_filter)
    6727          82 :       CALL dbcsr_get_diag(m_temp_oo_3, tg_diagonal)
    6728          82 :       CALL dbcsr_set(m_temp_oo_3, 0.0_dp)
    6729          82 :       CALL dbcsr_set_diag(m_temp_oo_3, tg_diagonal)
    6730             : 
    6731             :       ! 3. [X]_ii . [sig_sqrti]_ii
    6732             :       ! it is possible to save time by computing only the diagonal elements
    6733             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6734             :                           m_sig_sqrti_ii, &
    6735             :                           m_temp_oo_3, &
    6736             :                           0.0_dp, m_temp_oo_1, &
    6737          82 :                           filter_eps=eps_filter)
    6738             :       ! 4. (sigma0*a_norm) .[X]_ii
    6739             :       CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6740             :                           m_temp_oo_2, &
    6741             :                           m_temp_oo_1, &
    6742             :                           1.0_dp, m_grad_out, &
    6743          82 :                           filter_eps=eps_filter)
    6744             : 
    6745          82 :       DEALLOCATE (tg_diagonal)
    6746          82 :       CALL dbcsr_release(m_temp_oo_1)
    6747          82 :       CALL dbcsr_release(m_temp_oo_2)
    6748          82 :       CALL dbcsr_release(m_temp_oo_3)
    6749          82 :       CALL dbcsr_release(m_temp_oo_4)
    6750             : 
    6751          82 :       CALL timestop(handle)
    6752             : 
    6753         164 :    END SUBROUTINE compute_gradient_nlmos
    6754             : 
    6755             : ! **************************************************************************************************
    6756             : !> \brief Compute MO coeffs from the main optimized variable (e.g. Theta, X)
    6757             : !> \param m_var_in ...
    6758             : !> \param m_t_out ...
    6759             : !> \param m_quench_t ...
    6760             : !> \param m_t0 ...
    6761             : !> \param m_oo_template ...
    6762             : !> \param m_STsiginv0 ...
    6763             : !> \param m_s ...
    6764             : !> \param m_sig_sqrti_ii_out ...
    6765             : !> \param domain_r_down ...
    6766             : !> \param domain_s_inv ...
    6767             : !> \param domain_map ...
    6768             : !> \param cpu_of_domain ...
    6769             : !> \param assume_t0_q0x ...
    6770             : !> \param just_started ...
    6771             : !> \param optimize_theta ...
    6772             : !> \param normalize_orbitals ...
    6773             : !> \param envelope_amplitude ...
    6774             : !> \param eps_filter ...
    6775             : !> \param special_case ...
    6776             : !> \param nocc_of_domain ...
    6777             : !> \param order_lanczos ...
    6778             : !> \param eps_lanczos ...
    6779             : !> \param max_iter_lanczos ...
    6780             : !> \par History
    6781             : !>       2015.03 created [Rustam Z Khaliullin]
    6782             : !> \author Rustam Z Khaliullin
    6783             : ! **************************************************************************************************
    6784        2948 :    SUBROUTINE compute_xalmos_from_main_var(m_var_in, m_t_out, m_quench_t, &
    6785        1474 :                                            m_t0, m_oo_template, m_STsiginv0, m_s, m_sig_sqrti_ii_out, domain_r_down, &
    6786        1474 :                                            domain_s_inv, domain_map, cpu_of_domain, assume_t0_q0x, just_started, &
    6787             :                                            optimize_theta, normalize_orbitals, envelope_amplitude, eps_filter, &
    6788        1474 :                                            special_case, nocc_of_domain, order_lanczos, eps_lanczos, max_iter_lanczos)
    6789             : 
    6790             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_var_in
    6791             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_t_out, m_quench_t, m_t0, &
    6792             :                                                             m_oo_template, m_STsiginv0, m_s, &
    6793             :                                                             m_sig_sqrti_ii_out
    6794             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6795             :          INTENT(IN)                                      :: domain_r_down, domain_s_inv
    6796             :       TYPE(domain_map_type), INTENT(IN)                  :: domain_map
    6797             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
    6798             :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, just_started, &
    6799             :                                                             optimize_theta, normalize_orbitals
    6800             :       REAL(KIND=dp), INTENT(IN)                          :: envelope_amplitude, eps_filter
    6801             :       INTEGER, INTENT(IN)                                :: special_case
    6802             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: nocc_of_domain
    6803             :       INTEGER, INTENT(IN)                                :: order_lanczos
    6804             :       REAL(KIND=dp), INTENT(IN)                          :: eps_lanczos
    6805             :       INTEGER, INTENT(IN)                                :: max_iter_lanczos
    6806             : 
    6807             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_xalmos_from_main_var'
    6808             : 
    6809             :       INTEGER                                            :: handle, unit_nr
    6810             :       REAL(KIND=dp)                                      :: t_norm
    6811             :       TYPE(cp_logger_type), POINTER                      :: logger
    6812             :       TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_oo_1
    6813             : 
    6814        1474 :       CALL timeset(routineN, handle)
    6815             : 
    6816             :       ! get a useful output_unit
    6817        1474 :       logger => cp_get_default_logger()
    6818        1474 :       IF (logger%para_env%is_source()) THEN
    6819         737 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    6820             :       ELSE
    6821             :          unit_nr = -1
    6822             :       END IF
    6823             : 
    6824             :       CALL dbcsr_create(m_tmp_no_1, &
    6825             :                         template=m_quench_t, &
    6826        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6827             :       CALL dbcsr_create(m_tmp_oo_1, &
    6828             :                         template=m_oo_template, &
    6829        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6830             : 
    6831        1474 :       CALL dbcsr_copy(m_tmp_no_1, m_var_in)
    6832        1474 :       IF (optimize_theta) THEN
    6833             :          ! check that all MO coefficients of the guess are less
    6834             :          ! than the maximum allowed amplitude
    6835           0 :          t_norm = dbcsr_maxabs(m_tmp_no_1)
    6836           0 :          IF (unit_nr > 0) THEN
    6837           0 :             WRITE (unit_nr, *) "Maximum norm of the initial guess: ", t_norm
    6838           0 :             WRITE (unit_nr, *) "Maximum allowed amplitude: ", &
    6839           0 :                envelope_amplitude
    6840             :          END IF
    6841           0 :          IF (t_norm .GT. envelope_amplitude .AND. just_started) THEN
    6842           0 :             CPABORT("Max norm of the initial guess is too large")
    6843             :          END IF
    6844             :          ! use artanh to tame MOs
    6845           0 :          CALL tanh_of_elements(m_tmp_no_1, alpha=1.0_dp/envelope_amplitude)
    6846           0 :          CALL dbcsr_scale(m_tmp_no_1, envelope_amplitude)
    6847             :       END IF
    6848             :       CALL dbcsr_hadamard_product(m_tmp_no_1, m_quench_t, &
    6849        1474 :                                   m_t_out)
    6850             : 
    6851             :       ! project out R_0
    6852        1474 :       IF (assume_t0_q0x) THEN
    6853         466 :          IF (special_case .EQ. xalmo_case_fully_deloc) THEN
    6854             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6855             :                                 m_STsiginv0, &
    6856             :                                 m_t_out, &
    6857             :                                 0.0_dp, m_tmp_oo_1, &
    6858         160 :                                 filter_eps=eps_filter)
    6859             :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6860             :                                 m_t0, &
    6861             :                                 m_tmp_oo_1, &
    6862             :                                 1.0_dp, m_t_out, &
    6863         160 :                                 filter_eps=eps_filter)
    6864         306 :          ELSE IF (special_case .EQ. xalmo_case_block_diag) THEN
    6865           0 :             CPABORT("cannot use projector with block-daigonal ALMOs")
    6866             :          ELSE
    6867             :             ! no special case
    6868             :             CALL apply_domain_operators( &
    6869             :                matrix_in=m_t_out, &
    6870             :                matrix_out=m_tmp_no_1, &
    6871             :                operator1=domain_r_down, &
    6872             :                operator2=domain_s_inv, &
    6873             :                dpattern=m_quench_t, &
    6874             :                map=domain_map, &
    6875             :                node_of_domain=cpu_of_domain, &
    6876             :                my_action=1, &
    6877             :                filter_eps=eps_filter, &
    6878         306 :                use_trimmer=.FALSE.)
    6879             :             CALL dbcsr_copy(m_t_out, &
    6880         306 :                             m_tmp_no_1)
    6881             :          END IF ! special case
    6882             :          CALL dbcsr_add(m_t_out, &
    6883         466 :                         m_t0, 1.0_dp, 1.0_dp)
    6884             :       END IF
    6885             : 
    6886        1474 :       IF (normalize_orbitals) THEN
    6887             :          CALL orthogonalize_mos( &
    6888             :             ket=m_t_out, &
    6889             :             overlap=m_tmp_oo_1, &
    6890             :             metric=m_s, &
    6891             :             retain_locality=.TRUE., &
    6892             :             only_normalize=.TRUE., &
    6893             :             nocc_of_domain=nocc_of_domain(:), &
    6894             :             eps_filter=eps_filter, &
    6895             :             order_lanczos=order_lanczos, &
    6896             :             eps_lanczos=eps_lanczos, &
    6897             :             max_iter_lanczos=max_iter_lanczos, &
    6898           0 :             overlap_sqrti=m_sig_sqrti_ii_out)
    6899             :       END IF
    6900             : 
    6901        1474 :       CALL dbcsr_filter(m_t_out, eps_filter)
    6902             : 
    6903        1474 :       CALL dbcsr_release(m_tmp_no_1)
    6904        1474 :       CALL dbcsr_release(m_tmp_oo_1)
    6905             : 
    6906        1474 :       CALL timestop(handle)
    6907             : 
    6908        1474 :    END SUBROUTINE compute_xalmos_from_main_var
    6909             : 
    6910             : ! **************************************************************************************************
    6911             : !> \brief Compute the preconditioner matrices and invert them if necessary
    6912             : !> \param domain_prec_out ...
    6913             : !> \param m_prec_out ...
    6914             : !> \param m_ks ...
    6915             : !> \param m_s ...
    6916             : !> \param m_siginv ...
    6917             : !> \param m_quench_t ...
    6918             : !> \param m_FTsiginv ...
    6919             : !> \param m_siginvTFTsiginv ...
    6920             : !> \param m_ST ...
    6921             : !> \param m_STsiginv_out ...
    6922             : !> \param m_s_vv_out ...
    6923             : !> \param m_f_vv_out ...
    6924             : !> \param para_env ...
    6925             : !> \param blacs_env ...
    6926             : !> \param nocc_of_domain ...
    6927             : !> \param domain_s_inv ...
    6928             : !> \param domain_s_inv_half ...
    6929             : !> \param domain_s_half ...
    6930             : !> \param domain_r_down ...
    6931             : !> \param cpu_of_domain ...
    6932             : !> \param domain_map ...
    6933             : !> \param assume_t0_q0x ...
    6934             : !> \param penalty_occ_vol ...
    6935             : !> \param penalty_occ_vol_prefactor ...
    6936             : !> \param eps_filter ...
    6937             : !> \param neg_thr ...
    6938             : !> \param spin_factor ...
    6939             : !> \param special_case ...
    6940             : !> \param bad_modes_projector_down_out ...
    6941             : !> \param skip_inversion ...
    6942             : !> \par History
    6943             : !>       2015.03 created [Rustam Z Khaliullin]
    6944             : !> \author Rustam Z Khaliullin
    6945             : ! **************************************************************************************************
    6946        1500 :    SUBROUTINE compute_preconditioner(domain_prec_out, m_prec_out, m_ks, m_s, &
    6947             :                                      m_siginv, m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, &
    6948             :                                      m_STsiginv_out, m_s_vv_out, m_f_vv_out, para_env, &
    6949        1000 :                                      blacs_env, nocc_of_domain, domain_s_inv, domain_s_inv_half, domain_s_half, &
    6950         500 :                                      domain_r_down, cpu_of_domain, &
    6951             :                                      domain_map, assume_t0_q0x, penalty_occ_vol, penalty_occ_vol_prefactor, &
    6952         500 :                                      eps_filter, neg_thr, spin_factor, special_case, bad_modes_projector_down_out, &
    6953             :                                      skip_inversion)
    6954             : 
    6955             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6956             :          INTENT(INOUT)                                   :: domain_prec_out
    6957             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_prec_out, m_ks, m_s
    6958             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_siginv, m_quench_t, m_FTsiginv, &
    6959             :                                                             m_siginvTFTsiginv, m_ST
    6960             :       TYPE(dbcsr_type), INTENT(INOUT), OPTIONAL          :: m_STsiginv_out, m_s_vv_out, m_f_vv_out
    6961             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    6962             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    6963             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: nocc_of_domain
    6964             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6965             :          INTENT(IN)                                      :: domain_s_inv
    6966             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6967             :          INTENT(IN), OPTIONAL                            :: domain_s_inv_half, domain_s_half
    6968             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6969             :          INTENT(IN)                                      :: domain_r_down
    6970             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
    6971             :       TYPE(domain_map_type), INTENT(IN)                  :: domain_map
    6972             :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, penalty_occ_vol
    6973             :       REAL(KIND=dp), INTENT(IN)                          :: penalty_occ_vol_prefactor, eps_filter, &
    6974             :                                                             neg_thr, spin_factor
    6975             :       INTEGER, INTENT(IN)                                :: special_case
    6976             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6977             :          INTENT(INOUT), OPTIONAL                         :: bad_modes_projector_down_out
    6978             :       LOGICAL, INTENT(IN)                                :: skip_inversion
    6979             : 
    6980             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_preconditioner'
    6981             : 
    6982             :       INTEGER                                            :: handle, ndim, precond_domain_projector
    6983         500 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: nn_diagonal
    6984             :       TYPE(dbcsr_type)                                   :: m_tmp_nn_1, m_tmp_no_3
    6985             : 
    6986         500 :       CALL timeset(routineN, handle)
    6987             : 
    6988             :       CALL dbcsr_create(m_tmp_nn_1, &
    6989             :                         template=m_s, &
    6990         500 :                         matrix_type=dbcsr_type_no_symmetry)
    6991             :       CALL dbcsr_create(m_tmp_no_3, &
    6992             :                         template=m_quench_t, &
    6993         500 :                         matrix_type=dbcsr_type_no_symmetry)
    6994             : 
    6995             :       ! calculate (1-R)F(1-R) and S-SRS
    6996             :       ! RZK-warning take advantage: some elements will be removed by the quencher
    6997             :       ! RZK-warning S operations can be performed outside the spin loop to save time
    6998             :       ! IT IS REQUIRED THAT PRECONDITIONER DOES NOT BREAK THE LOCALITY!!!!
    6999             :       ! RZK-warning: further optimization is ABSOLUTELY NECESSARY
    7000             : 
    7001             :       ! First S-SRS
    7002             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7003             :                           m_ST, &
    7004             :                           m_siginv, &
    7005             :                           0.0_dp, m_tmp_no_3, &
    7006         500 :                           filter_eps=eps_filter)
    7007         500 :       CALL dbcsr_desymmetrize(m_s, m_tmp_nn_1)
    7008             :       ! return STsiginv if necessary
    7009         500 :       IF (PRESENT(m_STsiginv_out)) THEN
    7010           0 :          CALL dbcsr_copy(m_STsiginv_out, m_tmp_no_3)
    7011             :       END IF
    7012         500 :       IF (special_case .EQ. xalmo_case_fully_deloc) THEN
    7013             :          ! use S instead of S-SRS
    7014             :       ELSE
    7015             :          CALL dbcsr_multiply("N", "T", -1.0_dp, &
    7016             :                              m_ST, &
    7017             :                              m_tmp_no_3, &
    7018             :                              1.0_dp, m_tmp_nn_1, &
    7019         456 :                              filter_eps=eps_filter)
    7020             :       END IF
    7021             :       ! return S_vv = (S or S-SRS) if necessary
    7022         500 :       IF (PRESENT(m_s_vv_out)) THEN
    7023           0 :          CALL dbcsr_copy(m_s_vv_out, m_tmp_nn_1)
    7024             :       END IF
    7025             : 
    7026             :       ! Second (1-R)F(1-R)
    7027             :       ! re-create matrix because desymmetrize is buggy -
    7028             :       ! it will create multiple copies of blocks
    7029         500 :       CALL dbcsr_desymmetrize(m_ks, m_prec_out)
    7030             :       CALL dbcsr_multiply("N", "T", -1.0_dp, &
    7031             :                           m_FTsiginv, &
    7032             :                           m_ST, &
    7033             :                           1.0_dp, m_prec_out, &
    7034         500 :                           filter_eps=eps_filter)
    7035             :       CALL dbcsr_multiply("N", "T", -1.0_dp, &
    7036             :                           m_ST, &
    7037             :                           m_FTsiginv, &
    7038             :                           1.0_dp, m_prec_out, &
    7039         500 :                           filter_eps=eps_filter)
    7040             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7041             :                           m_ST, &
    7042             :                           m_siginvTFTsiginv, &
    7043             :                           0.0_dp, m_tmp_no_3, &
    7044         500 :                           filter_eps=eps_filter)
    7045             :       CALL dbcsr_multiply("N", "T", 1.0_dp, &
    7046             :                           m_tmp_no_3, &
    7047             :                           m_ST, &
    7048             :                           1.0_dp, m_prec_out, &
    7049         500 :                           filter_eps=eps_filter)
    7050             :       ! return F_vv = (I-SR)F(I-RS) if necessary
    7051         500 :       IF (PRESENT(m_f_vv_out)) THEN
    7052           0 :          CALL dbcsr_copy(m_f_vv_out, m_prec_out)
    7053             :       END IF
    7054             : 
    7055             : #if 0
    7056             : !penalty_only=.TRUE.
    7057             :       WRITE (unit_nr, *) "prefactor0:", penalty_occ_vol_prefactor
    7058             :       !IF (penalty_occ_vol) THEN
    7059             :       CALL dbcsr_desymmetrize(m_s, &
    7060             :                               m_prec_out)
    7061             :       !CALL dbcsr_scale(m_prec_out,-penalty_occ_vol_prefactor)
    7062             :       !ENDIF
    7063             : #else
    7064             :       ! sum up the F_vv and S_vv terms
    7065             :       CALL dbcsr_add(m_prec_out, m_tmp_nn_1, &
    7066         500 :                      1.0_dp, 1.0_dp)
    7067             :       ! Scale to obtain unit step length
    7068         500 :       CALL dbcsr_scale(m_prec_out, 2.0_dp*spin_factor)
    7069             : 
    7070             :       ! add the contribution from the penalty on the occupied volume
    7071         500 :       IF (penalty_occ_vol) THEN
    7072             :          CALL dbcsr_add(m_prec_out, m_tmp_nn_1, &
    7073           0 :                         1.0_dp, penalty_occ_vol_prefactor)
    7074             :       END IF
    7075             : #endif
    7076             : 
    7077         500 :       CALL dbcsr_copy(m_tmp_nn_1, m_prec_out)
    7078             : 
    7079             :       ! invert using various algorithms
    7080         500 :       IF (special_case .EQ. xalmo_case_block_diag) THEN ! non-overlapping diagonal blocks
    7081             : 
    7082          96 :          IF (skip_inversion) THEN
    7083             : 
    7084             :             ! impose block-diagonal structure
    7085          92 :             CALL dbcsr_get_info(m_s, nfullrows_total=ndim)
    7086         276 :             ALLOCATE (nn_diagonal(ndim))
    7087          92 :             CALL dbcsr_get_diag(m_s, nn_diagonal)
    7088          92 :             CALL dbcsr_set(m_prec_out, 0.0_dp)
    7089          92 :             CALL dbcsr_set_diag(m_prec_out, nn_diagonal)
    7090          92 :             CALL dbcsr_filter(m_prec_out, eps_filter)
    7091          92 :             DEALLOCATE (nn_diagonal)
    7092             : 
    7093         184 :             CALL dbcsr_copy(m_prec_out, m_tmp_nn_1, keep_sparsity=.TRUE.)
    7094             : 
    7095             :          ELSE
    7096             : 
    7097             :             CALL pseudo_invert_diagonal_blk( &
    7098             :                matrix_in=m_tmp_nn_1, &
    7099             :                matrix_out=m_prec_out, &
    7100             :                nocc=nocc_of_domain(:) &
    7101           4 :                )
    7102             : 
    7103             :          END IF
    7104             : 
    7105         404 :       ELSE IF (special_case .EQ. xalmo_case_fully_deloc) THEN ! the entire system is a block
    7106             : 
    7107          44 :          IF (skip_inversion) THEN
    7108           0 :             CALL dbcsr_copy(m_prec_out, m_tmp_nn_1)
    7109             :          ELSE
    7110             : 
    7111             :             ! invert using cholesky (works with S matrix, will not work with S-SRS matrix)
    7112             :             CALL cp_dbcsr_cholesky_decompose(m_prec_out, &
    7113             :                                              para_env=para_env, &
    7114          44 :                                              blacs_env=blacs_env)
    7115             :             CALL cp_dbcsr_cholesky_invert(m_prec_out, &
    7116             :                                           para_env=para_env, &
    7117             :                                           blacs_env=blacs_env, &
    7118          44 :                                           uplo_to_full=.TRUE.)
    7119             :          END IF !skip_inversion
    7120             : 
    7121          44 :          CALL dbcsr_filter(m_prec_out, eps_filter)
    7122             : 
    7123             :       ELSE
    7124             : 
    7125             :          !!! use a true domain preconditioner with overlapping domains
    7126         360 :          IF (assume_t0_q0x) THEN
    7127          26 :             precond_domain_projector = -1
    7128             :          ELSE
    7129         334 :             precond_domain_projector = 0
    7130             :          END IF
    7131             :          !! RZK-warning: use PRESENT to make two nearly-identical calls
    7132             :          !! this is done because intel compiler does not seem to conform
    7133             :          !! to the FORTRAN standard for passing through optional arguments
    7134         360 :          IF (PRESENT(bad_modes_projector_down_out)) THEN
    7135             :             CALL construct_domain_preconditioner( &
    7136             :                matrix_main=m_tmp_nn_1, &
    7137             :                subm_s_inv=domain_s_inv(:), &
    7138             :                subm_s_inv_half=domain_s_inv_half(:), &
    7139             :                subm_s_half=domain_s_half(:), &
    7140             :                subm_r_down=domain_r_down(:), &
    7141             :                matrix_trimmer=m_quench_t, &
    7142             :                dpattern=m_quench_t, &
    7143             :                map=domain_map, &
    7144             :                node_of_domain=cpu_of_domain, &
    7145             :                preconditioner=domain_prec_out(:), &
    7146             :                use_trimmer=.FALSE., &
    7147             :                bad_modes_projector_down=bad_modes_projector_down_out(:), &
    7148             :                eps_zero_eigenvalues=neg_thr, &
    7149             :                my_action=precond_domain_projector, &
    7150             :                skip_inversion=skip_inversion &
    7151          18 :                )
    7152             :          ELSE
    7153             :             CALL construct_domain_preconditioner( &
    7154             :                matrix_main=m_tmp_nn_1, &
    7155             :                subm_s_inv=domain_s_inv(:), &
    7156             :                subm_r_down=domain_r_down(:), &
    7157             :                matrix_trimmer=m_quench_t, &
    7158             :                dpattern=m_quench_t, &
    7159             :                map=domain_map, &
    7160             :                node_of_domain=cpu_of_domain, &
    7161             :                preconditioner=domain_prec_out(:), &
    7162             :                use_trimmer=.FALSE., &
    7163             :                !eps_zero_eigenvalues=neg_thr,&
    7164             :                my_action=precond_domain_projector, &
    7165             :                skip_inversion=skip_inversion &
    7166         342 :                )
    7167             :          END IF
    7168             : 
    7169             :       END IF ! special_case
    7170             : 
    7171             :       ! invert using cholesky (works with S matrix, will not work with S-SRS matrix)
    7172             :       !!!CALL cp_dbcsr_cholesky_decompose(prec_vv,&
    7173             :       !!!        para_env=almo_scf_env%para_env,&
    7174             :       !!!        blacs_env=almo_scf_env%blacs_env)
    7175             :       !!!CALL cp_dbcsr_cholesky_invert(prec_vv,&
    7176             :       !!!        para_env=almo_scf_env%para_env,&
    7177             :       !!!        blacs_env=almo_scf_env%blacs_env,&
    7178             :       !!!        uplo_to_full=.TRUE.)
    7179             :       !!!CALL dbcsr_filter(prec_vv,&
    7180             :       !!!        almo_scf_env%eps_filter)
    7181             :       !!!
    7182             : 
    7183             :       ! re-create the matrix because desymmetrize is buggy -
    7184             :       ! it will create multiple copies of blocks
    7185             :       !!!DESYM!CALL dbcsr_create(prec_vv,&
    7186             :       !!!DESYM!        template=almo_scf_env%matrix_s(1),&
    7187             :       !!!DESYM!        matrix_type=dbcsr_type_no_symmetry)
    7188             :       !!!DESYM!CALL dbcsr_desymmetrize(almo_scf_env%matrix_s(1),&
    7189             :       !!!DESYM!        prec_vv)
    7190             :       !CALL dbcsr_multiply("N","N",1.0_dp,&
    7191             :       !        almo_scf_env%matrix_s(1),&
    7192             :       !        matrix_t_out(ispin),&
    7193             :       !        0.0_dp,m_tmp_no_1,&
    7194             :       !        filter_eps=almo_scf_env%eps_filter)
    7195             :       !CALL dbcsr_multiply("N","N",1.0_dp,&
    7196             :       !        m_tmp_no_1,&
    7197             :       !        almo_scf_env%matrix_sigma_inv(ispin),&
    7198             :       !        0.0_dp,m_tmp_no_3,&
    7199             :       !        filter_eps=almo_scf_env%eps_filter)
    7200             :       !CALL dbcsr_multiply("N","T",-1.0_dp,&
    7201             :       !        m_tmp_no_3,&
    7202             :       !        m_tmp_no_1,&
    7203             :       !        1.0_dp,prec_vv,&
    7204             :       !        filter_eps=almo_scf_env%eps_filter)
    7205             :       !CALL dbcsr_add_on_diag(prec_vv,&
    7206             :       !        prec_sf_mixing_s)
    7207             : 
    7208             :       !CALL dbcsr_create(prec_oo,&
    7209             :       !        template=almo_scf_env%matrix_sigma(ispin),&
    7210             :       !        matrix_type=dbcsr_type_no_symmetry)
    7211             :       !CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma(ispin),&
    7212             :       !        matrix_type=dbcsr_type_no_symmetry)
    7213             :       !CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma(ispin),&
    7214             :       !        prec_oo)
    7215             :       !CALL dbcsr_filter(prec_oo,&
    7216             :       !        almo_scf_env%eps_filter)
    7217             : 
    7218             :       !! invert using cholesky
    7219             :       !CALL dbcsr_create(prec_oo_inv,&
    7220             :       !        template=prec_oo,&
    7221             :       !        matrix_type=dbcsr_type_no_symmetry)
    7222             :       !CALL dbcsr_desymmetrize(prec_oo,&
    7223             :       !        prec_oo_inv)
    7224             :       !CALL cp_dbcsr_cholesky_decompose(prec_oo_inv,&
    7225             :       !        para_env=almo_scf_env%para_env,&
    7226             :       !        blacs_env=almo_scf_env%blacs_env)
    7227             :       !CALL cp_dbcsr_cholesky_invert(prec_oo_inv,&
    7228             :       !        para_env=almo_scf_env%para_env,&
    7229             :       !        blacs_env=almo_scf_env%blacs_env,&
    7230             :       !        uplo_to_full=.TRUE.)
    7231             : 
    7232         500 :       CALL dbcsr_release(m_tmp_nn_1)
    7233         500 :       CALL dbcsr_release(m_tmp_no_3)
    7234             : 
    7235         500 :       CALL timestop(handle)
    7236             : 
    7237        1000 :    END SUBROUTINE compute_preconditioner
    7238             : 
    7239             : ! **************************************************************************************************
    7240             : !> \brief Compute beta for conjugate gradient algorithms
    7241             : !> \param beta ...
    7242             : !> \param numer ...
    7243             : !> \param denom ...
    7244             : !> \param reset_conjugator ...
    7245             : !> \param conjugator ...
    7246             : !> \param grad ...
    7247             : !> \param prev_grad ...
    7248             : !> \param step ...
    7249             : !> \param prev_step ...
    7250             : !> \param prev_minus_prec_grad ...
    7251             : !> \par History
    7252             : !>       2015.04 created [Rustam Z Khaliullin]
    7253             : !> \author Rustam Z Khaliullin
    7254             : ! **************************************************************************************************
    7255        1016 :    SUBROUTINE compute_cg_beta(beta, numer, denom, reset_conjugator, conjugator, &
    7256         508 :                               grad, prev_grad, step, prev_step, prev_minus_prec_grad)
    7257             : 
    7258             :       REAL(KIND=dp), INTENT(INOUT)                       :: beta
    7259             :       REAL(KIND=dp), INTENT(INOUT), OPTIONAL             :: numer, denom
    7260             :       LOGICAL, INTENT(INOUT)                             :: reset_conjugator
    7261             :       INTEGER, INTENT(IN)                                :: conjugator
    7262             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: grad, prev_grad, step, prev_step
    7263             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT), &
    7264             :          OPTIONAL                                        :: prev_minus_prec_grad
    7265             : 
    7266             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'compute_cg_beta'
    7267             : 
    7268             :       INTEGER                                            :: handle, i, nsize, unit_nr
    7269             :       REAL(KIND=dp)                                      :: den, kappa, my_denom, my_numer, &
    7270             :                                                             my_numer2, my_numer3, num, num2, num3, &
    7271             :                                                             tau
    7272             :       TYPE(cp_logger_type), POINTER                      :: logger
    7273             :       TYPE(dbcsr_type)                                   :: m_tmp_no_1
    7274             : 
    7275         508 :       CALL timeset(routineN, handle)
    7276             : 
    7277             :       ! get a useful output_unit
    7278         508 :       logger => cp_get_default_logger()
    7279         508 :       IF (logger%para_env%is_source()) THEN
    7280         254 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    7281             :       ELSE
    7282             :          unit_nr = -1
    7283             :       END IF
    7284             : 
    7285         508 :       IF (.NOT. PRESENT(prev_minus_prec_grad)) THEN
    7286             :          IF (conjugator .EQ. cg_fletcher_reeves .OR. &
    7287          82 :              conjugator .EQ. cg_polak_ribiere .OR. &
    7288             :              conjugator .EQ. cg_hager_zhang) THEN
    7289           0 :             CPABORT("conjugator needs more input")
    7290             :          END IF
    7291             :       END IF
    7292             : 
    7293             :       ! return num denom so beta can be calculated spin-by-spin
    7294         508 :       IF (PRESENT(numer) .OR. PRESENT(denom)) THEN
    7295             :          IF (conjugator .EQ. cg_hestenes_stiefel .OR. &
    7296           0 :              conjugator .EQ. cg_dai_yuan .OR. &
    7297             :              conjugator .EQ. cg_hager_zhang) THEN
    7298           0 :             CPABORT("cannot return numer/denom")
    7299             :          END IF
    7300             :       END IF
    7301             : 
    7302         508 :       nsize = SIZE(grad)
    7303             : 
    7304         508 :       my_numer = 0.0_dp
    7305         508 :       my_numer2 = 0.0_dp
    7306         508 :       my_numer3 = 0.0_dp
    7307         508 :       my_denom = 0.0_dp
    7308             : 
    7309        1016 :       DO i = 1, nsize
    7310             : 
    7311             :          CALL dbcsr_create(m_tmp_no_1, &
    7312             :                            template=grad(i), &
    7313         508 :                            matrix_type=dbcsr_type_no_symmetry)
    7314             : 
    7315         570 :          SELECT CASE (conjugator)
    7316             :          CASE (cg_hestenes_stiefel)
    7317          62 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7318             :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), &
    7319          62 :                            1.0_dp, -1.0_dp)
    7320          62 :             CALL dbcsr_dot(m_tmp_no_1, step(i), num)
    7321         156 :             CALL dbcsr_dot(m_tmp_no_1, prev_step(i), den)
    7322             :          CASE (cg_fletcher_reeves)
    7323          94 :             CALL dbcsr_dot(grad(i), step(i), num)
    7324         124 :             CALL dbcsr_dot(prev_grad(i), prev_minus_prec_grad(i), den)
    7325             :          CASE (cg_polak_ribiere)
    7326          30 :             CALL dbcsr_dot(prev_grad(i), prev_minus_prec_grad(i), den)
    7327          30 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7328          30 :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
    7329         202 :             CALL dbcsr_dot(m_tmp_no_1, step(i), num)
    7330             :          CASE (cg_fletcher)
    7331         172 :             CALL dbcsr_dot(grad(i), step(i), num)
    7332         192 :             CALL dbcsr_dot(prev_grad(i), prev_step(i), den)
    7333             :          CASE (cg_liu_storey)
    7334          20 :             CALL dbcsr_dot(prev_grad(i), prev_step(i), den)
    7335          20 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7336          20 :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
    7337          54 :             CALL dbcsr_dot(m_tmp_no_1, step(i), num)
    7338             :          CASE (cg_dai_yuan)
    7339          34 :             CALL dbcsr_dot(grad(i), step(i), num)
    7340          34 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7341          34 :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
    7342         106 :             CALL dbcsr_dot(m_tmp_no_1, prev_step(i), den)
    7343             :          CASE (cg_hager_zhang)
    7344          72 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7345          72 :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
    7346          72 :             CALL dbcsr_dot(m_tmp_no_1, prev_step(i), den)
    7347          72 :             CALL dbcsr_dot(m_tmp_no_1, prev_minus_prec_grad(i), num)
    7348          72 :             CALL dbcsr_dot(m_tmp_no_1, step(i), num2)
    7349          72 :             CALL dbcsr_dot(prev_step(i), grad(i), num3)
    7350          72 :             my_numer2 = my_numer2 + num2
    7351          72 :             my_numer3 = my_numer3 + num3
    7352             :          CASE (cg_zero)
    7353          24 :             num = 0.0_dp
    7354          24 :             den = 1.0_dp
    7355             :          CASE DEFAULT
    7356         726 :             CPABORT("illegal conjugator")
    7357             :          END SELECT
    7358         508 :          my_numer = my_numer + num
    7359         508 :          my_denom = my_denom + den
    7360             : 
    7361        1016 :          CALL dbcsr_release(m_tmp_no_1)
    7362             : 
    7363             :       END DO ! i - nsize
    7364             : 
    7365        1016 :       DO i = 1, nsize
    7366             : 
    7367         508 :          SELECT CASE (conjugator)
    7368             :          CASE (cg_hestenes_stiefel, cg_dai_yuan)
    7369          96 :             beta = -1.0_dp*my_numer/my_denom
    7370             :          CASE (cg_fletcher_reeves, cg_polak_ribiere, cg_fletcher, cg_liu_storey)
    7371         316 :             beta = my_numer/my_denom
    7372             :          CASE (cg_hager_zhang)
    7373          72 :             kappa = -2.0_dp*my_numer/my_denom
    7374          72 :             tau = -1.0_dp*my_numer2/my_denom
    7375          72 :             beta = tau - kappa*my_numer3/my_denom
    7376             :          CASE (cg_zero)
    7377          24 :             beta = 0.0_dp
    7378             :          CASE DEFAULT
    7379         508 :             CPABORT("illegal conjugator")
    7380             :          END SELECT
    7381             : 
    7382             :       END DO ! i - nsize
    7383             : 
    7384         508 :       IF (beta .LT. 0.0_dp) THEN
    7385           0 :          IF (unit_nr > 0) THEN
    7386           0 :             WRITE (unit_nr, *) " Resetting conjugator because beta is negative: ", beta
    7387             :          END IF
    7388           0 :          reset_conjugator = .TRUE.
    7389             :       END IF
    7390             : 
    7391         508 :       IF (PRESENT(numer)) THEN
    7392           0 :          numer = my_numer
    7393             :       END IF
    7394         508 :       IF (PRESENT(denom)) THEN
    7395           0 :          denom = my_denom
    7396             :       END IF
    7397             : 
    7398         508 :       CALL timestop(handle)
    7399             : 
    7400         508 :    END SUBROUTINE compute_cg_beta
    7401             : 
    7402             : ! **************************************************************************************************
    7403             : !> \brief computes the step matrix from the gradient and Hessian using the Newton-Raphson method
    7404             : !> \param optimizer ...
    7405             : !> \param m_grad ...
    7406             : !> \param m_delta ...
    7407             : !> \param m_s ...
    7408             : !> \param m_ks ...
    7409             : !> \param m_siginv ...
    7410             : !> \param m_quench_t ...
    7411             : !> \param m_FTsiginv ...
    7412             : !> \param m_siginvTFTsiginv ...
    7413             : !> \param m_ST ...
    7414             : !> \param m_t ...
    7415             : !> \param m_sig_sqrti_ii ...
    7416             : !> \param domain_s_inv ...
    7417             : !> \param domain_r_down ...
    7418             : !> \param domain_map ...
    7419             : !> \param cpu_of_domain ...
    7420             : !> \param nocc_of_domain ...
    7421             : !> \param para_env ...
    7422             : !> \param blacs_env ...
    7423             : !> \param eps_filter ...
    7424             : !> \param optimize_theta ...
    7425             : !> \param penalty_occ_vol ...
    7426             : !> \param normalize_orbitals ...
    7427             : !> \param penalty_occ_vol_prefactor ...
    7428             : !> \param penalty_occ_vol_pf2 ...
    7429             : !> \param special_case ...
    7430             : !> \par History
    7431             : !>       2015.04 created [Rustam Z. Khaliullin]
    7432             : !> \author Rustam Z. Khaliullin
    7433             : ! **************************************************************************************************
    7434           0 :    SUBROUTINE newton_grad_to_step(optimizer, m_grad, m_delta, m_s, m_ks, &
    7435           0 :                                   m_siginv, m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, m_t, &
    7436           0 :                                   m_sig_sqrti_ii, domain_s_inv, domain_r_down, domain_map, cpu_of_domain, &
    7437           0 :                                   nocc_of_domain, para_env, blacs_env, eps_filter, optimize_theta, &
    7438           0 :                                   penalty_occ_vol, normalize_orbitals, penalty_occ_vol_prefactor, &
    7439           0 :                                   penalty_occ_vol_pf2, special_case)
    7440             : 
    7441             :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
    7442             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_grad
    7443             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_delta, m_s, m_ks, m_siginv, m_quench_t
    7444             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_FTsiginv, m_siginvTFTsiginv, m_ST, &
    7445             :                                                             m_t, m_sig_sqrti_ii
    7446             :       TYPE(domain_submatrix_type), DIMENSION(:, :), &
    7447             :          INTENT(IN)                                      :: domain_s_inv, domain_r_down
    7448             :       TYPE(domain_map_type), DIMENSION(:), INTENT(IN)    :: domain_map
    7449             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
    7450             :       INTEGER, DIMENSION(:, :), INTENT(IN)               :: nocc_of_domain
    7451             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    7452             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    7453             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    7454             :       LOGICAL, INTENT(IN)                                :: optimize_theta, penalty_occ_vol, &
    7455             :                                                             normalize_orbitals
    7456             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: penalty_occ_vol_prefactor, &
    7457             :                                                             penalty_occ_vol_pf2
    7458             :       INTEGER, INTENT(IN)                                :: special_case
    7459             : 
    7460             :       CHARACTER(len=*), PARAMETER :: routineN = 'newton_grad_to_step'
    7461             : 
    7462             :       CHARACTER(LEN=20)                                  :: iter_type
    7463             :       INTEGER                                            :: handle, ispin, iteration, max_iter, &
    7464             :                                                             ndomains, nspins, outer_iteration, &
    7465             :                                                             outer_max_iter, unit_nr
    7466             :       LOGICAL :: converged, do_exact_inversion, outer_prepare_to_exit, prepare_to_exit, &
    7467             :          reset_conjugator, use_preconditioner
    7468             :       REAL(KIND=dp)                                      :: alpha, beta, denom, denom_ispin, &
    7469             :                                                             eps_error_target, numer, numer_ispin, &
    7470             :                                                             residue_norm, spin_factor, t1, t2
    7471           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: residue_max_norm
    7472             :       TYPE(cp_logger_type), POINTER                      :: logger
    7473             :       TYPE(dbcsr_type)                                   :: m_tmp_oo_1, m_tmp_oo_2
    7474           0 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_f_vo, m_f_vv, m_Hstep, m_prec, &
    7475           0 :                                                             m_residue, m_residue_prev, m_s_vv, &
    7476           0 :                                                             m_step, m_STsiginv, m_zet, m_zet_prev
    7477             :       TYPE(domain_submatrix_type), ALLOCATABLE, &
    7478           0 :          DIMENSION(:, :)                                 :: domain_prec
    7479             : 
    7480           0 :       CALL timeset(routineN, handle)
    7481             : 
    7482             :       ! get a useful output_unit
    7483           0 :       logger => cp_get_default_logger()
    7484           0 :       IF (logger%para_env%is_source()) THEN
    7485           0 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    7486             :       ELSE
    7487             :          unit_nr = -1
    7488             :       END IF
    7489             : 
    7490             :       !!! Currently for non-theta only
    7491           0 :       IF (optimize_theta) THEN
    7492           0 :          CPABORT("theta is NYI")
    7493             :       END IF
    7494             : 
    7495             :       ! set optimizer options
    7496           0 :       use_preconditioner = (optimizer%preconditioner .NE. xalmo_prec_zero)
    7497           0 :       outer_max_iter = optimizer%max_iter_outer_loop
    7498           0 :       max_iter = optimizer%max_iter
    7499           0 :       eps_error_target = optimizer%eps_error
    7500             : 
    7501             :       ! set key dimensions
    7502           0 :       nspins = SIZE(m_ks)
    7503           0 :       ndomains = SIZE(domain_s_inv, 1)
    7504             : 
    7505           0 :       IF (nspins == 1) THEN
    7506           0 :          spin_factor = 2.0_dp
    7507             :       ELSE
    7508           0 :          spin_factor = 1.0_dp
    7509             :       END IF
    7510             : 
    7511           0 :       ALLOCATE (domain_prec(ndomains, nspins))
    7512           0 :       CALL init_submatrices(domain_prec)
    7513             : 
    7514             :       ! allocate matrices
    7515           0 :       ALLOCATE (m_residue(nspins))
    7516           0 :       ALLOCATE (m_residue_prev(nspins))
    7517           0 :       ALLOCATE (m_step(nspins))
    7518           0 :       ALLOCATE (m_zet(nspins))
    7519           0 :       ALLOCATE (m_zet_prev(nspins))
    7520           0 :       ALLOCATE (m_Hstep(nspins))
    7521           0 :       ALLOCATE (m_prec(nspins))
    7522           0 :       ALLOCATE (m_s_vv(nspins))
    7523           0 :       ALLOCATE (m_f_vv(nspins))
    7524           0 :       ALLOCATE (m_f_vo(nspins))
    7525           0 :       ALLOCATE (m_STsiginv(nspins))
    7526             : 
    7527           0 :       ALLOCATE (residue_max_norm(nspins))
    7528             : 
    7529             :       ! initiate objects before iterations
    7530           0 :       DO ispin = 1, nspins
    7531             : 
    7532             :          ! init matrices
    7533             :          CALL dbcsr_create(m_residue(ispin), &
    7534             :                            template=m_quench_t(ispin), &
    7535           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7536             :          CALL dbcsr_create(m_residue_prev(ispin), &
    7537             :                            template=m_quench_t(ispin), &
    7538           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7539             :          CALL dbcsr_create(m_step(ispin), &
    7540             :                            template=m_quench_t(ispin), &
    7541           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7542             :          CALL dbcsr_create(m_zet_prev(ispin), &
    7543             :                            template=m_quench_t(ispin), &
    7544           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7545             :          CALL dbcsr_create(m_zet(ispin), &
    7546             :                            template=m_quench_t(ispin), &
    7547           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7548             :          CALL dbcsr_create(m_Hstep(ispin), &
    7549             :                            template=m_quench_t(ispin), &
    7550           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7551             :          CALL dbcsr_create(m_f_vo(ispin), &
    7552             :                            template=m_quench_t(ispin), &
    7553           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7554             :          CALL dbcsr_create(m_STsiginv(ispin), &
    7555             :                            template=m_quench_t(ispin), &
    7556           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7557             :          CALL dbcsr_create(m_f_vv(ispin), &
    7558             :                            template=m_ks(ispin), &
    7559           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7560             :          CALL dbcsr_create(m_s_vv(ispin), &
    7561             :                            template=m_s(1), &
    7562           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7563             :          CALL dbcsr_create(m_prec(ispin), &
    7564             :                            template=m_ks(ispin), &
    7565           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7566             : 
    7567             :          ! compute the full "gradient" - it is necessary to
    7568             :          ! evaluate Hessian.X
    7569           0 :          CALL dbcsr_copy(m_f_vo(ispin), m_FTsiginv(ispin))
    7570             :          CALL dbcsr_multiply("N", "N", -1.0_dp, &
    7571             :                              m_ST(ispin), &
    7572             :                              m_siginvTFTsiginv(ispin), &
    7573             :                              1.0_dp, m_f_vo(ispin), &
    7574           0 :                              filter_eps=eps_filter)
    7575             : 
    7576             : ! RZK-warning
    7577             : ! compute preconditioner even if we do not use it
    7578             : ! this is for debugging because compute_preconditioner includes
    7579             : ! computing F_vv and S_vv necessary for
    7580             : !       IF ( use_preconditioner ) THEN
    7581             : 
    7582             : ! domain_s_inv and domain_r_down are never used with assume_t0_q0x=FALSE
    7583             :          CALL compute_preconditioner( &
    7584             :             domain_prec_out=domain_prec(:, ispin), &
    7585             :             m_prec_out=m_prec(ispin), &
    7586             :             m_ks=m_ks(ispin), &
    7587             :             m_s=m_s(1), &
    7588             :             m_siginv=m_siginv(ispin), &
    7589             :             m_quench_t=m_quench_t(ispin), &
    7590             :             m_FTsiginv=m_FTsiginv(ispin), &
    7591             :             m_siginvTFTsiginv=m_siginvTFTsiginv(ispin), &
    7592             :             m_ST=m_ST(ispin), &
    7593             :             m_STsiginv_out=m_STsiginv(ispin), &
    7594             :             m_s_vv_out=m_s_vv(ispin), &
    7595             :             m_f_vv_out=m_f_vv(ispin), &
    7596             :             para_env=para_env, &
    7597             :             blacs_env=blacs_env, &
    7598             :             nocc_of_domain=nocc_of_domain(:, ispin), &
    7599             :             domain_s_inv=domain_s_inv(:, ispin), &
    7600             :             domain_r_down=domain_r_down(:, ispin), &
    7601             :             cpu_of_domain=cpu_of_domain(:), &
    7602             :             domain_map=domain_map(ispin), &
    7603             :             assume_t0_q0x=.FALSE., &
    7604             :             penalty_occ_vol=penalty_occ_vol, &
    7605             :             penalty_occ_vol_prefactor=penalty_occ_vol_prefactor(ispin), &
    7606             :             eps_filter=eps_filter, &
    7607             :             neg_thr=0.5_dp, &
    7608             :             spin_factor=spin_factor, &
    7609             :             special_case=special_case, &
    7610             :             skip_inversion=.FALSE. &
    7611           0 :             )
    7612             : 
    7613             : !       ENDIF ! use_preconditioner
    7614             : 
    7615             :          ! initial guess
    7616           0 :          CALL dbcsr_copy(m_delta(ispin), m_quench_t(ispin))
    7617             :          ! in order to use dbcsr_set matrix blocks must exist
    7618           0 :          CALL dbcsr_set(m_delta(ispin), 0.0_dp)
    7619           0 :          CALL dbcsr_copy(m_residue(ispin), m_grad(ispin))
    7620           0 :          CALL dbcsr_scale(m_residue(ispin), -1.0_dp)
    7621             : 
    7622           0 :          do_exact_inversion = .FALSE.
    7623             :          IF (do_exact_inversion) THEN
    7624             : 
    7625             :             ! copy grad to m_step temporarily
    7626             :             ! use m_step as input to the inversion routine
    7627             :             CALL dbcsr_copy(m_step(ispin), m_grad(ispin))
    7628             : 
    7629             :             ! expensive "exact" inversion of the "nearly-exact" Hessian
    7630             :             ! hopefully returns Z=-H^(-1).G
    7631             :             CALL hessian_diag_apply( &
    7632             :                matrix_grad=m_step(ispin), &
    7633             :                matrix_step=m_zet(ispin), &
    7634             :                matrix_S_ao=m_s_vv(ispin), &
    7635             :                matrix_F_ao=m_f_vv(ispin), &
    7636             :                !matrix_S_ao=m_s(ispin),&
    7637             :                !matrix_F_ao=m_ks(ispin),&
    7638             :                matrix_S_mo=m_siginv(ispin), &
    7639             :                matrix_F_mo=m_siginvTFTsiginv(ispin), &
    7640             :                matrix_S_vo=m_STsiginv(ispin), &
    7641             :                matrix_F_vo=m_f_vo(ispin), &
    7642             :                quench_t=m_quench_t(ispin), &
    7643             :                spin_factor=spin_factor, &
    7644             :                eps_zero=eps_filter*10.0_dp, &
    7645             :                penalty_occ_vol=penalty_occ_vol, &
    7646             :                penalty_occ_vol_prefactor=penalty_occ_vol_prefactor(ispin), &
    7647             :                penalty_occ_vol_pf2=penalty_occ_vol_pf2(ispin), &
    7648             :                m_s=m_s(1), &
    7649             :                para_env=para_env, &
    7650             :                blacs_env=blacs_env &
    7651             :                )
    7652             :             ! correct solution by the spin factor
    7653             :             !CALL dbcsr_scale(m_zet(ispin),1.0_dp/(2.0_dp*spin_factor))
    7654             : 
    7655             :          ELSE ! use PCG to solve H.D=-G
    7656             : 
    7657           0 :             IF (use_preconditioner) THEN
    7658             : 
    7659           0 :                IF (special_case .EQ. xalmo_case_block_diag .OR. &
    7660             :                    special_case .EQ. xalmo_case_fully_deloc) THEN
    7661             : 
    7662             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7663             :                                       m_prec(ispin), &
    7664             :                                       m_residue(ispin), &
    7665             :                                       0.0_dp, m_zet(ispin), &
    7666           0 :                                       filter_eps=eps_filter)
    7667             : 
    7668             :                ELSE
    7669             : 
    7670             :                   CALL apply_domain_operators( &
    7671             :                      matrix_in=m_residue(ispin), &
    7672             :                      matrix_out=m_zet(ispin), &
    7673             :                      operator1=domain_prec(:, ispin), &
    7674             :                      dpattern=m_quench_t(ispin), &
    7675             :                      map=domain_map(ispin), &
    7676             :                      node_of_domain=cpu_of_domain(:), &
    7677             :                      my_action=0, &
    7678             :                      filter_eps=eps_filter &
    7679             :                      !matrix_trimmer=,&
    7680             :                      !use_trimmer=.FALSE.,&
    7681           0 :                      )
    7682             : 
    7683             :                END IF ! special_case
    7684             : 
    7685             :             ELSE ! do not use preconditioner
    7686             : 
    7687           0 :                CALL dbcsr_copy(m_zet(ispin), m_residue(ispin))
    7688             : 
    7689             :             END IF ! use_preconditioner
    7690             : 
    7691             :          END IF ! do_exact_inversion
    7692             : 
    7693           0 :          CALL dbcsr_copy(m_step(ispin), m_zet(ispin))
    7694             : 
    7695             :       END DO !ispin
    7696             : 
    7697             :       ! start the outer SCF loop
    7698           0 :       outer_prepare_to_exit = .FALSE.
    7699           0 :       outer_iteration = 0
    7700           0 :       residue_norm = 0.0_dp
    7701             : 
    7702             :       DO
    7703             : 
    7704             :          ! start the inner SCF loop
    7705           0 :          prepare_to_exit = .FALSE.
    7706           0 :          converged = .FALSE.
    7707           0 :          iteration = 0
    7708           0 :          t1 = m_walltime()
    7709             : 
    7710             :          DO
    7711             : 
    7712             :             ! apply hessian to the step matrix
    7713             :             CALL apply_hessian( &
    7714             :                m_x_in=m_step, &
    7715             :                m_x_out=m_Hstep, &
    7716             :                m_ks=m_ks, &
    7717             :                m_s=m_s, &
    7718             :                m_siginv=m_siginv, &
    7719             :                m_quench_t=m_quench_t, &
    7720             :                m_FTsiginv=m_FTsiginv, &
    7721             :                m_siginvTFTsiginv=m_siginvTFTsiginv, &
    7722             :                m_ST=m_ST, &
    7723             :                m_STsiginv=m_STsiginv, &
    7724             :                m_s_vv=m_s_vv, &
    7725             :                m_ks_vv=m_f_vv, &
    7726             :                !m_s_vv=m_s,&
    7727             :                !m_ks_vv=m_ks,&
    7728             :                m_g_full=m_f_vo, &
    7729             :                m_t=m_t, &
    7730             :                m_sig_sqrti_ii=m_sig_sqrti_ii, &
    7731             :                penalty_occ_vol=penalty_occ_vol, &
    7732             :                normalize_orbitals=normalize_orbitals, &
    7733             :                penalty_occ_vol_prefactor=penalty_occ_vol_prefactor, &
    7734             :                eps_filter=eps_filter, &
    7735             :                path_num=hessian_path_reuse &
    7736           0 :                )
    7737             : 
    7738             :             ! alpha is computed outside the spin loop
    7739           0 :             numer = 0.0_dp
    7740           0 :             denom = 0.0_dp
    7741           0 :             DO ispin = 1, nspins
    7742             : 
    7743           0 :                CALL dbcsr_dot(m_residue(ispin), m_zet(ispin), numer_ispin)
    7744           0 :                CALL dbcsr_dot(m_step(ispin), m_Hstep(ispin), denom_ispin)
    7745             : 
    7746           0 :                numer = numer + numer_ispin
    7747           0 :                denom = denom + denom_ispin
    7748             : 
    7749             :             END DO !ispin
    7750             : 
    7751           0 :             alpha = numer/denom
    7752             : 
    7753           0 :             DO ispin = 1, nspins
    7754             : 
    7755             :                ! update the variable
    7756           0 :                CALL dbcsr_add(m_delta(ispin), m_step(ispin), 1.0_dp, alpha)
    7757           0 :                CALL dbcsr_copy(m_residue_prev(ispin), m_residue(ispin))
    7758             :                CALL dbcsr_add(m_residue(ispin), m_Hstep(ispin), &
    7759           0 :                               1.0_dp, -1.0_dp*alpha)
    7760           0 :                residue_max_norm(ispin) = dbcsr_maxabs(m_residue(ispin))
    7761             : 
    7762             :             END DO ! ispin
    7763             : 
    7764             :             ! check convergence and other exit criteria
    7765           0 :             residue_norm = MAXVAL(residue_max_norm)
    7766           0 :             converged = (residue_norm .LT. eps_error_target)
    7767           0 :             IF (converged .OR. (iteration .GE. max_iter)) THEN
    7768             :                prepare_to_exit = .TRUE.
    7769             :             END IF
    7770             : 
    7771           0 :             IF (.NOT. prepare_to_exit) THEN
    7772             : 
    7773           0 :                DO ispin = 1, nspins
    7774             : 
    7775             :                   ! save current z before the update
    7776           0 :                   CALL dbcsr_copy(m_zet_prev(ispin), m_zet(ispin))
    7777             : 
    7778             :                   ! compute the new step (apply preconditioner if available)
    7779           0 :                   IF (use_preconditioner) THEN
    7780             : 
    7781             :                      !IF (unit_nr>0) THEN
    7782             :                      !   WRITE(unit_nr,*) "....applying preconditioner...."
    7783             :                      !ENDIF
    7784             : 
    7785           0 :                      IF (special_case .EQ. xalmo_case_block_diag .OR. &
    7786             :                          special_case .EQ. xalmo_case_fully_deloc) THEN
    7787             : 
    7788             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7789             :                                             m_prec(ispin), &
    7790             :                                             m_residue(ispin), &
    7791             :                                             0.0_dp, m_zet(ispin), &
    7792           0 :                                             filter_eps=eps_filter)
    7793             : 
    7794             :                      ELSE
    7795             : 
    7796             :                         CALL apply_domain_operators( &
    7797             :                            matrix_in=m_residue(ispin), &
    7798             :                            matrix_out=m_zet(ispin), &
    7799             :                            operator1=domain_prec(:, ispin), &
    7800             :                            dpattern=m_quench_t(ispin), &
    7801             :                            map=domain_map(ispin), &
    7802             :                            node_of_domain=cpu_of_domain(:), &
    7803             :                            my_action=0, &
    7804             :                            filter_eps=eps_filter &
    7805             :                            !matrix_trimmer=,&
    7806             :                            !use_trimmer=.FALSE.,&
    7807           0 :                            )
    7808             : 
    7809             :                      END IF ! special case
    7810             : 
    7811             :                   ELSE
    7812             : 
    7813           0 :                      CALL dbcsr_copy(m_zet(ispin), m_residue(ispin))
    7814             : 
    7815             :                   END IF
    7816             : 
    7817             :                END DO !ispin
    7818             : 
    7819             :                ! compute the conjugation coefficient - beta
    7820             :                CALL compute_cg_beta( &
    7821             :                   beta=beta, &
    7822             :                   reset_conjugator=reset_conjugator, &
    7823             :                   conjugator=cg_fletcher, &
    7824             :                   grad=m_residue, &
    7825             :                   prev_grad=m_residue_prev, &
    7826             :                   step=m_zet, &
    7827           0 :                   prev_step=m_zet_prev)
    7828             : 
    7829           0 :                DO ispin = 1, nspins
    7830             : 
    7831             :                   ! conjugate the step direction
    7832           0 :                   CALL dbcsr_add(m_step(ispin), m_zet(ispin), beta, 1.0_dp)
    7833             : 
    7834             :                END DO !ispin
    7835             : 
    7836             :             END IF ! not.prepare_to_exit
    7837             : 
    7838           0 :             t2 = m_walltime()
    7839           0 :             IF (unit_nr > 0) THEN
    7840             :                !iter_type=TRIM("ALMO SCF "//iter_type)
    7841           0 :                iter_type = TRIM("NR STEP")
    7842             :                WRITE (unit_nr, '(T6,A9,I6,F14.5,F14.5,F15.10,F9.2)') &
    7843           0 :                   iter_type, iteration, &
    7844           0 :                   alpha, beta, residue_norm, &
    7845           0 :                   t2 - t1
    7846             :             END IF
    7847           0 :             t1 = m_walltime()
    7848             : 
    7849           0 :             iteration = iteration + 1
    7850           0 :             IF (prepare_to_exit) EXIT
    7851             : 
    7852             :          END DO ! inner loop
    7853             : 
    7854           0 :          IF (converged .OR. (outer_iteration .GE. outer_max_iter)) THEN
    7855           0 :             outer_prepare_to_exit = .TRUE.
    7856             :          END IF
    7857             : 
    7858           0 :          outer_iteration = outer_iteration + 1
    7859           0 :          IF (outer_prepare_to_exit) EXIT
    7860             : 
    7861             :       END DO ! outer loop
    7862             : 
    7863             : ! is not necessary if penalty_occ_vol_pf2=0.0
    7864             : #if 0
    7865             : 
    7866             :       IF (penalty_occ_vol) THEN
    7867             : 
    7868             :          DO ispin = 1, nspins
    7869             : 
    7870             :             CALL dbcsr_copy(m_zet(ispin), m_grad(ispin))
    7871             :             CALL dbcsr_dot(m_delta(ispin), m_zet(ispin), alpha)
    7872             :             WRITE (unit_nr, *) "trace(grad.delta): ", alpha
    7873             :             alpha = -1.0_dp/(penalty_occ_vol_pf2(ispin)*alpha - 1.0_dp)
    7874             :             WRITE (unit_nr, *) "correction alpha: ", alpha
    7875             :             CALL dbcsr_scale(m_delta(ispin), alpha)
    7876             : 
    7877             :          END DO
    7878             : 
    7879             :       END IF
    7880             : 
    7881             : #endif
    7882             : 
    7883           0 :       DO ispin = 1, nspins
    7884             : 
    7885             :          ! check whether the step lies entirely in R or Q
    7886             :          CALL dbcsr_create(m_tmp_oo_1, &
    7887             :                            template=m_siginv(ispin), &
    7888           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7889             :          CALL dbcsr_create(m_tmp_oo_2, &
    7890             :                            template=m_siginv(ispin), &
    7891           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7892             :          CALL dbcsr_multiply("T", "N", 1.0_dp, &
    7893             :                              m_ST(ispin), &
    7894             :                              m_delta(ispin), &
    7895             :                              0.0_dp, m_tmp_oo_1, &
    7896           0 :                              filter_eps=eps_filter)
    7897             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7898             :                              m_siginv(ispin), &
    7899             :                              m_tmp_oo_1, &
    7900             :                              0.0_dp, m_tmp_oo_2, &
    7901           0 :                              filter_eps=eps_filter)
    7902           0 :          CALL dbcsr_copy(m_zet(ispin), m_quench_t(ispin))
    7903             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7904             :                              m_t(ispin), &
    7905             :                              m_tmp_oo_2, &
    7906             :                              0.0_dp, m_zet(ispin), &
    7907           0 :                              retain_sparsity=.TRUE.)
    7908           0 :          alpha = dbcsr_maxabs(m_zet(ispin))
    7909           0 :          WRITE (unit_nr, "(A50,2F20.10)") "Occupied-space projection of the step", alpha
    7910           0 :          CALL dbcsr_add(m_zet(ispin), m_delta(ispin), -1.0_dp, 1.0_dp)
    7911           0 :          alpha = dbcsr_maxabs(m_zet(ispin))
    7912           0 :          WRITE (unit_nr, "(A50,2F20.10)") "Virtual-space projection of the step", alpha
    7913           0 :          alpha = dbcsr_maxabs(m_delta(ispin))
    7914           0 :          WRITE (unit_nr, "(A50,2F20.10)") "Full step", alpha
    7915           0 :          CALL dbcsr_release(m_tmp_oo_1)
    7916           0 :          CALL dbcsr_release(m_tmp_oo_2)
    7917             : 
    7918             :       END DO
    7919             : 
    7920             :       ! clean up
    7921           0 :       DO ispin = 1, nspins
    7922           0 :          CALL release_submatrices(domain_prec(:, ispin))
    7923           0 :          CALL dbcsr_release(m_residue(ispin))
    7924           0 :          CALL dbcsr_release(m_residue_prev(ispin))
    7925           0 :          CALL dbcsr_release(m_step(ispin))
    7926           0 :          CALL dbcsr_release(m_zet(ispin))
    7927           0 :          CALL dbcsr_release(m_zet_prev(ispin))
    7928           0 :          CALL dbcsr_release(m_Hstep(ispin))
    7929           0 :          CALL dbcsr_release(m_f_vo(ispin))
    7930           0 :          CALL dbcsr_release(m_f_vv(ispin))
    7931           0 :          CALL dbcsr_release(m_s_vv(ispin))
    7932           0 :          CALL dbcsr_release(m_prec(ispin))
    7933           0 :          CALL dbcsr_release(m_STsiginv(ispin))
    7934             :       END DO !ispin
    7935           0 :       DEALLOCATE (domain_prec)
    7936           0 :       DEALLOCATE (m_residue)
    7937           0 :       DEALLOCATE (m_residue_prev)
    7938           0 :       DEALLOCATE (m_step)
    7939           0 :       DEALLOCATE (m_zet)
    7940           0 :       DEALLOCATE (m_zet_prev)
    7941           0 :       DEALLOCATE (m_prec)
    7942           0 :       DEALLOCATE (m_Hstep)
    7943           0 :       DEALLOCATE (m_s_vv)
    7944           0 :       DEALLOCATE (m_f_vv)
    7945           0 :       DEALLOCATE (m_f_vo)
    7946           0 :       DEALLOCATE (m_STsiginv)
    7947           0 :       DEALLOCATE (residue_max_norm)
    7948             : 
    7949           0 :       IF (.NOT. converged) THEN
    7950           0 :          CPABORT("Optimization not converged!")
    7951             :       END IF
    7952             : 
    7953             :       ! check that the step satisfies H.step=-grad
    7954             : 
    7955           0 :       CALL timestop(handle)
    7956             : 
    7957           0 :    END SUBROUTINE newton_grad_to_step
    7958             : 
    7959             : ! *****************************************************************************
    7960             : !> \brief Computes Hessian.X
    7961             : !> \param m_x_in ...
    7962             : !> \param m_x_out ...
    7963             : !> \param m_ks ...
    7964             : !> \param m_s ...
    7965             : !> \param m_siginv ...
    7966             : !> \param m_quench_t ...
    7967             : !> \param m_FTsiginv ...
    7968             : !> \param m_siginvTFTsiginv ...
    7969             : !> \param m_ST ...
    7970             : !> \param m_STsiginv ...
    7971             : !> \param m_s_vv ...
    7972             : !> \param m_ks_vv ...
    7973             : !> \param m_g_full ...
    7974             : !> \param m_t ...
    7975             : !> \param m_sig_sqrti_ii ...
    7976             : !> \param penalty_occ_vol ...
    7977             : !> \param normalize_orbitals ...
    7978             : !> \param penalty_occ_vol_prefactor ...
    7979             : !> \param eps_filter ...
    7980             : !> \param path_num ...
    7981             : !> \par History
    7982             : !>       2015.04 created [Rustam Z Khaliullin]
    7983             : !> \author Rustam Z Khaliullin
    7984             : ! **************************************************************************************************
    7985           0 :    SUBROUTINE apply_hessian(m_x_in, m_x_out, m_ks, m_s, m_siginv, &
    7986           0 :                             m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, m_STsiginv, m_s_vv, &
    7987           0 :                             m_ks_vv, m_g_full, m_t, m_sig_sqrti_ii, penalty_occ_vol, &
    7988           0 :                             normalize_orbitals, penalty_occ_vol_prefactor, eps_filter, path_num)
    7989             : 
    7990             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_x_in, m_x_out, m_ks, m_s
    7991             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_siginv, m_quench_t, m_FTsiginv, &
    7992             :                                                             m_siginvTFTsiginv, m_ST, m_STsiginv
    7993             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_s_vv, m_ks_vv, m_g_full
    7994             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_t, m_sig_sqrti_ii
    7995             :       LOGICAL, INTENT(IN)                                :: penalty_occ_vol, normalize_orbitals
    7996             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: penalty_occ_vol_prefactor
    7997             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    7998             :       INTEGER, INTENT(IN)                                :: path_num
    7999             : 
    8000             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'apply_hessian'
    8001             : 
    8002             :       INTEGER                                            :: dim0, handle, ispin, nspins
    8003             :       REAL(KIND=dp)                                      :: penalty_prefactor_local, spin_factor
    8004           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: tg_diagonal
    8005             :       TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_no_2, m_tmp_oo_1, &
    8006             :                                                             m_tmp_x_in
    8007             : 
    8008           0 :       CALL timeset(routineN, handle)
    8009             : 
    8010             :       !JHU: test and use for unused debug variables
    8011           0 :       IF (penalty_occ_vol) penalty_prefactor_local = 1._dp
    8012           0 :       CPASSERT(SIZE(m_STsiginv) >= 0)
    8013           0 :       CPASSERT(SIZE(m_siginvTFTsiginv) >= 0)
    8014           0 :       CPASSERT(SIZE(m_s) >= 0)
    8015           0 :       CPASSERT(SIZE(m_g_full) >= 0)
    8016           0 :       CPASSERT(SIZE(m_FTsiginv) >= 0)
    8017             :       MARK_USED(m_siginvTFTsiginv)
    8018             :       MARK_USED(m_STsiginv)
    8019             :       MARK_USED(m_FTsiginv)
    8020             :       MARK_USED(m_g_full)
    8021             :       MARK_USED(m_s)
    8022             : 
    8023           0 :       nspins = SIZE(m_ks)
    8024             : 
    8025           0 :       IF (nspins .EQ. 1) THEN
    8026             :          spin_factor = 2.0_dp
    8027             :       ELSE
    8028           0 :          spin_factor = 1.0_dp
    8029             :       END IF
    8030             : 
    8031           0 :       DO ispin = 1, nspins
    8032             : 
    8033           0 :          penalty_prefactor_local = penalty_occ_vol_prefactor(ispin)/(2.0_dp*spin_factor)
    8034             : 
    8035             :          CALL dbcsr_create(m_tmp_oo_1, &
    8036             :                            template=m_siginv(ispin), &
    8037           0 :                            matrix_type=dbcsr_type_no_symmetry)
    8038             :          CALL dbcsr_create(m_tmp_no_1, &
    8039             :                            template=m_quench_t(ispin), &
    8040           0 :                            matrix_type=dbcsr_type_no_symmetry)
    8041             :          CALL dbcsr_create(m_tmp_no_2, &
    8042             :                            template=m_quench_t(ispin), &
    8043           0 :                            matrix_type=dbcsr_type_no_symmetry)
    8044             :          CALL dbcsr_create(m_tmp_x_in, &
    8045             :                            template=m_quench_t(ispin), &
    8046           0 :                            matrix_type=dbcsr_type_no_symmetry)
    8047             : 
    8048             :          ! transform the input X to take into account the normalization constraint
    8049           0 :          IF (normalize_orbitals) THEN
    8050             : 
    8051             :             ! H.D = ( (H.D) - ST.[tr(T).(H.D)]_ii ) . [sig_sqrti]_ii
    8052             : 
    8053             :             ! get [tr(T).HD]_ii
    8054           0 :             CALL dbcsr_copy(m_tmp_oo_1, m_sig_sqrti_ii(ispin))
    8055             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    8056             :                                 m_x_in(ispin), &
    8057             :                                 m_ST(ispin), &
    8058             :                                 0.0_dp, m_tmp_oo_1, &
    8059           0 :                                 retain_sparsity=.TRUE.)
    8060           0 :             CALL dbcsr_get_info(m_sig_sqrti_ii(ispin), nfullrows_total=dim0)
    8061           0 :             ALLOCATE (tg_diagonal(dim0))
    8062           0 :             CALL dbcsr_get_diag(m_tmp_oo_1, tg_diagonal)
    8063           0 :             CALL dbcsr_set(m_tmp_oo_1, 0.0_dp)
    8064           0 :             CALL dbcsr_set_diag(m_tmp_oo_1, tg_diagonal)
    8065           0 :             DEALLOCATE (tg_diagonal)
    8066             : 
    8067           0 :             CALL dbcsr_copy(m_tmp_no_1, m_x_in(ispin))
    8068             :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    8069             :                                 m_t(ispin), &
    8070             :                                 m_tmp_oo_1, &
    8071             :                                 1.0_dp, m_tmp_no_1, &
    8072           0 :                                 filter_eps=eps_filter)
    8073             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8074             :                                 m_tmp_no_1, &
    8075             :                                 m_sig_sqrti_ii(ispin), &
    8076             :                                 0.0_dp, m_tmp_x_in, &
    8077           0 :                                 filter_eps=eps_filter)
    8078             : 
    8079             :          ELSE
    8080             : 
    8081           0 :             CALL dbcsr_copy(m_tmp_x_in, m_x_in(ispin))
    8082             : 
    8083             :          END IF ! normalize_orbitals
    8084             : 
    8085           0 :          IF (path_num .EQ. hessian_path_reuse) THEN
    8086             : 
    8087             :             ! apply pre-computed F_vv and S_vv to X
    8088             : 
    8089             : #if 0
    8090             : ! RZK-warning: negative sign at penalty_prefactor_local is that
    8091             : ! magical fix for the negative definite problem
    8092             : ! (since penalty_prefactor_local<0 the coeff before S_vv must
    8093             : ! be multiplied by -1 to take the step in the right direction)
    8094             : !CALL dbcsr_multiply("N","N",-4.0_dp*penalty_prefactor_local,&
    8095             : !        m_s_vv(ispin),&
    8096             : !        m_tmp_x_in,&
    8097             : !        0.0_dp,m_tmp_no_1,&
    8098             : !        filter_eps=eps_filter)
    8099             : !CALL dbcsr_copy(m_x_out(ispin),m_quench_t(ispin))
    8100             : !CALL dbcsr_multiply("N","N",1.0_dp,&
    8101             : !        m_tmp_no_1,&
    8102             : !        m_siginv(ispin),&
    8103             : !        0.0_dp,m_x_out(ispin),&
    8104             : !        retain_sparsity=.TRUE.)
    8105             : 
    8106             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8107             :                                 m_s(1), &
    8108             :                                 m_tmp_x_in, &
    8109             :                                 0.0_dp, m_tmp_no_1, &
    8110             :                                 filter_eps=eps_filter)
    8111             :             CALL dbcsr_copy(m_x_out(ispin), m_quench_t(ispin))
    8112             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8113             :                                 m_tmp_no_1, &
    8114             :                                 m_siginv(ispin), &
    8115             :                                 0.0_dp, m_x_out(ispin), &
    8116             :                                 retain_sparsity=.TRUE.)
    8117             : 
    8118             : !CALL dbcsr_copy(m_x_out(ispin),m_quench_t(ispin))
    8119             : !CALL dbcsr_multiply("N","N",1.0_dp,&
    8120             : !        m_s(1),&
    8121             : !        m_tmp_x_in,&
    8122             : !        0.0_dp,m_x_out(ispin),&
    8123             : !        retain_sparsity=.TRUE.)
    8124             : 
    8125             : #else
    8126             : 
    8127             :             ! debugging: only vv matrices, oo matrices are kronecker
    8128           0 :             CALL dbcsr_copy(m_x_out(ispin), m_quench_t(ispin))
    8129             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8130             :                                 m_ks_vv(ispin), &
    8131             :                                 m_tmp_x_in, &
    8132             :                                 0.0_dp, m_x_out(ispin), &
    8133           0 :                                 retain_sparsity=.TRUE.)
    8134             : 
    8135           0 :             CALL dbcsr_copy(m_tmp_no_2, m_quench_t(ispin))
    8136             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8137             :                                 m_s_vv(ispin), &
    8138             :                                 m_tmp_x_in, &
    8139             :                                 0.0_dp, m_tmp_no_2, &
    8140           0 :                                 retain_sparsity=.TRUE.)
    8141             :             CALL dbcsr_add(m_x_out(ispin), m_tmp_no_2, &
    8142           0 :                            1.0_dp, -4.0_dp*penalty_prefactor_local + 1.0_dp)
    8143             : #endif
    8144             : 
    8145             : !          ! F_vv.X.S_oo
    8146             : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8147             : !                  m_ks_vv(ispin),&
    8148             : !                  m_tmp_x_in,&
    8149             : !                  0.0_dp,m_tmp_no_1,&
    8150             : !                  filter_eps=eps_filter,&
    8151             : !                  )
    8152             : !          CALL dbcsr_copy(m_x_out(ispin),m_quench_t(ispin))
    8153             : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8154             : !                  m_tmp_no_1,&
    8155             : !                  m_siginv(ispin),&
    8156             : !                  0.0_dp,m_x_out(ispin),&
    8157             : !                  retain_sparsity=.TRUE.,&
    8158             : !                  )
    8159             : !
    8160             : !          ! S_vv.X.F_oo
    8161             : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8162             : !                  m_s_vv(ispin),&
    8163             : !                  m_tmp_x_in,&
    8164             : !                  0.0_dp,m_tmp_no_1,&
    8165             : !                  filter_eps=eps_filter,&
    8166             : !                  )
    8167             : !          CALL dbcsr_copy(m_tmp_no_2,m_quench_t(ispin))
    8168             : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8169             : !                  m_tmp_no_1,&
    8170             : !                  m_siginvTFTsiginv(ispin),&
    8171             : !                  0.0_dp,m_tmp_no_2,&
    8172             : !                  retain_sparsity=.TRUE.,&
    8173             : !                  )
    8174             : !          CALL dbcsr_add(m_x_out(ispin),m_tmp_no_2,&
    8175             : !               1.0_dp,-1.0_dp)
    8176             : !! we have to add occ voll penalty here (the Svv termi (i.e. both Svv.D.Soo)
    8177             : !!  and STsiginv terms)
    8178             : !
    8179             : !         ! S_vo.X^t.F_vo
    8180             : !          CALL dbcsr_multiply("T","N",1.0_dp,&
    8181             : !                  m_tmp_x_in,&
    8182             : !                  m_g_full(ispin),&
    8183             : !                  0.0_dp,m_tmp_oo_1,&
    8184             : !                  filter_eps=eps_filter,&
    8185             : !                  )
    8186             : !          CALL dbcsr_copy(m_tmp_no_2,m_quench_t(ispin))
    8187             : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8188             : !                  m_STsiginv(ispin),&
    8189             : !                  m_tmp_oo_1,&
    8190             : !                  0.0_dp,m_tmp_no_2,&
    8191             : !                  retain_sparsity=.TRUE.,&
    8192             : !                  )
    8193             : !          CALL dbcsr_add(m_x_out(ispin),m_tmp_no_2,&
    8194             : !                  1.0_dp,-1.0_dp)
    8195             : !
    8196             : !          ! S_vo.X^t.F_vo
    8197             : !          CALL dbcsr_multiply("T","N",1.0_dp,&
    8198             : !                  m_tmp_x_in,&
    8199             : !                  m_STsiginv(ispin),&
    8200             : !                  0.0_dp,m_tmp_oo_1,&
    8201             : !                  filter_eps=eps_filter,&
    8202             : !                  )
    8203             : !          CALL dbcsr_copy(m_tmp_no_2,m_quench_t(ispin))
    8204             : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8205             : !                  m_g_full(ispin),&
    8206             : !                  m_tmp_oo_1,&
    8207             : !                  0.0_dp,m_tmp_no_2,&
    8208             : !                  retain_sparsity=.TRUE.,&
    8209             : !                  )
    8210             : !          CALL dbcsr_add(m_x_out(ispin),m_tmp_no_2,&
    8211             : !                  1.0_dp,-1.0_dp)
    8212             : 
    8213           0 :          ELSE IF (path_num .EQ. hessian_path_assemble) THEN
    8214             : 
    8215             :             ! compute F_vv.X and S_vv.X directly
    8216             :             ! this path will be advantageous if the number
    8217             :             ! of PCG iterations is small
    8218           0 :             CPABORT("path is NYI")
    8219             : 
    8220             :          ELSE
    8221           0 :             CPABORT("illegal path")
    8222             :          END IF ! path
    8223             : 
    8224             :          ! transform the output to take into account the normalization constraint
    8225           0 :          IF (normalize_orbitals) THEN
    8226             : 
    8227             :             ! H.D = ( (H.D) - ST.[tr(T).(H.D)]_ii ) . [sig_sqrti]_ii
    8228             : 
    8229             :             ! get [tr(T).HD]_ii
    8230           0 :             CALL dbcsr_copy(m_tmp_oo_1, m_sig_sqrti_ii(ispin))
    8231             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    8232             :                                 m_t(ispin), &
    8233             :                                 m_x_out(ispin), &
    8234             :                                 0.0_dp, m_tmp_oo_1, &
    8235           0 :                                 retain_sparsity=.TRUE.)
    8236           0 :             CALL dbcsr_get_info(m_sig_sqrti_ii(ispin), nfullrows_total=dim0)
    8237           0 :             ALLOCATE (tg_diagonal(dim0))
    8238           0 :             CALL dbcsr_get_diag(m_tmp_oo_1, tg_diagonal)
    8239           0 :             CALL dbcsr_set(m_tmp_oo_1, 0.0_dp)
    8240           0 :             CALL dbcsr_set_diag(m_tmp_oo_1, tg_diagonal)
    8241           0 :             DEALLOCATE (tg_diagonal)
    8242             : 
    8243             :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    8244             :                                 m_ST(ispin), &
    8245             :                                 m_tmp_oo_1, &
    8246             :                                 1.0_dp, m_x_out(ispin), &
    8247           0 :                                 retain_sparsity=.TRUE.)
    8248           0 :             CALL dbcsr_copy(m_tmp_no_1, m_x_out(ispin))
    8249             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8250             :                                 m_tmp_no_1, &
    8251             :                                 m_sig_sqrti_ii(ispin), &
    8252             :                                 0.0_dp, m_x_out(ispin), &
    8253           0 :                                 retain_sparsity=.TRUE.)
    8254             : 
    8255             :          END IF ! normalize_orbitals
    8256             : 
    8257             :          CALL dbcsr_scale(m_x_out(ispin), &
    8258           0 :                           2.0_dp*spin_factor)
    8259             : 
    8260           0 :          CALL dbcsr_release(m_tmp_oo_1)
    8261           0 :          CALL dbcsr_release(m_tmp_no_1)
    8262           0 :          CALL dbcsr_release(m_tmp_no_2)
    8263           0 :          CALL dbcsr_release(m_tmp_x_in)
    8264             : 
    8265             :       END DO !ispin
    8266             : 
    8267             :       ! there is one more part of the hessian that comes
    8268             :       ! from T-dependence of the KS matrix
    8269             :       ! it is neglected here
    8270             : 
    8271           0 :       CALL timestop(handle)
    8272             : 
    8273           0 :    END SUBROUTINE apply_hessian
    8274             : 
    8275             : ! *****************************************************************************
    8276             : !> \brief Serial code that constructs an approximate Hessian
    8277             : !> \param matrix_grad ...
    8278             : !> \param matrix_step ...
    8279             : !> \param matrix_S_ao ...
    8280             : !> \param matrix_F_ao ...
    8281             : !> \param matrix_S_mo ...
    8282             : !> \param matrix_F_mo ...
    8283             : !> \param matrix_S_vo ...
    8284             : !> \param matrix_F_vo ...
    8285             : !> \param quench_t ...
    8286             : !> \param penalty_occ_vol ...
    8287             : !> \param penalty_occ_vol_prefactor ...
    8288             : !> \param penalty_occ_vol_pf2 ...
    8289             : !> \param spin_factor ...
    8290             : !> \param eps_zero ...
    8291             : !> \param m_s ...
    8292             : !> \param para_env ...
    8293             : !> \param blacs_env ...
    8294             : !> \par History
    8295             : !>       2012.02 created [Rustam Z. Khaliullin]
    8296             : !> \author Rustam Z. Khaliullin
    8297             : ! **************************************************************************************************
    8298           0 :    SUBROUTINE hessian_diag_apply(matrix_grad, matrix_step, matrix_S_ao, &
    8299             :                                  matrix_F_ao, matrix_S_mo, matrix_F_mo, matrix_S_vo, matrix_F_vo, quench_t, &
    8300             :                                  penalty_occ_vol, penalty_occ_vol_prefactor, penalty_occ_vol_pf2, &
    8301             :                                  spin_factor, eps_zero, m_s, para_env, blacs_env)
    8302             : 
    8303             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_grad, matrix_step, matrix_S_ao, &
    8304             :                                                             matrix_F_ao, matrix_S_mo
    8305             :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix_F_mo
    8306             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_S_vo, matrix_F_vo, quench_t
    8307             :       LOGICAL, INTENT(IN)                                :: penalty_occ_vol
    8308             :       REAL(KIND=dp), INTENT(IN)                          :: penalty_occ_vol_prefactor, &
    8309             :                                                             penalty_occ_vol_pf2, spin_factor, &
    8310             :                                                             eps_zero
    8311             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_s
    8312             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    8313             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    8314             : 
    8315             :       CHARACTER(len=*), PARAMETER :: routineN = 'hessian_diag_apply'
    8316             : 
    8317             :       INTEGER :: ao_hori_offset, ao_vert_offset, block_col, block_row, col, H_size, handle, ii, &
    8318             :          INFO, jj, lev1_hori_offset, lev1_vert_offset, lev2_hori_offset, lev2_vert_offset, LWORK, &
    8319             :          nblkcols_tot, nblkrows_tot, ncores, orb_i, orb_j, row, unit_nr, zero_neg_eiv
    8320           0 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ao_block_sizes, ao_domain_sizes, &
    8321           0 :                                                             mo_block_sizes
    8322           0 :       INTEGER, DIMENSION(:), POINTER                     :: ao_blk_sizes, mo_blk_sizes
    8323             :       LOGICAL                                            :: found, found_col, found_row
    8324             :       REAL(KIND=dp)                                      :: penalty_prefactor_local, test_error
    8325           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: eigenvalues, Grad_vec, Step_vec, tmp, &
    8326           0 :                                                             tmpr, work
    8327           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: F_ao_block, F_mo_block, H, Hinv, &
    8328           0 :                                                             new_block, S_ao_block, S_mo_block, &
    8329           0 :                                                             test, test2
    8330           0 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block_p
    8331             :       TYPE(cp_logger_type), POINTER                      :: logger
    8332             :       TYPE(dbcsr_distribution_type)                      :: main_dist
    8333             :       TYPE(dbcsr_type)                                   :: matrix_F_ao_sym, matrix_F_mo_sym, &
    8334             :                                                             matrix_S_ao_sym, matrix_S_mo_sym
    8335             : 
    8336           0 :       CALL timeset(routineN, handle)
    8337             : 
    8338             :       ! get a useful output_unit
    8339           0 :       logger => cp_get_default_logger()
    8340           0 :       IF (logger%para_env%is_source()) THEN
    8341           0 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    8342             :       ELSE
    8343             :          unit_nr = -1
    8344             :       END IF
    8345             : 
    8346             :       !JHU use and test for unused debug variables
    8347           0 :       CPASSERT(ASSOCIATED(blacs_env))
    8348           0 :       CPASSERT(ASSOCIATED(para_env))
    8349             :       MARK_USED(blacs_env)
    8350             :       MARK_USED(para_env)
    8351             : 
    8352           0 :       CALL dbcsr_get_info(m_s, row_blk_size=ao_blk_sizes)
    8353           0 :       CALL dbcsr_get_info(matrix_S_vo, row_blk_size=ao_blk_sizes)
    8354           0 :       CALL dbcsr_get_info(matrix_F_vo, row_blk_size=ao_blk_sizes)
    8355             : 
    8356             :       ! serial code only
    8357           0 :       CALL dbcsr_get_info(matrix=matrix_S_ao, distribution=main_dist)
    8358           0 :       CALL dbcsr_distribution_get(main_dist, numnodes=ncores)
    8359           0 :       IF (ncores .GT. 1) THEN
    8360           0 :          CPABORT("serial code only")
    8361             :       END IF
    8362             : 
    8363             :       CALL dbcsr_get_info(quench_t, row_blk_size=ao_blk_sizes, col_blk_size=mo_blk_sizes, &
    8364           0 :                           nblkrows_total=nblkrows_tot, nblkcols_total=nblkcols_tot)
    8365           0 :       CPASSERT(nblkrows_tot == nblkcols_tot)
    8366           0 :       ALLOCATE (mo_block_sizes(nblkcols_tot), ao_block_sizes(nblkcols_tot))
    8367           0 :       ALLOCATE (ao_domain_sizes(nblkcols_tot))
    8368           0 :       mo_block_sizes(:) = mo_blk_sizes(:)
    8369           0 :       ao_block_sizes(:) = ao_blk_sizes(:)
    8370           0 :       ao_domain_sizes(:) = 0
    8371             : 
    8372             :       CALL dbcsr_create(matrix_S_ao_sym, &
    8373             :                         template=matrix_S_ao, &
    8374           0 :                         matrix_type=dbcsr_type_no_symmetry)
    8375           0 :       CALL dbcsr_desymmetrize(matrix_S_ao, matrix_S_ao_sym)
    8376           0 :       CALL dbcsr_scale(matrix_S_ao_sym, 2.0_dp*spin_factor)
    8377             : 
    8378             :       CALL dbcsr_create(matrix_F_ao_sym, &
    8379             :                         template=matrix_F_ao, &
    8380           0 :                         matrix_type=dbcsr_type_no_symmetry)
    8381           0 :       CALL dbcsr_desymmetrize(matrix_F_ao, matrix_F_ao_sym)
    8382           0 :       CALL dbcsr_scale(matrix_F_ao_sym, 2.0_dp*spin_factor)
    8383             : 
    8384             :       CALL dbcsr_create(matrix_S_mo_sym, &
    8385             :                         template=matrix_S_mo, &
    8386           0 :                         matrix_type=dbcsr_type_no_symmetry)
    8387           0 :       CALL dbcsr_desymmetrize(matrix_S_mo, matrix_S_mo_sym)
    8388             : 
    8389             :       CALL dbcsr_create(matrix_F_mo_sym, &
    8390             :                         template=matrix_F_mo, &
    8391           0 :                         matrix_type=dbcsr_type_no_symmetry)
    8392           0 :       CALL dbcsr_desymmetrize(matrix_F_mo, matrix_F_mo_sym)
    8393             : 
    8394           0 :       IF (penalty_occ_vol) THEN
    8395           0 :          penalty_prefactor_local = penalty_occ_vol_prefactor/(2.0_dp*spin_factor)
    8396             :       ELSE
    8397           0 :          penalty_prefactor_local = 0.0_dp
    8398             :       END IF
    8399             : 
    8400           0 :       WRITE (unit_nr, *) "penalty_prefactor_local: ", penalty_prefactor_local
    8401           0 :       WRITE (unit_nr, *) "penalty_prefactor_2: ", penalty_occ_vol_pf2
    8402             : 
    8403             :       !CALL dbcsr_print(matrix_grad)
    8404             :       !CALL dbcsr_print(matrix_F_ao_sym)
    8405             :       !CALL dbcsr_print(matrix_S_ao_sym)
    8406             :       !CALL dbcsr_print(matrix_F_mo_sym)
    8407             :       !CALL dbcsr_print(matrix_S_mo_sym)
    8408             : 
    8409             :       ! loop over domains to find the size of the Hessian
    8410           0 :       H_size = 0
    8411           0 :       DO col = 1, nblkcols_tot
    8412             : 
    8413             :          ! find sizes of AO submatrices
    8414           0 :          DO row = 1, nblkrows_tot
    8415             : 
    8416             :             CALL dbcsr_get_block_p(quench_t, &
    8417           0 :                                    row, col, block_p, found)
    8418           0 :             IF (found) THEN
    8419           0 :                ao_domain_sizes(col) = ao_domain_sizes(col) + ao_blk_sizes(row)
    8420             :             END IF
    8421             : 
    8422             :          END DO
    8423             : 
    8424           0 :          H_size = H_size + ao_domain_sizes(col)*mo_block_sizes(col)
    8425             : 
    8426             :       END DO
    8427             : 
    8428           0 :       ALLOCATE (H(H_size, H_size))
    8429           0 :       H(:, :) = 0.0_dp
    8430             : 
    8431             :       ! fill the Hessian matrix
    8432           0 :       lev1_vert_offset = 0
    8433             :       ! loop over all pairs of fragments
    8434           0 :       DO row = 1, nblkcols_tot
    8435             : 
    8436           0 :          lev1_hori_offset = 0
    8437           0 :          DO col = 1, nblkcols_tot
    8438             : 
    8439             :             ! prepare blocks for the current row-column fragment pair
    8440           0 :             ALLOCATE (F_ao_block(ao_domain_sizes(row), ao_domain_sizes(col)))
    8441           0 :             ALLOCATE (S_ao_block(ao_domain_sizes(row), ao_domain_sizes(col)))
    8442           0 :             ALLOCATE (F_mo_block(mo_block_sizes(row), mo_block_sizes(col)))
    8443           0 :             ALLOCATE (S_mo_block(mo_block_sizes(row), mo_block_sizes(col)))
    8444             : 
    8445           0 :             F_ao_block(:, :) = 0.0_dp
    8446           0 :             S_ao_block(:, :) = 0.0_dp
    8447           0 :             F_mo_block(:, :) = 0.0_dp
    8448           0 :             S_mo_block(:, :) = 0.0_dp
    8449             : 
    8450             :             ! fill AO submatrices
    8451             :             ! loop over all blocks of the AO dbcsr matrix
    8452           0 :             ao_vert_offset = 0
    8453           0 :             DO block_row = 1, nblkcols_tot
    8454             : 
    8455             :                CALL dbcsr_get_block_p(quench_t, &
    8456           0 :                                       block_row, row, block_p, found_row)
    8457           0 :                IF (found_row) THEN
    8458             : 
    8459           0 :                   ao_hori_offset = 0
    8460           0 :                   DO block_col = 1, nblkcols_tot
    8461             : 
    8462             :                      CALL dbcsr_get_block_p(quench_t, &
    8463           0 :                                             block_col, col, block_p, found_col)
    8464           0 :                      IF (found_col) THEN
    8465             : 
    8466             :                         CALL dbcsr_get_block_p(matrix_F_ao_sym, &
    8467           0 :                                                block_row, block_col, block_p, found)
    8468           0 :                         IF (found) THEN
    8469             :                            ! copy the block into the submatrix
    8470             :                            F_ao_block(ao_vert_offset + 1:ao_vert_offset + ao_block_sizes(block_row), &
    8471             :                                       ao_hori_offset + 1:ao_hori_offset + ao_block_sizes(block_col)) &
    8472           0 :                               = block_p(:, :)
    8473             :                         END IF
    8474             : 
    8475             :                         CALL dbcsr_get_block_p(matrix_S_ao_sym, &
    8476           0 :                                                block_row, block_col, block_p, found)
    8477           0 :                         IF (found) THEN
    8478             :                            ! copy the block into the submatrix
    8479             :                            S_ao_block(ao_vert_offset + 1:ao_vert_offset + ao_block_sizes(block_row), &
    8480             :                                       ao_hori_offset + 1:ao_hori_offset + ao_block_sizes(block_col)) &
    8481           0 :                               = block_p(:, :)
    8482             :                         END IF
    8483             : 
    8484           0 :                         ao_hori_offset = ao_hori_offset + ao_block_sizes(block_col)
    8485             : 
    8486             :                      END IF
    8487             : 
    8488             :                   END DO
    8489             : 
    8490           0 :                   ao_vert_offset = ao_vert_offset + ao_block_sizes(block_row)
    8491             : 
    8492             :                END IF
    8493             : 
    8494             :             END DO
    8495             : 
    8496             :             ! fill MO submatrices
    8497           0 :             CALL dbcsr_get_block_p(matrix_F_mo_sym, row, col, block_p, found)
    8498           0 :             IF (found) THEN
    8499             :                ! copy the block into the submatrix
    8500           0 :                F_mo_block(1:mo_block_sizes(row), 1:mo_block_sizes(col)) = block_p(:, :)
    8501             :             END IF
    8502           0 :             CALL dbcsr_get_block_p(matrix_S_mo_sym, row, col, block_p, found)
    8503           0 :             IF (found) THEN
    8504             :                ! copy the block into the submatrix
    8505           0 :                S_mo_block(1:mo_block_sizes(row), 1:mo_block_sizes(col)) = block_p(:, :)
    8506             :             END IF
    8507             : 
    8508             :             !WRITE(*,*) "F_AO_BLOCK", row, col, ao_domain_sizes(row), ao_domain_sizes(col)
    8509             :             !DO ii=1,ao_domain_sizes(row)
    8510             :             !  WRITE(*,'(100F13.9)') F_ao_block(ii,:)
    8511             :             !ENDDO
    8512             :             !WRITE(*,*) "S_AO_BLOCK", row, col
    8513             :             !DO ii=1,ao_domain_sizes(row)
    8514             :             !  WRITE(*,'(100F13.9)') S_ao_block(ii,:)
    8515             :             !ENDDO
    8516             :             !WRITE(*,*) "F_MO_BLOCK", row, col
    8517             :             !DO ii=1,mo_block_sizes(row)
    8518             :             !  WRITE(*,'(100F13.9)') F_mo_block(ii,:)
    8519             :             !ENDDO
    8520             :             !WRITE(*,*) "S_MO_BLOCK", row, col, mo_block_sizes(row), mo_block_sizes(col)
    8521             :             !DO ii=1,mo_block_sizes(row)
    8522             :             !  WRITE(*,'(100F13.9)') S_mo_block(ii,:)
    8523             :             !ENDDO
    8524             : 
    8525             :             ! construct tensor products for the current row-column fragment pair
    8526             :             lev2_vert_offset = 0
    8527           0 :             DO orb_j = 1, mo_block_sizes(row)
    8528             : 
    8529             :                lev2_hori_offset = 0
    8530           0 :                DO orb_i = 1, mo_block_sizes(col)
    8531           0 :                   IF (orb_i .EQ. orb_j .AND. row .EQ. col) THEN
    8532             :                      H(lev1_vert_offset + lev2_vert_offset + 1:lev1_vert_offset + lev2_vert_offset + ao_domain_sizes(row), &
    8533             :                        lev1_hori_offset + lev2_hori_offset + 1:lev1_hori_offset + lev2_hori_offset + ao_domain_sizes(col)) &
    8534             :                         != -penalty_prefactor_local*S_ao_block(:,:)
    8535           0 :                         = F_ao_block(:, :) + S_ao_block(:, :)
    8536             : !=S_ao_block(:,:)
    8537             : !RZK-warning               =F_ao_block(:,:)+( 1.0_dp + penalty_prefactor_local )*S_ao_block(:,:)
    8538             : !               =S_mo_block(orb_j,orb_i)*F_ao_block(:,:)&
    8539             : !               -F_mo_block(orb_j,orb_i)*S_ao_block(:,:)&
    8540             : !               +penalty_prefactor_local*S_mo_block(orb_j,orb_i)*S_ao_block(:,:)
    8541             :                   END IF
    8542             :                   !WRITE(*,*) row, col, orb_j, orb_i, lev1_vert_offset+lev2_vert_offset+1, ao_domain_sizes(row),&
    8543             :                   !   lev1_hori_offset+lev2_hori_offset+1, ao_domain_sizes(col), S_mo_block(orb_j,orb_i)
    8544             : 
    8545           0 :                   lev2_hori_offset = lev2_hori_offset + ao_domain_sizes(col)
    8546             : 
    8547             :                END DO
    8548             : 
    8549           0 :                lev2_vert_offset = lev2_vert_offset + ao_domain_sizes(row)
    8550             : 
    8551             :             END DO
    8552             : 
    8553           0 :             lev1_hori_offset = lev1_hori_offset + ao_domain_sizes(col)*mo_block_sizes(col)
    8554             : 
    8555           0 :             DEALLOCATE (F_ao_block)
    8556           0 :             DEALLOCATE (S_ao_block)
    8557           0 :             DEALLOCATE (F_mo_block)
    8558           0 :             DEALLOCATE (S_mo_block)
    8559             : 
    8560             :          END DO ! col fragment
    8561             : 
    8562           0 :          lev1_vert_offset = lev1_vert_offset + ao_domain_sizes(row)*mo_block_sizes(row)
    8563             : 
    8564             :       END DO ! row fragment
    8565             : 
    8566           0 :       CALL dbcsr_release(matrix_S_ao_sym)
    8567           0 :       CALL dbcsr_release(matrix_F_ao_sym)
    8568           0 :       CALL dbcsr_release(matrix_S_mo_sym)
    8569           0 :       CALL dbcsr_release(matrix_F_mo_sym)
    8570             : 
    8571             : !!    ! Two more terms of the Hessian: S_vo.D.F_vo and F_vo.D.S_vo
    8572             : !!    ! It seems that these terms break positive definite property of the Hessian
    8573             : !!    ALLOCATE(H1(H_size,H_size))
    8574             : !!    ALLOCATE(H2(H_size,H_size))
    8575             : !!    H1=0.0_dp
    8576             : !!    H2=0.0_dp
    8577             : !!    DO row = 1, nblkcols_tot
    8578             : !!
    8579             : !!       lev1_hori_offset=0
    8580             : !!       DO col = 1, nblkcols_tot
    8581             : !!
    8582             : !!          CALL dbcsr_get_block_p(matrix_F_vo,&
    8583             : !!                  row, col, block_p, found)
    8584             : !!          CALL dbcsr_get_block_p(matrix_S_vo,&
    8585             : !!                  row, col, block_p2, found2)
    8586             : !!
    8587             : !!          lev1_vert_offset=0
    8588             : !!          DO block_col = 1, nblkcols_tot
    8589             : !!
    8590             : !!             CALL dbcsr_get_block_p(quench_t,&
    8591             : !!                     row, block_col, p_new_block, found_row)
    8592             : !!
    8593             : !!             IF (found_row) THEN
    8594             : !!
    8595             : !!                ! determine offset in this short loop
    8596             : !!                lev2_vert_offset=0
    8597             : !!                DO block_row=1,row-1
    8598             : !!                   CALL dbcsr_get_block_p(quench_t,&
    8599             : !!                           block_row, block_col, p_new_block, found_col)
    8600             : !!                   IF (found_col) lev2_vert_offset=lev2_vert_offset+ao_block_sizes(block_row)
    8601             : !!                ENDDO
    8602             : !!                !!!!!!!! short loop
    8603             : !!
    8604             : !!                ! over all electrons of the block
    8605             : !!                DO orb_i=1, mo_block_sizes(col)
    8606             : !!
    8607             : !!                   ! into all possible locations
    8608             : !!                   DO orb_j=1, mo_block_sizes(block_col)
    8609             : !!
    8610             : !!                      ! column is copied several times
    8611             : !!                      DO copy=1, ao_domain_sizes(col)
    8612             : !!
    8613             : !!                         IF (found) THEN
    8614             : !!
    8615             : !!                            !WRITE(*,*) row, col, block_col, orb_i, orb_j, copy,&
    8616             : !!                            ! lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+1,&
    8617             : !!                            ! lev1_hori_offset+(orb_i-1)*ao_domain_sizes(col)+copy
    8618             : !!
    8619             : !!                            H1( lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+1:&
    8620             : !!                                lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+ao_block_sizes(row),&
    8621             : !!                                lev1_hori_offset+(orb_i-1)*ao_domain_sizes(col)+copy )&
    8622             : !!                              =block_p(:,orb_i)
    8623             : !!
    8624             : !!                         ENDIF ! found block in the data matrix
    8625             : !!
    8626             : !!                         IF (found2) THEN
    8627             : !!
    8628             : !!                            H2( lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+1:&
    8629             : !!                                lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+ao_block_sizes(row),&
    8630             : !!                                lev1_hori_offset+(orb_i-1)*ao_domain_sizes(col)+copy )&
    8631             : !!                              =block_p2(:,orb_i)
    8632             : !!
    8633             : !!                         ENDIF ! found block in the data matrix
    8634             : !!
    8635             : !!                      ENDDO
    8636             : !!
    8637             : !!                   ENDDO
    8638             : !!
    8639             : !!                ENDDO
    8640             : !!
    8641             : !!                !lev2_vert_offset=lev2_vert_offset+ao_block_sizes(row)
    8642             : !!
    8643             : !!             ENDIF ! found block in the quench matrix
    8644             : !!
    8645             : !!             lev1_vert_offset=lev1_vert_offset+&
    8646             : !!                ao_domain_sizes(block_col)*mo_block_sizes(block_col)
    8647             : !!
    8648             : !!          ENDDO
    8649             : !!
    8650             : !!          lev1_hori_offset=lev1_hori_offset+&
    8651             : !!             ao_domain_sizes(col)*mo_block_sizes(col)
    8652             : !!
    8653             : !!       ENDDO
    8654             : !!
    8655             : !!       !lev2_vert_offset=lev2_vert_offset+ao_block_sizes(row)
    8656             : !!
    8657             : !!    ENDDO
    8658             : !!    H1(:,:)=H1(:,:)*2.0_dp*spin_factor
    8659             : !!    !!!WRITE(*,*) "F_vo"
    8660             : !!    !!!DO ii=1,H_size
    8661             : !!    !!! WRITE(*,'(100F13.9)') H1(ii,:)
    8662             : !!    !!!ENDDO
    8663             : !!    !!!WRITE(*,*) "S_vo"
    8664             : !!    !!!DO ii=1,H_size
    8665             : !!    !!! WRITE(*,'(100F13.9)') H2(ii,:)
    8666             : !!    !!!ENDDO
    8667             : !!    !!!!! add terms to the hessian
    8668             : !!    DO ii=1,H_size
    8669             : !!       DO jj=1,H_size
    8670             : !!! add penalty_occ_vol term
    8671             : !!          H(ii,jj)=H(ii,jj)-H1(ii,jj)*H2(jj,ii)-H1(jj,ii)*H2(ii,jj)
    8672             : !!       ENDDO
    8673             : !!    ENDDO
    8674             : !!    DEALLOCATE(H1)
    8675             : !!    DEALLOCATE(H2)
    8676             : 
    8677             : !!    ! S_vo.S_vo diagonal component due to determiant constraint
    8678             : !!    ! use grad vector temporarily
    8679             : !!    IF (penalty_occ_vol) THEN
    8680             : !!       ALLOCATE(Grad_vec(H_size))
    8681             : !!       Grad_vec(:)=0.0_dp
    8682             : !!       lev1_vert_offset=0
    8683             : !!       ! loop over all electron blocks
    8684             : !!       DO col = 1, nblkcols_tot
    8685             : !!
    8686             : !!          ! loop over AO-rows of the dbcsr matrix
    8687             : !!          lev2_vert_offset=0
    8688             : !!          DO row = 1, nblkrows_tot
    8689             : !!
    8690             : !!             CALL dbcsr_get_block_p(quench_t,&
    8691             : !!                     row, col, block_p, found_row)
    8692             : !!             IF (found_row) THEN
    8693             : !!
    8694             : !!                CALL dbcsr_get_block_p(matrix_S_vo,&
    8695             : !!                        row, col, block_p, found)
    8696             : !!                IF (found) THEN
    8697             : !!                   ! copy the data into the vector, column by column
    8698             : !!                   DO orb_i=1, mo_block_sizes(col)
    8699             : !!                      Grad_vec(lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+1:&
    8700             : !!                               lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+ao_block_sizes(row))&
    8701             : !!                               =block_p(:,orb_i)
    8702             : !!                   ENDDO
    8703             : !!
    8704             : !!                ENDIF
    8705             : !!
    8706             : !!                lev2_vert_offset=lev2_vert_offset+ao_block_sizes(row)
    8707             : !!
    8708             : !!             ENDIF
    8709             : !!
    8710             : !!          ENDDO
    8711             : !!
    8712             : !!          lev1_vert_offset=lev1_vert_offset+ao_domain_sizes(col)*mo_block_sizes(col)
    8713             : !!
    8714             : !!       ENDDO ! loop over electron blocks
    8715             : !!       ! update H now
    8716             : !!       DO ii=1,H_size
    8717             : !!          DO jj=1,H_size
    8718             : !!             H(ii,jj)=H(ii,jj)+penalty_occ_vol_prefactor*&
    8719             : !!                      penalty_occ_vol_pf2*Grad_vec(ii)*Grad_vec(jj)
    8720             : !!          ENDDO
    8721             : !!       ENDDO
    8722             : !!       DEALLOCATE(Grad_vec)
    8723             : !!    ENDIF ! penalty_occ_vol
    8724             : 
    8725             : !S-1.G ! invert S using cholesky
    8726             : !S-1.G CALL dbcsr_create(m_prec_out,&
    8727             : !S-1.G         template=m_s,&
    8728             : !S-1.G         matrix_type=dbcsr_type_no_symmetry)
    8729             : !S-1.G CALL dbcsr_copy(m_prec_out,m_s)
    8730             : !S-1.G CALL dbcsr_cholesky_decompose(m_prec_out,&
    8731             : !S-1.G         para_env=para_env,&
    8732             : !S-1.G         blacs_env=blacs_env)
    8733             : !S-1.G CALL dbcsr_cholesky_invert(m_prec_out,&
    8734             : !S-1.G         para_env=para_env,&
    8735             : !S-1.G         blacs_env=blacs_env,&
    8736             : !S-1.G         uplo_to_full=.TRUE.)
    8737             : !S-1.G CALL dbcsr_multiply("N","N",1.0_dp,&
    8738             : !S-1.G         m_prec_out,&
    8739             : !S-1.G         matrix_grad,&
    8740             : !S-1.G         0.0_dp,matrix_step,&
    8741             : !S-1.G         filter_eps=1.0E-10_dp)
    8742             : !S-1.G !CALL dbcsr_release(m_prec_out)
    8743             : !S-1.G ALLOCATE(test3(H_size))
    8744             : 
    8745             :       ! convert gradient from the dbcsr matrix to the vector form
    8746           0 :       ALLOCATE (Grad_vec(H_size))
    8747           0 :       Grad_vec(:) = 0.0_dp
    8748           0 :       lev1_vert_offset = 0
    8749             :       ! loop over all electron blocks
    8750           0 :       DO col = 1, nblkcols_tot
    8751             : 
    8752             :          ! loop over AO-rows of the dbcsr matrix
    8753           0 :          lev2_vert_offset = 0
    8754           0 :          DO row = 1, nblkrows_tot
    8755             : 
    8756             :             CALL dbcsr_get_block_p(quench_t, &
    8757           0 :                                    row, col, block_p, found_row)
    8758           0 :             IF (found_row) THEN
    8759             : 
    8760             :                CALL dbcsr_get_block_p(matrix_grad, &
    8761           0 :                                       row, col, block_p, found)
    8762           0 :                IF (found) THEN
    8763             :                   ! copy the data into the vector, column by column
    8764           0 :                   DO orb_i = 1, mo_block_sizes(col)
    8765             :                      Grad_vec(lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + 1: &
    8766             :                               lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + ao_block_sizes(row)) &
    8767           0 :                         = block_p(:, orb_i)
    8768             : !WRITE(*,*) "GRAD: ", row, col, orb_i, lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+1, ao_block_sizes(row)
    8769             :                   END DO
    8770             : 
    8771             :                END IF
    8772             : 
    8773             : !S-1.G CALL dbcsr_get_block_p(matrix_step,&
    8774             : !S-1.G         row, col, block_p, found)
    8775             : !S-1.G IF (found) THEN
    8776             : !S-1.G    ! copy the data into the vector, column by column
    8777             : !S-1.G    DO orb_i=1, mo_block_sizes(col)
    8778             : !S-1.G       test3(lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+1:&
    8779             : !S-1.G                lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+ao_block_sizes(row))&
    8780             : !S-1.G                =block_p(:,orb_i)
    8781             : !S-1.G    ENDDO
    8782             : !S-1.G ENDIF
    8783             : 
    8784           0 :                lev2_vert_offset = lev2_vert_offset + ao_block_sizes(row)
    8785             : 
    8786             :             END IF
    8787             : 
    8788             :          END DO
    8789             : 
    8790           0 :          lev1_vert_offset = lev1_vert_offset + ao_domain_sizes(col)*mo_block_sizes(col)
    8791             : 
    8792             :       END DO ! loop over electron blocks
    8793             : 
    8794             :       !WRITE(*,*) "HESSIAN"
    8795             :       !DO ii=1,H_size
    8796             :       ! WRITE(*,*) ii
    8797             :       ! WRITE(*,'(20F14.10)') H(ii,:)
    8798             :       !ENDDO
    8799             : 
    8800             :       ! invert the Hessian
    8801           0 :       INFO = 0
    8802           0 :       ALLOCATE (Hinv(H_size, H_size))
    8803           0 :       Hinv(:, :) = H(:, :)
    8804             : 
    8805             :       ! before inverting diagonalize
    8806           0 :       ALLOCATE (eigenvalues(H_size))
    8807             :       ! Query the optimal workspace for dsyev
    8808           0 :       LWORK = -1
    8809           0 :       ALLOCATE (WORK(MAX(1, LWORK)))
    8810           0 :       CALL dsyev('V', 'L', H_size, Hinv, H_size, eigenvalues, WORK, LWORK, INFO)
    8811           0 :       LWORK = INT(WORK(1))
    8812           0 :       DEALLOCATE (WORK)
    8813             :       ! Allocate the workspace and solve the eigenproblem
    8814           0 :       ALLOCATE (WORK(MAX(1, LWORK)))
    8815           0 :       CALL dsyev('V', 'L', H_size, Hinv, H_size, eigenvalues, WORK, LWORK, INFO)
    8816           0 :       IF (INFO .NE. 0) THEN
    8817           0 :          WRITE (unit_nr, *) 'DSYEV ERROR MESSAGE: ', INFO
    8818           0 :          CPABORT("DSYEV failed")
    8819             :       END IF
    8820           0 :       DEALLOCATE (WORK)
    8821             : 
    8822             :       ! compute grad vector in the basis of Hessian eigenvectors
    8823           0 :       ALLOCATE (Step_vec(H_size))
    8824             :       ! Step_vec contains Grad_vec here
    8825           0 :       Step_vec(:) = MATMUL(TRANSPOSE(Hinv), Grad_vec)
    8826             : 
    8827             :       ! compute U.tr(U)-1 = error
    8828             :       !ALLOCATE(test(H_size,H_size))
    8829             :       !test(:,:)=MATMUL(TRANSPOSE(Hinv),Hinv)
    8830             :       !DO ii=1,H_size
    8831             :       !   test(ii,ii)=test(ii,ii)-1.0_dp
    8832             :       !ENDDO
    8833             :       !test_error=0.0_dp
    8834             :       !DO ii=1,H_size
    8835             :       !   DO jj=1,H_size
    8836             :       !      test_error=test_error+test(jj,ii)*test(jj,ii)
    8837             :       !   ENDDO
    8838             :       !ENDDO
    8839             :       !WRITE(*,*) "U.tr(U)-1 error: ", SQRT(test_error)
    8840             :       !DEALLOCATE(test)
    8841             : 
    8842             :       ! invert eigenvalues and use eigenvectors to compute the Hessian inverse
    8843             :       ! project out zero-eigenvalue directions
    8844           0 :       ALLOCATE (test(H_size, H_size))
    8845           0 :       zero_neg_eiv = 0
    8846           0 :       DO jj = 1, H_size
    8847           0 :          WRITE (unit_nr, "(I10,F20.10,F20.10)") jj, eigenvalues(jj), Step_vec(jj)
    8848           0 :          IF (eigenvalues(jj) .GT. eps_zero) THEN
    8849           0 :             test(jj, :) = Hinv(:, jj)/eigenvalues(jj)
    8850             :          ELSE
    8851           0 :             test(jj, :) = Hinv(:, jj)*0.0_dp
    8852           0 :             zero_neg_eiv = zero_neg_eiv + 1
    8853             :          END IF
    8854             :       END DO
    8855           0 :       WRITE (unit_nr, *) 'ZERO OR NEGATIVE EIGENVALUES: ', zero_neg_eiv
    8856           0 :       DEALLOCATE (Step_vec)
    8857             : 
    8858           0 :       ALLOCATE (test2(H_size, H_size))
    8859           0 :       test2(:, :) = MATMUL(Hinv, test)
    8860           0 :       Hinv(:, :) = test2(:, :)
    8861           0 :       DEALLOCATE (test, test2)
    8862             : 
    8863             :       !! shift to kill singularity
    8864             :       !shift=0.0_dp
    8865             :       !IF (eigenvalues(1).lt.0.0_dp) THEN
    8866             :       !   CPABORT("Negative eigenvalue(s)")
    8867             :       !   shift=abs(eigenvalues(1))
    8868             :       !   WRITE(*,*) "Lowest eigenvalue: ", eigenvalues(1)
    8869             :       !ENDIF
    8870             :       !DO ii=1, H_size
    8871             :       !   IF (eigenvalues(ii).gt.eps_zero) THEN
    8872             :       !      shift=shift+min(1.0_dp,eigenvalues(ii))*1.0E-4_dp
    8873             :       !      EXIT
    8874             :       !   ENDIF
    8875             :       !ENDDO
    8876             :       !WRITE(*,*) "Hessian shift: ", shift
    8877             :       !DO ii=1, H_size
    8878             :       !   H(ii,ii)=H(ii,ii)+shift
    8879             :       !ENDDO
    8880             :       !! end shift
    8881             : 
    8882           0 :       DEALLOCATE (eigenvalues)
    8883             : 
    8884             : !!!!    Hinv=H
    8885             : !!!!    INFO=0
    8886             : !!!!    CALL dpotrf('L', H_size, Hinv, H_size, INFO )
    8887             : !!!!    IF( INFO.NE.0 ) THEN
    8888             : !!!!       WRITE(*,*) 'DPOTRF ERROR MESSAGE: ', INFO
    8889             : !!!!       CPABORT("DPOTRF failed")
    8890             : !!!!    END IF
    8891             : !!!!    CALL dpotri('L', H_size, Hinv, H_size, INFO )
    8892             : !!!!    IF( INFO.NE.0 ) THEN
    8893             : !!!!       WRITE(*,*) 'DPOTRI ERROR MESSAGE: ', INFO
    8894             : !!!!       CPABORT("DPOTRI failed")
    8895             : !!!!    END IF
    8896             : !!!!    ! complete the matrix
    8897             : !!!!    DO ii=1,H_size
    8898             : !!!!       DO jj=ii+1,H_size
    8899             : !!!!          Hinv(ii,jj)=Hinv(jj,ii)
    8900             : !!!!       ENDDO
    8901             : !!!!    ENDDO
    8902             : 
    8903             :       ! compute the inversion error
    8904           0 :       ALLOCATE (test(H_size, H_size))
    8905           0 :       test(:, :) = MATMUL(Hinv, H)
    8906           0 :       DO ii = 1, H_size
    8907           0 :          test(ii, ii) = test(ii, ii) - 1.0_dp
    8908             :       END DO
    8909           0 :       test_error = 0.0_dp
    8910           0 :       DO ii = 1, H_size
    8911           0 :          DO jj = 1, H_size
    8912           0 :             test_error = test_error + test(jj, ii)*test(jj, ii)
    8913             :          END DO
    8914             :       END DO
    8915           0 :       WRITE (unit_nr, *) "Hessian inversion error: ", SQRT(test_error)
    8916           0 :       DEALLOCATE (test)
    8917             : 
    8918             :       ! prepare the output vector
    8919           0 :       ALLOCATE (Step_vec(H_size))
    8920           0 :       ALLOCATE (tmp(H_size))
    8921           0 :       tmp(:) = MATMUL(Hinv, Grad_vec)
    8922             :       !tmp(:)=MATMUL(Hinv,test3)
    8923           0 :       Step_vec(:) = -1.0_dp*tmp(:)
    8924             : 
    8925           0 :       ALLOCATE (tmpr(H_size))
    8926           0 :       tmpr(:) = MATMUL(H, Step_vec)
    8927           0 :       tmp(:) = tmpr(:) + Grad_vec(:)
    8928           0 :       DEALLOCATE (tmpr)
    8929           0 :       WRITE (unit_nr, *) "NEWTOV step error: ", MAXVAL(ABS(tmp))
    8930             : 
    8931           0 :       DEALLOCATE (tmp)
    8932             : 
    8933           0 :       DEALLOCATE (H)
    8934           0 :       DEALLOCATE (Hinv)
    8935           0 :       DEALLOCATE (Grad_vec)
    8936             : 
    8937             : !S-1.G DEALLOCATE(test3)
    8938             : 
    8939             :       ! copy the step from the vector into the dbcsr matrix
    8940             : 
    8941             :       ! re-create the step matrix to remove all blocks
    8942             :       CALL dbcsr_create(matrix_step, &
    8943             :                         template=matrix_grad, &
    8944           0 :                         matrix_type=dbcsr_type_no_symmetry)
    8945           0 :       CALL dbcsr_work_create(matrix_step, work_mutable=.TRUE.)
    8946             : 
    8947           0 :       lev1_vert_offset = 0
    8948             :       ! loop over all electron blocks
    8949           0 :       DO col = 1, nblkcols_tot
    8950             : 
    8951             :          ! loop over AO-rows of the dbcsr matrix
    8952           0 :          lev2_vert_offset = 0
    8953           0 :          DO row = 1, nblkrows_tot
    8954             : 
    8955             :             CALL dbcsr_get_block_p(quench_t, &
    8956           0 :                                    row, col, block_p, found_row)
    8957           0 :             IF (found_row) THEN
    8958             :                ! copy the data column by column
    8959           0 :                ALLOCATE (new_block(ao_block_sizes(row), mo_block_sizes(col)))
    8960           0 :                DO orb_i = 1, mo_block_sizes(col)
    8961             :                   new_block(:, orb_i) = &
    8962             :                      Step_vec(lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + 1: &
    8963           0 :                               lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + ao_block_sizes(row))
    8964             :                END DO
    8965           0 :                CALL dbcsr_put_block(matrix_step, row, col, new_block)
    8966           0 :                DEALLOCATE (new_block)
    8967           0 :                lev2_vert_offset = lev2_vert_offset + ao_block_sizes(row)
    8968             :             END IF
    8969             : 
    8970             :          END DO
    8971             : 
    8972           0 :          lev1_vert_offset = lev1_vert_offset + ao_domain_sizes(col)*mo_block_sizes(col)
    8973             : 
    8974             :       END DO ! loop over electron blocks
    8975             : 
    8976           0 :       DEALLOCATE (Step_vec)
    8977             : 
    8978           0 :       CALL dbcsr_finalize(matrix_step)
    8979             : 
    8980             : !S-1.G CALL dbcsr_create(m_tmp_no_1,&
    8981             : !S-1.G         template=matrix_step,&
    8982             : !S-1.G         matrix_type=dbcsr_type_no_symmetry)
    8983             : !S-1.G CALL dbcsr_multiply("N","N",1.0_dp,&
    8984             : !S-1.G         m_prec_out,&
    8985             : !S-1.G         matrix_step,&
    8986             : !S-1.G         0.0_dp,m_tmp_no_1,&
    8987             : !S-1.G         filter_eps=1.0E-10_dp,&
    8988             : !S-1.G         )
    8989             : !S-1.G CALL dbcsr_copy(matrix_step,m_tmp_no_1)
    8990             : !S-1.G CALL dbcsr_release(m_tmp_no_1)
    8991             : !S-1.G CALL dbcsr_release(m_prec_out)
    8992             : 
    8993           0 :       DEALLOCATE (mo_block_sizes, ao_block_sizes)
    8994           0 :       DEALLOCATE (ao_domain_sizes)
    8995             : 
    8996             :       CALL dbcsr_create(matrix_S_ao_sym, &
    8997             :                         template=quench_t, &
    8998           0 :                         matrix_type=dbcsr_type_no_symmetry)
    8999           0 :       CALL dbcsr_copy(matrix_S_ao_sym, quench_t)
    9000             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9001             :                           matrix_F_ao, &
    9002             :                           matrix_step, &
    9003             :                           0.0_dp, matrix_S_ao_sym, &
    9004           0 :                           retain_sparsity=.TRUE.)
    9005             :       CALL dbcsr_create(matrix_F_ao_sym, &
    9006             :                         template=quench_t, &
    9007           0 :                         matrix_type=dbcsr_type_no_symmetry)
    9008           0 :       CALL dbcsr_copy(matrix_F_ao_sym, quench_t)
    9009             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9010             :                           matrix_S_ao, &
    9011             :                           matrix_step, &
    9012             :                           0.0_dp, matrix_F_ao_sym, &
    9013           0 :                           retain_sparsity=.TRUE.)
    9014             :       CALL dbcsr_add(matrix_S_ao_sym, matrix_F_ao_sym, &
    9015           0 :                      1.0_dp, 1.0_dp)
    9016           0 :       CALL dbcsr_scale(matrix_S_ao_sym, 2.0_dp*spin_factor)
    9017             :       CALL dbcsr_add(matrix_S_ao_sym, matrix_grad, &
    9018           0 :                      1.0_dp, 1.0_dp)
    9019           0 :       test_error = dbcsr_maxabs(matrix_S_ao_sym)
    9020           0 :       WRITE (unit_nr, *) "NEWTOL step error: ", test_error
    9021           0 :       CALL dbcsr_release(matrix_S_ao_sym)
    9022           0 :       CALL dbcsr_release(matrix_F_ao_sym)
    9023             : 
    9024           0 :       CALL timestop(handle)
    9025             : 
    9026           0 :    END SUBROUTINE hessian_diag_apply
    9027             : 
    9028             : ! **************************************************************************************************
    9029             : !> \brief Optimization of ALMOs using trust region minimizers
    9030             : !> \param qs_env ...
    9031             : !> \param almo_scf_env ...
    9032             : !> \param optimizer   controls the optimization algorithm
    9033             : !> \param quench_t ...
    9034             : !> \param matrix_t_in ...
    9035             : !> \param matrix_t_out ...
    9036             : !> \param perturbation_only - perturbative (do not update Hamiltonian)
    9037             : !> \param special_case   to reduce the overhead special cases are implemented:
    9038             : !>                       xalmo_case_normal - no special case (i.e. xALMOs)
    9039             : !>                       xalmo_case_block_diag
    9040             : !>                       xalmo_case_fully_deloc
    9041             : !> \par History
    9042             : !>       2020.01 created [Rustam Z Khaliullin]
    9043             : !> \author Rustam Z Khaliullin
    9044             : ! **************************************************************************************************
    9045          18 :    SUBROUTINE almo_scf_xalmo_trustr(qs_env, almo_scf_env, optimizer, quench_t, &
    9046             :                                     matrix_t_in, matrix_t_out, perturbation_only, &
    9047             :                                     special_case)
    9048             : 
    9049             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    9050             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    9051             :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
    9052             :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: quench_t, matrix_t_in, matrix_t_out
    9053             :       LOGICAL, INTENT(IN)                                :: perturbation_only
    9054             :       INTEGER, INTENT(IN), OPTIONAL                      :: special_case
    9055             : 
    9056             :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_xalmo_trustr'
    9057             : 
    9058             :       INTEGER :: handle, ispin, iteration, iteration_type_to_report, my_special_case, ndomains, &
    9059             :          nspins, outer_iteration, prec_type, unit_nr
    9060          18 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
    9061             :       LOGICAL :: assume_t0_q0x, border_reached, inner_loop_success, normalize_orbitals, &
    9062             :          optimize_theta, penalty_occ_vol, reset_conjugator, same_position, scf_converged
    9063             :       REAL(kind=dp) :: beta, energy_start, energy_trial, eta, expected_reduction, &
    9064             :          fake_step_size_to_report, grad_norm_ratio, grad_norm_ref, loss_change_to_report, &
    9065             :          loss_start, loss_trial, model_grad_norm, penalty_amplitude, penalty_start, penalty_trial, &
    9066             :          radius_current, radius_max, real_temp, rho, spin_factor, step_norm, step_size, t1, &
    9067             :          t1outer, t2, t2outer, y_scalar
    9068          18 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: grad_norm_spin, &
    9069          18 :                                                             penalty_occ_vol_g_prefactor, &
    9070          18 :                                                             penalty_occ_vol_h_prefactor
    9071             :       TYPE(cp_logger_type), POINTER                      :: logger
    9072             :       TYPE(dbcsr_type)                                   :: m_s_inv
    9073          18 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: FTsiginv, grad, m_model_Bd, m_model_d, &
    9074          18 :          m_model_hessian, m_model_hessian_inv, m_model_r, m_model_r_prev, m_model_rt, &
    9075          18 :          m_model_rt_prev, m_sig_sqrti_ii, m_theta, m_theta_trial, prev_step, siginvTFTsiginv, ST, &
    9076          18 :          step, STsiginv_0
    9077             :       TYPE(domain_submatrix_type), ALLOCATABLE, &
    9078          18 :          DIMENSION(:, :)                                 :: domain_model_hessian_inv, domain_r_down
    9079             : 
    9080             :       ! RZK-warning: number of temporary storage matrices can be reduced
    9081          18 :       CALL timeset(routineN, handle)
    9082             : 
    9083          18 :       t1outer = m_walltime()
    9084             : 
    9085          18 :       my_special_case = xalmo_case_normal
    9086          18 :       IF (PRESENT(special_case)) my_special_case = special_case
    9087             : 
    9088             :       ! get a useful output_unit
    9089          18 :       logger => cp_get_default_logger()
    9090          18 :       IF (logger%para_env%is_source()) THEN
    9091           9 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    9092             :       ELSE
    9093           9 :          unit_nr = -1
    9094             :       END IF
    9095             : 
    9096             :       ! Trust radius code is written to obviate the need in projected orbitals
    9097          18 :       assume_t0_q0x = .FALSE.
    9098             :       ! Smoothing of the orbitals have not been implemented
    9099          18 :       optimize_theta = .FALSE.
    9100             : 
    9101          18 :       nspins = almo_scf_env%nspins
    9102          18 :       IF (nspins == 1) THEN
    9103          18 :          spin_factor = 2.0_dp
    9104             :       ELSE
    9105           0 :          spin_factor = 1.0_dp
    9106             :       END IF
    9107             : 
    9108          18 :       IF (unit_nr > 0) THEN
    9109           9 :          WRITE (unit_nr, *)
    9110           1 :          SELECT CASE (my_special_case)
    9111             :          CASE (xalmo_case_block_diag)
    9112           1 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
    9113           2 :                " Optimization of block-diagonal ALMOs ", REPEAT("-", 21)
    9114             :          CASE (xalmo_case_fully_deloc)
    9115           0 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
    9116           0 :                " Optimization of fully delocalized MOs ", REPEAT("-", 20)
    9117             :          CASE (xalmo_case_normal)
    9118           8 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 27), &
    9119          17 :                " Optimization of XALMOs ", REPEAT("-", 28)
    9120             :          END SELECT
    9121           9 :          WRITE (unit_nr, *)
    9122             :          CALL trust_r_report(unit_nr, &
    9123             :                              iter_type=0, & ! print header, all values are ignored
    9124             :                              iteration=0, &
    9125             :                              radius=0.0_dp, &
    9126             :                              loss=0.0_dp, &
    9127             :                              delta_loss=0.0_dp, &
    9128             :                              grad_norm=0.0_dp, &
    9129             :                              predicted_reduction=0.0_dp, &
    9130             :                              rho=0.0_dp, &
    9131             :                              new=.TRUE., &
    9132           9 :                              time=0.0_dp)
    9133           9 :          WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
    9134             :       END IF
    9135             : 
    9136             :       ! penalty amplitude adjusts the strength of volume conservation
    9137          18 :       penalty_occ_vol = .FALSE.
    9138             :       !(almo_scf_env%penalty%occ_vol_method .NE. almo_occ_vol_penalty_none .AND. &
    9139             :       !                   my_special_case .EQ. xalmo_case_fully_deloc)
    9140          18 :       normalize_orbitals = penalty_occ_vol
    9141          18 :       penalty_amplitude = 0.0_dp !almo_scf_env%penalty%occ_vol_coeff
    9142          54 :       ALLOCATE (penalty_occ_vol_g_prefactor(nspins))
    9143          36 :       ALLOCATE (penalty_occ_vol_h_prefactor(nspins))
    9144          36 :       penalty_occ_vol_g_prefactor(:) = 0.0_dp
    9145          36 :       penalty_occ_vol_h_prefactor(:) = 0.0_dp
    9146             : 
    9147             :       ! here preconditioner is the Hessian of model function
    9148          18 :       prec_type = optimizer%preconditioner
    9149             : 
    9150          36 :       ALLOCATE (grad_norm_spin(nspins))
    9151          54 :       ALLOCATE (nocc(nspins))
    9152             : 
    9153             :       ! m_theta contains a set of variational parameters
    9154             :       ! that define one-electron orbitals (simple, projected, etc.)
    9155          72 :       ALLOCATE (m_theta(nspins))
    9156          36 :       DO ispin = 1, nspins
    9157             :          CALL dbcsr_create(m_theta(ispin), &
    9158             :                            template=matrix_t_out(ispin), &
    9159          36 :                            matrix_type=dbcsr_type_no_symmetry)
    9160             :       END DO
    9161             : 
    9162             :       ! create initial guess from the initial orbitals
    9163             :       CALL xalmo_initial_guess(m_guess=m_theta, &
    9164             :                                m_t_in=matrix_t_in, &
    9165             :                                m_t0=almo_scf_env%matrix_t_blk, &
    9166             :                                m_quench_t=quench_t, &
    9167             :                                m_overlap=almo_scf_env%matrix_s(1), &
    9168             :                                m_sigma_tmpl=almo_scf_env%matrix_sigma_inv, &
    9169             :                                nspins=nspins, &
    9170             :                                xalmo_history=almo_scf_env%xalmo_history, &
    9171             :                                assume_t0_q0x=assume_t0_q0x, &
    9172             :                                optimize_theta=optimize_theta, &
    9173             :                                envelope_amplitude=almo_scf_env%envelope_amplitude, &
    9174             :                                eps_filter=almo_scf_env%eps_filter, &
    9175             :                                order_lanczos=almo_scf_env%order_lanczos, &
    9176             :                                eps_lanczos=almo_scf_env%eps_lanczos, &
    9177             :                                max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
    9178          18 :                                nocc_of_domain=almo_scf_env%nocc_of_domain)
    9179             : 
    9180          18 :       ndomains = almo_scf_env%ndomains
    9181         218 :       ALLOCATE (domain_r_down(ndomains, nspins))
    9182          18 :       CALL init_submatrices(domain_r_down)
    9183         200 :       ALLOCATE (domain_model_hessian_inv(ndomains, nspins))
    9184          18 :       CALL init_submatrices(domain_model_hessian_inv)
    9185             : 
    9186          54 :       ALLOCATE (m_model_hessian(nspins))
    9187          54 :       ALLOCATE (m_model_hessian_inv(nspins))
    9188          54 :       ALLOCATE (siginvTFTsiginv(nspins))
    9189          54 :       ALLOCATE (STsiginv_0(nspins))
    9190          54 :       ALLOCATE (FTsiginv(nspins))
    9191          54 :       ALLOCATE (ST(nspins))
    9192          54 :       ALLOCATE (grad(nspins))
    9193          72 :       ALLOCATE (prev_step(nspins))
    9194          54 :       ALLOCATE (step(nspins))
    9195          54 :       ALLOCATE (m_sig_sqrti_ii(nspins))
    9196          54 :       ALLOCATE (m_model_r(nspins))
    9197          54 :       ALLOCATE (m_model_rt(nspins))
    9198          54 :       ALLOCATE (m_model_d(nspins))
    9199          54 :       ALLOCATE (m_model_Bd(nspins))
    9200          54 :       ALLOCATE (m_model_r_prev(nspins))
    9201          54 :       ALLOCATE (m_model_rt_prev(nspins))
    9202          54 :       ALLOCATE (m_theta_trial(nspins))
    9203             : 
    9204          36 :       DO ispin = 1, nspins
    9205             : 
    9206             :          ! init temporary storage
    9207             :          CALL dbcsr_create(m_model_hessian_inv(ispin), &
    9208             :                            template=almo_scf_env%matrix_ks(ispin), &
    9209          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9210             :          CALL dbcsr_create(m_model_hessian(ispin), &
    9211             :                            template=almo_scf_env%matrix_ks(ispin), &
    9212          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9213             :          CALL dbcsr_create(siginvTFTsiginv(ispin), &
    9214             :                            template=almo_scf_env%matrix_sigma(ispin), &
    9215          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9216             :          CALL dbcsr_create(STsiginv_0(ispin), &
    9217             :                            template=matrix_t_out(ispin), &
    9218          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9219             :          CALL dbcsr_create(FTsiginv(ispin), &
    9220             :                            template=matrix_t_out(ispin), &
    9221          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9222             :          CALL dbcsr_create(ST(ispin), &
    9223             :                            template=matrix_t_out(ispin), &
    9224          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9225             :          CALL dbcsr_create(grad(ispin), &
    9226             :                            template=matrix_t_out(ispin), &
    9227          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9228             :          CALL dbcsr_create(prev_step(ispin), &
    9229             :                            template=matrix_t_out(ispin), &
    9230          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9231             :          CALL dbcsr_create(step(ispin), &
    9232             :                            template=matrix_t_out(ispin), &
    9233          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9234             :          CALL dbcsr_create(m_sig_sqrti_ii(ispin), &
    9235             :                            template=almo_scf_env%matrix_sigma_inv(ispin), &
    9236          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9237             :          CALL dbcsr_create(m_model_r(ispin), &
    9238             :                            template=matrix_t_out(ispin), &
    9239          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9240             :          CALL dbcsr_create(m_model_rt(ispin), &
    9241             :                            template=matrix_t_out(ispin), &
    9242          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9243             :          CALL dbcsr_create(m_model_d(ispin), &
    9244             :                            template=matrix_t_out(ispin), &
    9245          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9246             :          CALL dbcsr_create(m_model_Bd(ispin), &
    9247             :                            template=matrix_t_out(ispin), &
    9248          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9249             :          CALL dbcsr_create(m_model_r_prev(ispin), &
    9250             :                            template=matrix_t_out(ispin), &
    9251          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9252             :          CALL dbcsr_create(m_model_rt_prev(ispin), &
    9253             :                            template=matrix_t_out(ispin), &
    9254          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9255             :          CALL dbcsr_create(m_theta_trial(ispin), &
    9256             :                            template=matrix_t_out(ispin), &
    9257          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9258             : 
    9259          18 :          CALL dbcsr_set(step(ispin), 0.0_dp)
    9260          18 :          CALL dbcsr_set(prev_step(ispin), 0.0_dp)
    9261             : 
    9262             :          CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), &
    9263          18 :                              nfullrows_total=nocc(ispin))
    9264             : 
    9265             :          ! invert S domains if necessary
    9266             :          ! Note: domains for alpha and beta electrons might be different
    9267             :          ! that is why the inversion of the AO overlap is inside the spin loop
    9268          36 :          IF (my_special_case .EQ. xalmo_case_normal) THEN
    9269             : 
    9270             :             CALL construct_domain_s_inv( &
    9271             :                matrix_s=almo_scf_env%matrix_s(1), &
    9272             :                subm_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9273             :                dpattern=quench_t(ispin), &
    9274             :                map=almo_scf_env%domain_map(ispin), &
    9275          16 :                node_of_domain=almo_scf_env%cpu_of_domain)
    9276             : 
    9277             :          END IF
    9278             : 
    9279             :       END DO ! ispin
    9280             : 
    9281             :       ! invert metric for special case where metric is spin independent
    9282          18 :       IF (my_special_case .EQ. xalmo_case_block_diag) THEN
    9283             : 
    9284             :          CALL dbcsr_create(m_s_inv, &
    9285             :                            template=almo_scf_env%matrix_s(1), &
    9286           2 :                            matrix_type=dbcsr_type_no_symmetry)
    9287             :          CALL invert_Hotelling(m_s_inv, &
    9288             :                                almo_scf_env%matrix_s_blk(1), &
    9289             :                                threshold=almo_scf_env%eps_filter, &
    9290           2 :                                filter_eps=almo_scf_env%eps_filter)
    9291             : 
    9292          16 :       ELSE IF (my_special_case .EQ. xalmo_case_fully_deloc) THEN
    9293             : 
    9294             :          ! invert S using cholesky
    9295             :          CALL dbcsr_create(m_s_inv, &
    9296             :                            template=almo_scf_env%matrix_s(1), &
    9297           0 :                            matrix_type=dbcsr_type_no_symmetry)
    9298           0 :          CALL dbcsr_desymmetrize(almo_scf_env%matrix_s(1), m_s_inv)
    9299             :          CALL cp_dbcsr_cholesky_decompose(m_s_inv, &
    9300             :                                           para_env=almo_scf_env%para_env, &
    9301           0 :                                           blacs_env=almo_scf_env%blacs_env)
    9302             :          CALL cp_dbcsr_cholesky_invert(m_s_inv, &
    9303             :                                        para_env=almo_scf_env%para_env, &
    9304             :                                        blacs_env=almo_scf_env%blacs_env, &
    9305           0 :                                        uplo_to_full=.TRUE.)
    9306           0 :          CALL dbcsr_filter(m_s_inv, almo_scf_env%eps_filter)
    9307             : 
    9308             :       END IF ! s_inv
    9309             : 
    9310          18 :       radius_max = optimizer%max_trust_radius
    9311          18 :       radius_current = MIN(optimizer%initial_trust_radius, radius_max)
    9312             :       ! eta must be between 0 and 0.25
    9313          18 :       eta = MIN(MAX(optimizer%rho_do_not_update, 0.0_dp), 0.25_dp)
    9314             :       energy_start = 0.0_dp
    9315          18 :       energy_trial = 0.0_dp
    9316             :       penalty_start = 0.0_dp
    9317          18 :       penalty_trial = 0.0_dp
    9318             :       loss_start = 0.0_dp ! sum of the energy and penalty
    9319          18 :       loss_trial = 0.0_dp
    9320             : 
    9321          18 :       same_position = .FALSE.
    9322             : 
    9323             :       ! compute the energy
    9324             :       CALL main_var_to_xalmos_and_loss_func( &
    9325             :          almo_scf_env=almo_scf_env, &
    9326             :          qs_env=qs_env, &
    9327             :          m_main_var_in=m_theta, &
    9328             :          m_t_out=matrix_t_out, &
    9329             :          m_sig_sqrti_ii_out=m_sig_sqrti_ii, &
    9330             :          energy_out=energy_start, &
    9331             :          penalty_out=penalty_start, &
    9332             :          m_FTsiginv_out=FTsiginv, &
    9333             :          m_siginvTFTsiginv_out=siginvTFTsiginv, &
    9334             :          m_ST_out=ST, &
    9335             :          m_STsiginv0_in=STsiginv_0, &
    9336             :          m_quench_t_in=quench_t, &
    9337             :          domain_r_down_in=domain_r_down, &
    9338             :          assume_t0_q0x=assume_t0_q0x, &
    9339             :          just_started=.TRUE., &
    9340             :          optimize_theta=optimize_theta, &
    9341             :          normalize_orbitals=normalize_orbitals, &
    9342             :          perturbation_only=perturbation_only, &
    9343             :          do_penalty=penalty_occ_vol, &
    9344          18 :          special_case=my_special_case)
    9345          18 :       loss_start = energy_start + penalty_start
    9346          18 :       IF (my_special_case .EQ. xalmo_case_block_diag) THEN
    9347           2 :          almo_scf_env%almo_scf_energy = energy_start
    9348             :       END IF
    9349          36 :       DO ispin = 1, nspins
    9350          36 :          IF (penalty_occ_vol) THEN
    9351             :             penalty_occ_vol_g_prefactor(ispin) = &
    9352           0 :                -2.0_dp*penalty_amplitude*spin_factor*nocc(ispin)
    9353           0 :             penalty_occ_vol_h_prefactor(ispin) = 0.0_dp
    9354             :          END IF
    9355             :       END DO ! ispin
    9356             : 
    9357             :       ! start the outer step-size-adjustment loop
    9358          18 :       scf_converged = .FALSE.
    9359         426 :       adjust_r_loop: DO outer_iteration = 1, optimizer%max_iter_outer_loop
    9360             : 
    9361             :          ! start the inner fixed-radius loop
    9362         426 :          border_reached = .FALSE.
    9363             : 
    9364         852 :          DO ispin = 1, nspins
    9365         426 :             CALL dbcsr_set(step(ispin), 0.0_dp)
    9366         852 :             CALL dbcsr_filter(step(ispin), almo_scf_env%eps_filter)
    9367             :          END DO
    9368             : 
    9369         426 :          IF (.NOT. same_position) THEN
    9370             : 
    9371         852 :             DO ispin = 1, nspins
    9372             : 
    9373             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Compute model gradient"
    9374             :                CALL compute_gradient( &
    9375             :                   m_grad_out=grad(ispin), &
    9376             :                   m_ks=almo_scf_env%matrix_ks(ispin), &
    9377             :                   m_s=almo_scf_env%matrix_s(1), &
    9378             :                   m_t=matrix_t_out(ispin), &
    9379             :                   m_t0=almo_scf_env%matrix_t_blk(ispin), &
    9380             :                   m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    9381             :                   m_quench_t=quench_t(ispin), &
    9382             :                   m_FTsiginv=FTsiginv(ispin), &
    9383             :                   m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    9384             :                   m_ST=ST(ispin), &
    9385             :                   m_STsiginv0=STsiginv_0(ispin), &
    9386             :                   m_theta=m_theta(ispin), &
    9387             :                   m_sig_sqrti_ii=m_sig_sqrti_ii(ispin), &
    9388             :                   domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9389             :                   domain_r_down=domain_r_down(:, ispin), &
    9390             :                   cpu_of_domain=almo_scf_env%cpu_of_domain, &
    9391             :                   domain_map=almo_scf_env%domain_map(ispin), &
    9392             :                   assume_t0_q0x=assume_t0_q0x, &
    9393             :                   optimize_theta=optimize_theta, &
    9394             :                   normalize_orbitals=normalize_orbitals, &
    9395             :                   penalty_occ_vol=penalty_occ_vol, &
    9396             :                   penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    9397             :                   envelope_amplitude=almo_scf_env%envelope_amplitude, &
    9398             :                   eps_filter=almo_scf_env%eps_filter, &
    9399             :                   spin_factor=spin_factor, &
    9400         852 :                   special_case=my_special_case)
    9401             : 
    9402             :             END DO ! ispin
    9403             : 
    9404             :          END IF ! skip_grad
    9405             : 
    9406             :          ! check convergence and other exit criteria
    9407         852 :          DO ispin = 1, nspins
    9408         852 :             grad_norm_spin(ispin) = dbcsr_maxabs(grad(ispin))
    9409             :             !grad_norm_frob = dbcsr_frobenius_norm(grad(ispin)) / &
    9410             :             !                 dbcsr_frobenius_norm(quench_t(ispin))
    9411             :          END DO ! ispin
    9412        1278 :          grad_norm_ref = MAXVAL(grad_norm_spin)
    9413             : 
    9414         426 :          t2outer = m_walltime()
    9415             :          CALL trust_r_report(unit_nr, &
    9416             :                              iter_type=1, & ! only some data is important
    9417             :                              iteration=outer_iteration, &
    9418             :                              loss=loss_start, &
    9419             :                              delta_loss=0.0_dp, &
    9420             :                              grad_norm=grad_norm_ref, &
    9421             :                              predicted_reduction=0.0_dp, &
    9422             :                              rho=0.0_dp, &
    9423             :                              radius=radius_current, &
    9424             :                              new=.NOT. same_position, &
    9425         426 :                              time=t2outer - t1outer)
    9426         426 :          t1outer = m_walltime()
    9427             : 
    9428         426 :          IF (grad_norm_ref .LE. optimizer%eps_error) THEN
    9429          18 :             scf_converged = .TRUE.
    9430          18 :             border_reached = .FALSE.
    9431          18 :             expected_reduction = 0.0_dp
    9432          18 :             IF (.NOT. (optimizer%early_stopping_on .AND. outer_iteration .EQ. 1)) &
    9433             :                EXIT adjust_r_loop
    9434             :          ELSE
    9435             :             scf_converged = .FALSE.
    9436             :          END IF
    9437             : 
    9438         816 :          DO ispin = 1, nspins
    9439             : 
    9440         408 :             CALL dbcsr_copy(m_model_r(ispin), grad(ispin))
    9441         408 :             CALL dbcsr_scale(m_model_r(ispin), -1.0_dp)
    9442             : 
    9443         408 :             IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
    9444             :                 my_special_case .EQ. xalmo_case_fully_deloc) THEN
    9445             : 
    9446             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Multiply Sinv.r"
    9447             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9448             :                                    m_s_inv, &
    9449             :                                    m_model_r(ispin), &
    9450             :                                    0.0_dp, m_model_rt(ispin), &
    9451          92 :                                    filter_eps=almo_scf_env%eps_filter)
    9452             : 
    9453         316 :             ELSE IF (my_special_case .EQ. xalmo_case_normal) THEN
    9454             : 
    9455             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Multiply Sinv_xx.r"
    9456             :                CALL apply_domain_operators( &
    9457             :                   matrix_in=m_model_r(ispin), &
    9458             :                   matrix_out=m_model_rt(ispin), &
    9459             :                   operator1=almo_scf_env%domain_s_inv(:, ispin), &
    9460             :                   dpattern=quench_t(ispin), &
    9461             :                   map=almo_scf_env%domain_map(ispin), &
    9462             :                   node_of_domain=almo_scf_env%cpu_of_domain, &
    9463             :                   my_action=0, &
    9464         316 :                   filter_eps=almo_scf_env%eps_filter)
    9465             : 
    9466             :             ELSE
    9467           0 :                CPABORT("Unknown XALMO special case")
    9468             :             END IF
    9469             : 
    9470         816 :             CALL dbcsr_copy(m_model_d(ispin), m_model_rt(ispin))
    9471             : 
    9472             :          END DO ! ispin
    9473             : 
    9474             :          ! compute model Hessian
    9475         408 :          IF (.NOT. same_position) THEN
    9476             : 
    9477             :             SELECT CASE (prec_type)
    9478             :             CASE (xalmo_prec_domain)
    9479             : 
    9480             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Compute model Hessian"
    9481         816 :                DO ispin = 1, nspins
    9482             :                   CALL compute_preconditioner( &
    9483             :                      domain_prec_out=almo_scf_env%domain_preconditioner(:, ispin), &
    9484             :                      m_prec_out=m_model_hessian(ispin), &
    9485             :                      m_ks=almo_scf_env%matrix_ks(ispin), &
    9486             :                      m_s=almo_scf_env%matrix_s(1), &
    9487             :                      m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    9488             :                      m_quench_t=quench_t(ispin), &
    9489             :                      m_FTsiginv=FTsiginv(ispin), &
    9490             :                      m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    9491             :                      m_ST=ST(ispin), &
    9492             :                      para_env=almo_scf_env%para_env, &
    9493             :                      blacs_env=almo_scf_env%blacs_env, &
    9494             :                      nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    9495             :                      domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9496             :                      domain_r_down=domain_r_down(:, ispin), &
    9497             :                      cpu_of_domain=almo_scf_env%cpu_of_domain, &
    9498             :                      domain_map=almo_scf_env%domain_map(ispin), &
    9499             :                      assume_t0_q0x=.FALSE., &
    9500             :                      penalty_occ_vol=penalty_occ_vol, &
    9501             :                      penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    9502             :                      eps_filter=almo_scf_env%eps_filter, &
    9503             :                      neg_thr=0.5_dp, &
    9504             :                      spin_factor=spin_factor, &
    9505             :                      skip_inversion=.TRUE., &
    9506         816 :                      special_case=my_special_case)
    9507             :                END DO ! ispin
    9508             : 
    9509             :             CASE DEFAULT
    9510             : 
    9511         408 :                CPABORT("Unknown preconditioner")
    9512             : 
    9513             :             END SELECT ! preconditioner type fork
    9514             : 
    9515             :          END IF  ! not same position
    9516             : 
    9517             :          ! print the header (argument values are ignored)
    9518             :          CALL fixed_r_report(unit_nr, &
    9519             :                              iter_type=0, &
    9520             :                              iteration=0, &
    9521             :                              step_size=0.0_dp, &
    9522             :                              border_reached=.FALSE., &
    9523             :                              curvature=0.0_dp, &
    9524             :                              grad_norm_ratio=0.0_dp, &
    9525         408 :                              time=0.0_dp)
    9526             : 
    9527             :          IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Start inner loop"
    9528             : 
    9529         408 :          t1 = m_walltime()
    9530         408 :          inner_loop_success = .FALSE.
    9531             :          ! trustr_steihaug, trustr_cauchy, trustr_dogleg
    9532         490 :          fixed_r_loop: DO iteration = 1, optimizer%max_iter
    9533             : 
    9534             :             ! Step 2. Get curvature. If negative, step to the border
    9535         490 :             y_scalar = 0.0_dp
    9536         980 :             DO ispin = 1, nspins
    9537             : 
    9538             :                ! Get B.d
    9539         490 :                IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
    9540             :                    my_special_case .EQ. xalmo_case_fully_deloc) THEN
    9541             : 
    9542             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9543             :                                       m_model_hessian(ispin), &
    9544             :                                       m_model_d(ispin), &
    9545             :                                       0.0_dp, m_model_Bd(ispin), &
    9546          92 :                                       filter_eps=almo_scf_env%eps_filter)
    9547             : 
    9548             :                ELSE
    9549             : 
    9550             :                   CALL apply_domain_operators( &
    9551             :                      matrix_in=m_model_d(ispin), &
    9552             :                      matrix_out=m_model_Bd(ispin), &
    9553             :                      operator1=almo_scf_env%domain_preconditioner(:, ispin), &
    9554             :                      dpattern=quench_t(ispin), &
    9555             :                      map=almo_scf_env%domain_map(ispin), &
    9556             :                      node_of_domain=almo_scf_env%cpu_of_domain, &
    9557             :                      my_action=0, &
    9558         398 :                      filter_eps=almo_scf_env%eps_filter)
    9559             : 
    9560             :                END IF ! special case
    9561             : 
    9562             :                ! Get y=d^T.B.d
    9563         490 :                CALL dbcsr_dot(m_model_d(ispin), m_model_Bd(ispin), real_temp)
    9564         980 :                y_scalar = y_scalar + real_temp
    9565             : 
    9566             :             END DO ! ispin
    9567             :             IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Curvature: ", y_scalar
    9568             : 
    9569             :             ! step to the border
    9570         490 :             IF (y_scalar .LT. 0.0_dp) THEN
    9571             : 
    9572             :                CALL step_size_to_border( &
    9573             :                   step_size_out=step_size, &
    9574             :                   metric_in=almo_scf_env%matrix_s, &
    9575             :                   position_in=step, &
    9576             :                   direction_in=m_model_d, &
    9577             :                   trust_radius_in=radius_current, &
    9578             :                   quench_t_in=quench_t, &
    9579             :                   eps_filter_in=almo_scf_env%eps_filter &
    9580           0 :                   )
    9581             : 
    9582           0 :                DO ispin = 1, nspins
    9583           0 :                   CALL dbcsr_add(step(ispin), m_model_d(ispin), 1.0_dp, step_size)
    9584             :                END DO
    9585             : 
    9586           0 :                border_reached = .TRUE.
    9587           0 :                inner_loop_success = .TRUE.
    9588             : 
    9589             :                CALL predicted_reduction( &
    9590             :                   reduction_out=expected_reduction, &
    9591             :                   grad_in=grad, &
    9592             :                   step_in=step, &
    9593             :                   hess_in=m_model_hessian, &
    9594             :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
    9595             :                   quench_t_in=quench_t, &
    9596             :                   special_case=my_special_case, &
    9597             :                   eps_filter=almo_scf_env%eps_filter, &
    9598             :                   domain_map=almo_scf_env%domain_map, &
    9599             :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
    9600           0 :                   )
    9601             : 
    9602           0 :                t2 = m_walltime()
    9603             :                CALL fixed_r_report(unit_nr, &
    9604             :                                    iter_type=2, &
    9605             :                                    iteration=iteration, &
    9606             :                                    step_size=step_size, &
    9607             :                                    border_reached=border_reached, &
    9608             :                                    curvature=y_scalar, &
    9609             :                                    grad_norm_ratio=expected_reduction, &
    9610           0 :                                    time=t2 - t1)
    9611             : 
    9612             :                EXIT fixed_r_loop ! the inner loop
    9613             : 
    9614             :             END IF ! y is negative
    9615             : 
    9616             :             ! Step 3. Compute the step size along the direction
    9617         490 :             step_size = 0.0_dp
    9618         980 :             DO ispin = 1, nspins
    9619         490 :                CALL dbcsr_dot(m_model_r(ispin), m_model_rt(ispin), real_temp)
    9620         980 :                step_size = step_size + real_temp
    9621             :             END DO ! ispin
    9622         490 :             step_size = step_size/y_scalar
    9623             :             IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Proposed step size: ", step_size
    9624             : 
    9625             :             ! Update the step matrix
    9626         980 :             DO ispin = 1, nspins
    9627         490 :                CALL dbcsr_copy(prev_step(ispin), step(ispin))
    9628         980 :                CALL dbcsr_add(step(ispin), m_model_d(ispin), 1.0_dp, step_size)
    9629             :             END DO
    9630             : 
    9631             :             ! Compute step norm
    9632             :             CALL contravariant_matrix_norm( &
    9633             :                norm_out=step_norm, &
    9634             :                matrix_in=step, &
    9635             :                metric_in=almo_scf_env%matrix_s, &
    9636             :                quench_t_in=quench_t, &
    9637             :                eps_filter_in=almo_scf_env%eps_filter &
    9638         490 :                )
    9639             :             IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Step norm: ", step_norm
    9640             : 
    9641             :             ! Do not step beyond the trust radius
    9642         490 :             IF (step_norm .GT. radius_current) THEN
    9643             : 
    9644             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Norm is too large"
    9645             :                CALL step_size_to_border( &
    9646             :                   step_size_out=step_size, &
    9647             :                   metric_in=almo_scf_env%matrix_s, &
    9648             :                   position_in=prev_step, &
    9649             :                   direction_in=m_model_d, &
    9650             :                   trust_radius_in=radius_current, &
    9651             :                   quench_t_in=quench_t, &
    9652             :                   eps_filter_in=almo_scf_env%eps_filter &
    9653          34 :                   )
    9654             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Step size to border: ", step_size
    9655             : 
    9656          68 :                DO ispin = 1, nspins
    9657          34 :                   CALL dbcsr_copy(step(ispin), prev_step(ispin))
    9658          68 :                   CALL dbcsr_add(step(ispin), m_model_d(ispin), 1.0_dp, step_size)
    9659             :                END DO
    9660             : 
    9661             :                IF (debug_mode) THEN
    9662             :                   ! Compute step norm
    9663             :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Extra norm evaluation"
    9664             :                   CALL contravariant_matrix_norm( &
    9665             :                      norm_out=step_norm, &
    9666             :                      matrix_in=step, &
    9667             :                      metric_in=almo_scf_env%matrix_s, &
    9668             :                      quench_t_in=quench_t, &
    9669             :                      eps_filter_in=almo_scf_env%eps_filter &
    9670             :                      )
    9671             :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Step norm: ", step_norm
    9672             :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Current radius: ", radius_current
    9673             :                END IF
    9674             : 
    9675          34 :                border_reached = .TRUE.
    9676          34 :                inner_loop_success = .TRUE.
    9677             : 
    9678             :                CALL predicted_reduction( &
    9679             :                   reduction_out=expected_reduction, &
    9680             :                   grad_in=grad, &
    9681             :                   step_in=step, &
    9682             :                   hess_in=m_model_hessian, &
    9683             :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
    9684             :                   quench_t_in=quench_t, &
    9685             :                   special_case=my_special_case, &
    9686             :                   eps_filter=almo_scf_env%eps_filter, &
    9687             :                   domain_map=almo_scf_env%domain_map, &
    9688             :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
    9689          34 :                   )
    9690             : 
    9691          34 :                t2 = m_walltime()
    9692             :                CALL fixed_r_report(unit_nr, &
    9693             :                                    iter_type=3, &
    9694             :                                    iteration=iteration, &
    9695             :                                    step_size=step_size, &
    9696             :                                    border_reached=border_reached, &
    9697             :                                    curvature=y_scalar, &
    9698             :                                    grad_norm_ratio=expected_reduction, &
    9699          34 :                                    time=t2 - t1)
    9700             : 
    9701             :                EXIT fixed_r_loop ! the inner loop
    9702             : 
    9703             :             END IF
    9704             : 
    9705         456 :             IF (optimizer%trustr_algorithm .EQ. trustr_cauchy) THEN
    9706             :                ! trustr_steihaug, trustr_cauchy, trustr_dogleg
    9707             : 
    9708          80 :                border_reached = .FALSE.
    9709          80 :                inner_loop_success = .TRUE.
    9710             : 
    9711             :                CALL predicted_reduction( &
    9712             :                   reduction_out=expected_reduction, &
    9713             :                   grad_in=grad, &
    9714             :                   step_in=step, &
    9715             :                   hess_in=m_model_hessian, &
    9716             :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
    9717             :                   quench_t_in=quench_t, &
    9718             :                   special_case=my_special_case, &
    9719             :                   eps_filter=almo_scf_env%eps_filter, &
    9720             :                   domain_map=almo_scf_env%domain_map, &
    9721             :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
    9722          80 :                   )
    9723             : 
    9724          80 :                t2 = m_walltime()
    9725             :                CALL fixed_r_report(unit_nr, &
    9726             :                                    iter_type=5, & ! Cauchy point
    9727             :                                    iteration=iteration, &
    9728             :                                    step_size=step_size, &
    9729             :                                    border_reached=border_reached, &
    9730             :                                    curvature=y_scalar, &
    9731             :                                    grad_norm_ratio=expected_reduction, &
    9732          80 :                                    time=t2 - t1)
    9733             : 
    9734             :                EXIT fixed_r_loop ! the inner loop
    9735             : 
    9736         376 :             ELSE IF (optimizer%trustr_algorithm .EQ. trustr_dogleg) THEN
    9737             : 
    9738             :                ! invert or pseudo-invert B
    9739         268 :                SELECT CASE (prec_type)
    9740             :                CASE (xalmo_prec_domain)
    9741             : 
    9742             :                   IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Pseudo-invert model Hessian"
    9743         268 :                   IF (special_case .EQ. xalmo_case_block_diag) THEN ! non-overlapping diagonal blocks
    9744             : 
    9745         156 :                      DO ispin = 1, nspins
    9746             :                         CALL pseudo_invert_diagonal_blk( &
    9747             :                            matrix_in=m_model_hessian(ispin), &
    9748             :                            matrix_out=m_model_hessian_inv(ispin), &
    9749             :                            nocc=almo_scf_env%nocc_of_domain(:, ispin) &
    9750         156 :                            )
    9751             :                      END DO
    9752             : 
    9753         190 :                   ELSE IF (special_case .EQ. xalmo_case_fully_deloc) THEN ! the entire system is a block
    9754             : 
    9755             :                      ! invert using cholesky decomposition
    9756           0 :                      DO ispin = 1, nspins
    9757             :                         CALL dbcsr_copy(m_model_hessian_inv(ispin), &
    9758           0 :                                         m_model_hessian(ispin))
    9759             :                         CALL cp_dbcsr_cholesky_decompose(m_model_hessian_inv(ispin), &
    9760             :                                                          para_env=almo_scf_env%para_env, &
    9761           0 :                                                          blacs_env=almo_scf_env%blacs_env)
    9762             :                         CALL cp_dbcsr_cholesky_invert(m_model_hessian_inv(ispin), &
    9763             :                                                       para_env=almo_scf_env%para_env, &
    9764             :                                                       blacs_env=almo_scf_env%blacs_env, &
    9765           0 :                                                       uplo_to_full=.TRUE.)
    9766             :                         CALL dbcsr_filter(m_model_hessian_inv(ispin), &
    9767           0 :                                           almo_scf_env%eps_filter)
    9768             :                      END DO
    9769             : 
    9770             :                   ELSE
    9771             : 
    9772         380 :                      DO ispin = 1, nspins
    9773             :                         CALL construct_domain_preconditioner( &
    9774             :                            matrix_main=m_model_hessian(ispin), &
    9775             :                            subm_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9776             :                            subm_r_down=domain_r_down(:, ispin), &
    9777             :                            matrix_trimmer=quench_t(ispin), &
    9778             :                            dpattern=quench_t(ispin), &
    9779             :                            map=almo_scf_env%domain_map(ispin), &
    9780             :                            node_of_domain=almo_scf_env%cpu_of_domain, &
    9781             :                            preconditioner=domain_model_hessian_inv(:, ispin), &
    9782             :                            use_trimmer=.FALSE., &
    9783             :                            my_action=0, & ! do not do domain (1-r0) projection
    9784             :                            skip_inversion=.FALSE. &
    9785         380 :                            )
    9786             :                      END DO
    9787             : 
    9788             :                   END IF ! special_case
    9789             : 
    9790             :                   ! slower but more reliable way to get inverted hessian
    9791             :                   !DO ispin = 1, nspins
    9792             :                   !   CALL compute_preconditioner( &
    9793             :                   !      domain_prec_out=domain_model_hessian_inv(:, ispin), &
    9794             :                   !      m_prec_out=m_model_hessian_inv(ispin), & ! RZK-warning: this one is not inverted if DOMAINs
    9795             :                   !      m_ks=almo_scf_env%matrix_ks(ispin), &
    9796             :                   !      m_s=almo_scf_env%matrix_s(1), &
    9797             :                   !      m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    9798             :                   !      m_quench_t=quench_t(ispin), &
    9799             :                   !      m_FTsiginv=FTsiginv(ispin), &
    9800             :                   !      m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    9801             :                   !      m_ST=ST(ispin), &
    9802             :                   !      para_env=almo_scf_env%para_env, &
    9803             :                   !      blacs_env=almo_scf_env%blacs_env, &
    9804             :                   !      nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    9805             :                   !      domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9806             :                   !      domain_r_down=domain_r_down(:, ispin), &
    9807             :                   !      cpu_of_domain=almo_scf_env%cpu_of_domain, &
    9808             :                   !      domain_map=almo_scf_env%domain_map(ispin), &
    9809             :                   !      assume_t0_q0x=.FALSE., &
    9810             :                   !      penalty_occ_vol=penalty_occ_vol, &
    9811             :                   !      penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    9812             :                   !      eps_filter=almo_scf_env%eps_filter, &
    9813             :                   !      neg_thr=1.0E10_dp, &
    9814             :                   !      spin_factor=spin_factor, &
    9815             :                   !      skip_inversion=.FALSE., &
    9816             :                   !      special_case=my_special_case)
    9817             :                   !ENDDO ! ispin
    9818             : 
    9819             :                CASE DEFAULT
    9820             : 
    9821         268 :                   CPABORT("Unknown preconditioner")
    9822             : 
    9823             :                END SELECT ! preconditioner type fork
    9824             : 
    9825             :                ! get pB = Binv.m_model_r = -Binv.grad
    9826         536 :                DO ispin = 1, nspins
    9827             : 
    9828             :                   ! Get B.d
    9829         268 :                   IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
    9830         268 :                       my_special_case .EQ. xalmo_case_fully_deloc) THEN
    9831             : 
    9832             :                      CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9833             :                                          m_model_hessian_inv(ispin), &
    9834             :                                          m_model_r(ispin), &
    9835             :                                          0.0_dp, m_model_Bd(ispin), &
    9836          78 :                                          filter_eps=almo_scf_env%eps_filter)
    9837             : 
    9838             :                   ELSE
    9839             : 
    9840             :                      CALL apply_domain_operators( &
    9841             :                         matrix_in=m_model_r(ispin), &
    9842             :                         matrix_out=m_model_Bd(ispin), &
    9843             :                         operator1=domain_model_hessian_inv(:, ispin), &
    9844             :                         dpattern=quench_t(ispin), &
    9845             :                         map=almo_scf_env%domain_map(ispin), &
    9846             :                         node_of_domain=almo_scf_env%cpu_of_domain, &
    9847             :                         my_action=0, &
    9848         190 :                         filter_eps=almo_scf_env%eps_filter)
    9849             : 
    9850             :                   END IF ! special case
    9851             : 
    9852             :                END DO ! ispin
    9853             : 
    9854             :                ! Compute norm of pB
    9855             :                CALL contravariant_matrix_norm( &
    9856             :                   norm_out=step_norm, &
    9857             :                   matrix_in=m_model_Bd, &
    9858             :                   metric_in=almo_scf_env%matrix_s, &
    9859             :                   quench_t_in=quench_t, &
    9860             :                   eps_filter_in=almo_scf_env%eps_filter &
    9861         268 :                   )
    9862             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...pB norm: ", step_norm
    9863             : 
    9864             :                ! Do not step beyond the trust radius
    9865         268 :                IF (step_norm .LE. radius_current) THEN
    9866             : 
    9867             :                   IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Full dogleg"
    9868             : 
    9869         266 :                   border_reached = .FALSE.
    9870             : 
    9871         532 :                   DO ispin = 1, nspins
    9872         532 :                      CALL dbcsr_copy(step(ispin), m_model_Bd(ispin))
    9873             :                   END DO
    9874             : 
    9875         266 :                   fake_step_size_to_report = 2.0_dp
    9876         266 :                   iteration_type_to_report = 6
    9877             : 
    9878             :                ELSE ! take a shorter dogleg step
    9879             : 
    9880             :                   IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...pB norm is too large"
    9881             : 
    9882           2 :                   border_reached = .TRUE.
    9883             : 
    9884             :                   ! compute the dogleg vector = pB - pU
    9885             :                   ! this destroys -Binv.grad content
    9886           4 :                   DO ispin = 1, nspins
    9887           4 :                      CALL dbcsr_add(m_model_Bd(ispin), step(ispin), 1.0_dp, -1.0_dp)
    9888             :                   END DO
    9889             : 
    9890             :                   CALL step_size_to_border( &
    9891             :                      step_size_out=step_size, &
    9892             :                      metric_in=almo_scf_env%matrix_s, &
    9893             :                      position_in=step, &
    9894             :                      direction_in=m_model_Bd, &
    9895             :                      trust_radius_in=radius_current, &
    9896             :                      quench_t_in=quench_t, &
    9897             :                      eps_filter_in=almo_scf_env%eps_filter &
    9898           2 :                      )
    9899             :                   IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Step size to border: ", step_size
    9900           2 :                   IF (step_size .GT. 1.0_dp .OR. step_size .LT. 0.0_dp) THEN
    9901           0 :                      IF (unit_nr > 0) &
    9902           0 :                         WRITE (unit_nr, *) "Step size (", step_size, ") must lie inside (0,1)"
    9903           0 :                      CPABORT("Wrong dog leg step. We should never end up here.")
    9904             :                   END IF
    9905             : 
    9906           4 :                   DO ispin = 1, nspins
    9907           4 :                      CALL dbcsr_add(step(ispin), m_model_Bd(ispin), 1.0_dp, step_size)
    9908             :                   END DO
    9909             : 
    9910           2 :                   fake_step_size_to_report = 1.0_dp + step_size
    9911           2 :                   iteration_type_to_report = 7
    9912             : 
    9913             :                END IF ! full or partial dogleg?
    9914             : 
    9915             :                IF (debug_mode) THEN
    9916             :                   ! Compute step norm
    9917             :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Extra norm evaluation"
    9918             :                   CALL contravariant_matrix_norm( &
    9919             :                      norm_out=step_norm, &
    9920             :                      matrix_in=step, &
    9921             :                      metric_in=almo_scf_env%matrix_s, &
    9922             :                      quench_t_in=quench_t, &
    9923             :                      eps_filter_in=almo_scf_env%eps_filter &
    9924             :                      )
    9925             :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Step norm: ", step_norm
    9926             :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Current radius: ", radius_current
    9927             :                END IF
    9928             : 
    9929             :                CALL predicted_reduction( &
    9930             :                   reduction_out=expected_reduction, &
    9931             :                   grad_in=grad, &
    9932             :                   step_in=step, &
    9933             :                   hess_in=m_model_hessian, &
    9934             :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
    9935             :                   quench_t_in=quench_t, &
    9936             :                   special_case=my_special_case, &
    9937             :                   eps_filter=almo_scf_env%eps_filter, &
    9938             :                   domain_map=almo_scf_env%domain_map, &
    9939             :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
    9940         268 :                   )
    9941             : 
    9942         268 :                inner_loop_success = .TRUE.
    9943             : 
    9944         268 :                t2 = m_walltime()
    9945             :                CALL fixed_r_report(unit_nr, &
    9946             :                                    iter_type=iteration_type_to_report, &
    9947             :                                    iteration=iteration, &
    9948             :                                    step_size=fake_step_size_to_report, &
    9949             :                                    border_reached=border_reached, &
    9950             :                                    curvature=y_scalar, &
    9951             :                                    grad_norm_ratio=expected_reduction, &
    9952         268 :                                    time=t2 - t1)
    9953             : 
    9954             :                EXIT fixed_r_loop ! the inner loop
    9955             : 
    9956             :             END IF ! Non-iterative subproblem methods exit here
    9957             : 
    9958             :             ! Step 4: update model gradient
    9959         216 :             DO ispin = 1, nspins
    9960             :                ! save previous data
    9961         108 :                CALL dbcsr_copy(m_model_r_prev(ispin), m_model_r(ispin))
    9962             :                CALL dbcsr_add(m_model_r(ispin), m_model_Bd(ispin), &
    9963         216 :                               1.0_dp, -step_size)
    9964             :             END DO ! ispin
    9965             : 
    9966             :             ! Model grad norm
    9967         216 :             DO ispin = 1, nspins
    9968         216 :                grad_norm_spin(ispin) = dbcsr_maxabs(m_model_r(ispin))
    9969             :                !grad_norm_frob = dbcsr_frobenius_norm(grad(ispin)) / &
    9970             :                !                 dbcsr_frobenius_norm(quench_t(ispin))
    9971             :             END DO ! ispin
    9972         324 :             model_grad_norm = MAXVAL(grad_norm_spin)
    9973             : 
    9974             :             ! Check norm reduction
    9975         108 :             grad_norm_ratio = model_grad_norm/grad_norm_ref
    9976         108 :             IF (grad_norm_ratio .LT. optimizer%model_grad_norm_ratio) THEN
    9977             : 
    9978          26 :                border_reached = .FALSE.
    9979          26 :                inner_loop_success = .TRUE.
    9980             : 
    9981             :                CALL predicted_reduction( &
    9982             :                   reduction_out=expected_reduction, &
    9983             :                   grad_in=grad, &
    9984             :                   step_in=step, &
    9985             :                   hess_in=m_model_hessian, &
    9986             :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
    9987             :                   quench_t_in=quench_t, &
    9988             :                   special_case=my_special_case, &
    9989             :                   eps_filter=almo_scf_env%eps_filter, &
    9990             :                   domain_map=almo_scf_env%domain_map, &
    9991             :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
    9992          26 :                   )
    9993             : 
    9994          26 :                t2 = m_walltime()
    9995             :                CALL fixed_r_report(unit_nr, &
    9996             :                                    iter_type=4, &
    9997             :                                    iteration=iteration, &
    9998             :                                    step_size=step_size, &
    9999             :                                    border_reached=border_reached, &
   10000             :                                    curvature=y_scalar, &
   10001             :                                    grad_norm_ratio=expected_reduction, &
   10002          26 :                                    time=t2 - t1)
   10003             : 
   10004             :                EXIT fixed_r_loop ! the inner loop
   10005             : 
   10006             :             END IF
   10007             : 
   10008             :             ! Step 5: update model direction
   10009         164 :             DO ispin = 1, nspins
   10010             :                ! save previous data
   10011         164 :                CALL dbcsr_copy(m_model_rt_prev(ispin), m_model_rt(ispin))
   10012             :             END DO ! ispin
   10013             : 
   10014         164 :             DO ispin = 1, nspins
   10015             : 
   10016          82 :                IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
   10017          82 :                    my_special_case .EQ. xalmo_case_fully_deloc) THEN
   10018             : 
   10019             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10020             :                                       m_s_inv, &
   10021             :                                       m_model_r(ispin), &
   10022             :                                       0.0_dp, m_model_rt(ispin), &
   10023           0 :                                       filter_eps=almo_scf_env%eps_filter)
   10024             : 
   10025          82 :                ELSE IF (my_special_case .EQ. xalmo_case_normal) THEN
   10026             : 
   10027             :                   CALL apply_domain_operators( &
   10028             :                      matrix_in=m_model_r(ispin), &
   10029             :                      matrix_out=m_model_rt(ispin), &
   10030             :                      operator1=almo_scf_env%domain_s_inv(:, ispin), &
   10031             :                      dpattern=quench_t(ispin), &
   10032             :                      map=almo_scf_env%domain_map(ispin), &
   10033             :                      node_of_domain=almo_scf_env%cpu_of_domain, &
   10034             :                      my_action=0, &
   10035          82 :                      filter_eps=almo_scf_env%eps_filter)
   10036             : 
   10037             :                END IF
   10038             : 
   10039             :             END DO ! ispin
   10040             : 
   10041             :             CALL compute_cg_beta( &
   10042             :                beta=beta, &
   10043             :                reset_conjugator=reset_conjugator, &
   10044             :                conjugator=optimizer%conjugator, &
   10045             :                grad=m_model_r(:), &
   10046             :                prev_grad=m_model_r_prev(:), &
   10047             :                step=m_model_rt(:), &
   10048             :                prev_step=m_model_rt_prev(:) &
   10049          82 :                )
   10050             : 
   10051         164 :             DO ispin = 1, nspins
   10052             :                ! update direction
   10053         164 :                CALL dbcsr_add(m_model_d(ispin), m_model_rt(ispin), beta, 1.0_dp)
   10054             :             END DO ! ispin
   10055             : 
   10056          82 :             t2 = m_walltime()
   10057             :             CALL fixed_r_report(unit_nr, &
   10058             :                                 iter_type=1, &
   10059             :                                 iteration=iteration, &
   10060             :                                 step_size=step_size, &
   10061             :                                 border_reached=border_reached, &
   10062             :                                 curvature=y_scalar, &
   10063             :                                 grad_norm_ratio=grad_norm_ratio, &
   10064          82 :                                 time=t2 - t1)
   10065          82 :             t1 = m_walltime()
   10066             : 
   10067             :          END DO fixed_r_loop
   10068             :          !!!! done with the inner loop
   10069             :          ! the inner loop must return: step, predicted reduction,
   10070             :          ! whether it reached the border and completed successfully
   10071             : 
   10072             :          IF (.NOT. inner_loop_success) THEN
   10073           0 :             CPABORT("Inner loop did not produce solution")
   10074             :          END IF
   10075             : 
   10076         816 :          DO ispin = 1, nspins
   10077             : 
   10078         408 :             CALL dbcsr_copy(m_theta_trial(ispin), m_theta(ispin))
   10079         816 :             CALL dbcsr_add(m_theta_trial(ispin), step(ispin), 1.0_dp, 1.0_dp)
   10080             : 
   10081             :          END DO ! ispin
   10082             : 
   10083             :          ! compute the energy
   10084             :          !IF (.NOT. same_position) THEN
   10085             :          CALL main_var_to_xalmos_and_loss_func( &
   10086             :             almo_scf_env=almo_scf_env, &
   10087             :             qs_env=qs_env, &
   10088             :             m_main_var_in=m_theta_trial, &
   10089             :             m_t_out=matrix_t_out, &
   10090             :             m_sig_sqrti_ii_out=m_sig_sqrti_ii, &
   10091             :             energy_out=energy_trial, &
   10092             :             penalty_out=penalty_trial, &
   10093             :             m_FTsiginv_out=FTsiginv, &
   10094             :             m_siginvTFTsiginv_out=siginvTFTsiginv, &
   10095             :             m_ST_out=ST, &
   10096             :             m_STsiginv0_in=STsiginv_0, &
   10097             :             m_quench_t_in=quench_t, &
   10098             :             domain_r_down_in=domain_r_down, &
   10099             :             assume_t0_q0x=assume_t0_q0x, &
   10100             :             just_started=.FALSE., &
   10101             :             optimize_theta=optimize_theta, &
   10102             :             normalize_orbitals=normalize_orbitals, &
   10103             :             perturbation_only=perturbation_only, &
   10104             :             do_penalty=penalty_occ_vol, &
   10105         408 :             special_case=my_special_case)
   10106         408 :          loss_trial = energy_trial + penalty_trial
   10107             :          !ENDIF ! not same_position
   10108             : 
   10109         408 :          rho = (loss_trial - loss_start)/expected_reduction
   10110         408 :          loss_change_to_report = loss_trial - loss_start
   10111             : 
   10112         408 :          IF (rho < 0.25_dp) THEN
   10113           0 :             radius_current = 0.25_dp*radius_current
   10114             :          ELSE
   10115         408 :             IF (rho > 0.75_dp .AND. border_reached) THEN
   10116           2 :                radius_current = MIN(2.0_dp*radius_current, radius_max)
   10117             :             END IF
   10118             :          END IF ! radius adjustment
   10119             : 
   10120         408 :          IF (rho > eta) THEN
   10121         816 :             DO ispin = 1, nspins
   10122         816 :                CALL dbcsr_copy(m_theta(ispin), m_theta_trial(ispin))
   10123             :             END DO ! ispin
   10124         408 :             loss_start = loss_trial
   10125         408 :             energy_start = energy_trial
   10126         408 :             penalty_start = penalty_trial
   10127         408 :             same_position = .FALSE.
   10128         408 :             IF (my_special_case .EQ. xalmo_case_block_diag) THEN
   10129          92 :                almo_scf_env%almo_scf_energy = energy_trial
   10130             :             END IF
   10131             :          ELSE
   10132           0 :             same_position = .TRUE.
   10133           0 :             IF (my_special_case .EQ. xalmo_case_block_diag) THEN
   10134           0 :                almo_scf_env%almo_scf_energy = energy_start
   10135             :             END IF
   10136             :          END IF ! finalize step
   10137             : 
   10138         408 :          t2outer = m_walltime()
   10139             :          CALL trust_r_report(unit_nr, &
   10140             :                              iter_type=2, &
   10141             :                              iteration=outer_iteration, &
   10142             :                              loss=loss_trial, &
   10143             :                              delta_loss=loss_change_to_report, &
   10144             :                              grad_norm=0.0_dp, &
   10145             :                              predicted_reduction=expected_reduction, &
   10146             :                              rho=rho, &
   10147             :                              radius=radius_current, &
   10148             :                              new=.NOT. same_position, &
   10149         408 :                              time=t2outer - t1outer)
   10150         426 :          t1outer = m_walltime()
   10151             : 
   10152             :       END DO adjust_r_loop
   10153             : 
   10154             :       ! post SCF-loop calculations
   10155          18 :       IF (scf_converged) THEN
   10156             : 
   10157             :          CALL wrap_up_xalmo_scf( &
   10158             :             qs_env=qs_env, &
   10159             :             almo_scf_env=almo_scf_env, &
   10160             :             perturbation_in=perturbation_only, &
   10161             :             m_xalmo_in=matrix_t_out, &
   10162             :             m_quench_in=quench_t, &
   10163          18 :             energy_inout=energy_start)
   10164             : 
   10165             :       END IF ! if converged
   10166             : 
   10167          36 :       DO ispin = 1, nspins
   10168          18 :          CALL dbcsr_release(m_model_hessian_inv(ispin))
   10169          18 :          CALL dbcsr_release(m_model_hessian(ispin))
   10170          18 :          CALL dbcsr_release(STsiginv_0(ispin))
   10171          18 :          CALL dbcsr_release(ST(ispin))
   10172          18 :          CALL dbcsr_release(FTsiginv(ispin))
   10173          18 :          CALL dbcsr_release(siginvTFTsiginv(ispin))
   10174          18 :          CALL dbcsr_release(prev_step(ispin))
   10175          18 :          CALL dbcsr_release(grad(ispin))
   10176          18 :          CALL dbcsr_release(step(ispin))
   10177          18 :          CALL dbcsr_release(m_theta(ispin))
   10178          18 :          CALL dbcsr_release(m_sig_sqrti_ii(ispin))
   10179          18 :          CALL dbcsr_release(m_model_r(ispin))
   10180          18 :          CALL dbcsr_release(m_model_rt(ispin))
   10181          18 :          CALL dbcsr_release(m_model_d(ispin))
   10182          18 :          CALL dbcsr_release(m_model_Bd(ispin))
   10183          18 :          CALL dbcsr_release(m_model_r_prev(ispin))
   10184          18 :          CALL dbcsr_release(m_model_rt_prev(ispin))
   10185          18 :          CALL dbcsr_release(m_theta_trial(ispin))
   10186          18 :          CALL release_submatrices(domain_r_down(:, ispin))
   10187          36 :          CALL release_submatrices(domain_model_hessian_inv(:, ispin))
   10188             :       END DO ! ispin
   10189             : 
   10190          18 :       IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
   10191             :           my_special_case .EQ. xalmo_case_fully_deloc) THEN
   10192           2 :          CALL dbcsr_release(m_s_inv)
   10193             :       END IF
   10194             : 
   10195          18 :       DEALLOCATE (m_model_hessian)
   10196          18 :       DEALLOCATE (m_model_hessian_inv)
   10197          18 :       DEALLOCATE (siginvTFTsiginv)
   10198          18 :       DEALLOCATE (STsiginv_0)
   10199          18 :       DEALLOCATE (FTsiginv)
   10200          18 :       DEALLOCATE (ST)
   10201          18 :       DEALLOCATE (grad)
   10202          18 :       DEALLOCATE (prev_step)
   10203          18 :       DEALLOCATE (step)
   10204          18 :       DEALLOCATE (m_sig_sqrti_ii)
   10205          18 :       DEALLOCATE (m_model_r)
   10206          18 :       DEALLOCATE (m_model_rt)
   10207          18 :       DEALLOCATE (m_model_d)
   10208          18 :       DEALLOCATE (m_model_Bd)
   10209          18 :       DEALLOCATE (m_model_r_prev)
   10210          18 :       DEALLOCATE (m_model_rt_prev)
   10211          18 :       DEALLOCATE (m_theta_trial)
   10212             : 
   10213         146 :       DEALLOCATE (domain_r_down)
   10214         146 :       DEALLOCATE (domain_model_hessian_inv)
   10215             : 
   10216          18 :       DEALLOCATE (penalty_occ_vol_g_prefactor)
   10217          18 :       DEALLOCATE (penalty_occ_vol_h_prefactor)
   10218          18 :       DEALLOCATE (grad_norm_spin)
   10219          18 :       DEALLOCATE (nocc)
   10220             : 
   10221          18 :       DEALLOCATE (m_theta)
   10222             : 
   10223          18 :       IF (.NOT. scf_converged .AND. .NOT. optimizer%early_stopping_on) THEN
   10224           0 :          CPABORT("Optimization not converged! ")
   10225             :       END IF
   10226             : 
   10227          18 :       CALL timestop(handle)
   10228             : 
   10229          36 :    END SUBROUTINE almo_scf_xalmo_trustr
   10230             : 
   10231             : ! **************************************************************************************************
   10232             : !> \brief Computes molecular orbitals and the objective (loss) function from the main variables
   10233             : !>        Most important input and output variables are given as arguments explicitly.
   10234             : !>        Some variables inside almo_scf_env (KS, DM) and qs_env are also updated but are not
   10235             : !>        listed as arguments for brevity
   10236             : !> \param almo_scf_env ...
   10237             : !> \param qs_env ...
   10238             : !> \param m_main_var_in ...
   10239             : !> \param m_t_out ...
   10240             : !> \param energy_out ...
   10241             : !> \param penalty_out ...
   10242             : !> \param m_sig_sqrti_ii_out ...
   10243             : !> \param m_FTsiginv_out ...
   10244             : !> \param m_siginvTFTsiginv_out ...
   10245             : !> \param m_ST_out ...
   10246             : !> \param m_STsiginv0_in ...
   10247             : !> \param m_quench_t_in ...
   10248             : !> \param domain_r_down_in ...
   10249             : !> \param assume_t0_q0x ...
   10250             : !> \param just_started ...
   10251             : !> \param optimize_theta ...
   10252             : !> \param normalize_orbitals ...
   10253             : !> \param perturbation_only ...
   10254             : !> \param do_penalty ...
   10255             : !> \param special_case ...
   10256             : !> \par History
   10257             : !>       2019.12 created [Rustam Z Khaliullin]
   10258             : !> \author Rustam Z Khaliullin
   10259             : ! **************************************************************************************************
   10260        1474 :    SUBROUTINE main_var_to_xalmos_and_loss_func(almo_scf_env, qs_env, m_main_var_in, &
   10261        1474 :                                                m_t_out, energy_out, penalty_out, m_sig_sqrti_ii_out, m_FTsiginv_out, &
   10262        1474 :                                                m_siginvTFTsiginv_out, m_ST_out, m_STsiginv0_in, m_quench_t_in, domain_r_down_in, &
   10263             :                                                assume_t0_q0x, just_started, optimize_theta, normalize_orbitals, perturbation_only, &
   10264             :                                                do_penalty, special_case)
   10265             : 
   10266             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
   10267             :       TYPE(qs_environment_type), POINTER                 :: qs_env
   10268             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_main_var_in
   10269             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_t_out
   10270             :       REAL(KIND=dp), INTENT(OUT)                         :: energy_out, penalty_out
   10271             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_sig_sqrti_ii_out, m_FTsiginv_out, &
   10272             :                                                             m_siginvTFTsiginv_out, m_ST_out, &
   10273             :                                                             m_STsiginv0_in, m_quench_t_in
   10274             :       TYPE(domain_submatrix_type), DIMENSION(:, :), &
   10275             :          INTENT(IN)                                      :: domain_r_down_in
   10276             :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, just_started, &
   10277             :                                                             optimize_theta, normalize_orbitals, &
   10278             :                                                             perturbation_only, do_penalty
   10279             :       INTEGER, INTENT(IN)                                :: special_case
   10280             : 
   10281             :       CHARACTER(len=*), PARAMETER :: routineN = 'main_var_to_xalmos_and_loss_func'
   10282             : 
   10283             :       INTEGER                                            :: handle, ispin, nspins
   10284        1474 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
   10285             :       REAL(KIND=dp)                                      :: det1, energy_ispin, penalty_amplitude, &
   10286             :                                                             spin_factor
   10287             : 
   10288        1474 :       CALL timeset(routineN, handle)
   10289             : 
   10290        1474 :       energy_out = 0.0_dp
   10291        1474 :       penalty_out = 0.0_dp
   10292             : 
   10293        1474 :       nspins = SIZE(m_main_var_in)
   10294        1474 :       IF (nspins == 1) THEN
   10295        1474 :          spin_factor = 2.0_dp
   10296             :       ELSE
   10297           0 :          spin_factor = 1.0_dp
   10298             :       END IF
   10299             : 
   10300        1474 :       penalty_amplitude = 0.0_dp !almo_scf_env%penalty%occ_vol_coeff
   10301             : 
   10302        4422 :       ALLOCATE (nocc(nspins))
   10303        2948 :       DO ispin = 1, nspins
   10304             :          CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), &
   10305        2948 :                              nfullrows_total=nocc(ispin))
   10306             :       END DO
   10307             : 
   10308        2948 :       DO ispin = 1, nspins
   10309             : 
   10310             :          ! compute MO coefficients from the main variable
   10311             :          CALL compute_xalmos_from_main_var( &
   10312             :             m_var_in=m_main_var_in(ispin), &
   10313             :             m_t_out=m_t_out(ispin), &
   10314             :             m_quench_t=m_quench_t_in(ispin), &
   10315             :             m_t0=almo_scf_env%matrix_t_blk(ispin), &
   10316             :             m_oo_template=almo_scf_env%matrix_sigma_inv(ispin), &
   10317             :             m_STsiginv0=m_STsiginv0_in(ispin), &
   10318             :             m_s=almo_scf_env%matrix_s(1), &
   10319             :             m_sig_sqrti_ii_out=m_sig_sqrti_ii_out(ispin), &
   10320             :             domain_r_down=domain_r_down_in(:, ispin), &
   10321             :             domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
   10322             :             domain_map=almo_scf_env%domain_map(ispin), &
   10323             :             cpu_of_domain=almo_scf_env%cpu_of_domain, &
   10324             :             assume_t0_q0x=assume_t0_q0x, &
   10325             :             just_started=just_started, &
   10326             :             optimize_theta=optimize_theta, &
   10327             :             normalize_orbitals=normalize_orbitals, &
   10328             :             envelope_amplitude=almo_scf_env%envelope_amplitude, &
   10329             :             eps_filter=almo_scf_env%eps_filter, &
   10330             :             special_case=special_case, &
   10331             :             nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
   10332             :             order_lanczos=almo_scf_env%order_lanczos, &
   10333             :             eps_lanczos=almo_scf_env%eps_lanczos, &
   10334        1474 :             max_iter_lanczos=almo_scf_env%max_iter_lanczos)
   10335             : 
   10336             :          ! compute the global projectors (for the density matrix)
   10337             :          CALL almo_scf_t_to_proj( &
   10338             :             t=m_t_out(ispin), &
   10339             :             p=almo_scf_env%matrix_p(ispin), &
   10340             :             eps_filter=almo_scf_env%eps_filter, &
   10341             :             orthog_orbs=.FALSE., &
   10342             :             nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
   10343             :             s=almo_scf_env%matrix_s(1), &
   10344             :             sigma=almo_scf_env%matrix_sigma(ispin), &
   10345             :             sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
   10346             :             use_guess=.FALSE., &
   10347             :             algorithm=almo_scf_env%sigma_inv_algorithm, &
   10348             :             inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
   10349             :             inverse_accelerator=almo_scf_env%order_lanczos, &
   10350             :             eps_lanczos=almo_scf_env%eps_lanczos, &
   10351             :             max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
   10352             :             para_env=almo_scf_env%para_env, &
   10353        1474 :             blacs_env=almo_scf_env%blacs_env)
   10354             : 
   10355             :          ! compute dm from the projector(s)
   10356             :          CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), &
   10357        2948 :                           spin_factor)
   10358             : 
   10359             :       END DO ! ispin
   10360             : 
   10361             :       ! update the KS matrix and energy if necessary
   10362        1474 :       IF (perturbation_only) THEN
   10363             :          ! note: do not combine the two IF statements
   10364         212 :          IF (just_started) THEN
   10365          48 :             DO ispin = 1, nspins
   10366             :                CALL dbcsr_copy(almo_scf_env%matrix_ks(ispin), &
   10367          48 :                                almo_scf_env%matrix_ks_0deloc(ispin))
   10368             :             END DO
   10369             :          END IF
   10370             :       ELSE
   10371             :          ! the KS matrix is updated outside the spin loop
   10372             :          CALL almo_dm_to_almo_ks(qs_env, &
   10373             :                                  almo_scf_env%matrix_p, &
   10374             :                                  almo_scf_env%matrix_ks, &
   10375             :                                  energy_out, &
   10376             :                                  almo_scf_env%eps_filter, &
   10377        1262 :                                  almo_scf_env%mat_distr_aos)
   10378             :       END IF
   10379             : 
   10380        1474 :       penalty_out = 0.0_dp
   10381        2948 :       DO ispin = 1, nspins
   10382             : 
   10383             :          CALL compute_frequently_used_matrices( &
   10384             :             filter_eps=almo_scf_env%eps_filter, &
   10385             :             m_T_in=m_t_out(ispin), &
   10386             :             m_siginv_in=almo_scf_env%matrix_sigma_inv(ispin), &
   10387             :             m_S_in=almo_scf_env%matrix_s(1), &
   10388             :             m_F_in=almo_scf_env%matrix_ks(ispin), &
   10389             :             m_FTsiginv_out=m_FTsiginv_out(ispin), &
   10390             :             m_siginvTFTsiginv_out=m_siginvTFTsiginv_out(ispin), &
   10391        1474 :             m_ST_out=m_ST_out(ispin))
   10392             : 
   10393        1474 :          IF (perturbation_only) THEN
   10394             :             ! calculate objective function Tr(F_0 R)
   10395         212 :             IF (ispin .EQ. 1) energy_out = 0.0_dp
   10396         212 :             CALL dbcsr_dot(m_t_out(ispin), m_FTsiginv_out(ispin), energy_ispin)
   10397         212 :             energy_out = energy_out + energy_ispin*spin_factor
   10398             :          END IF
   10399             : 
   10400        2948 :          IF (do_penalty) THEN
   10401             : 
   10402             :             CALL determinant(almo_scf_env%matrix_sigma(ispin), det1, &
   10403           0 :                              almo_scf_env%eps_filter)
   10404             :             penalty_out = penalty_out - &
   10405           0 :                           penalty_amplitude*spin_factor*nocc(ispin)*LOG(det1)
   10406             : 
   10407             :          END IF
   10408             : 
   10409             :       END DO ! ispin
   10410             : 
   10411        1474 :       DEALLOCATE (nocc)
   10412             : 
   10413        1474 :       CALL timestop(handle)
   10414             : 
   10415        1474 :    END SUBROUTINE main_var_to_xalmos_and_loss_func
   10416             : 
   10417             : ! **************************************************************************************************
   10418             : !> \brief Computes the step size required to reach the trust-radius border,
   10419             : !>        measured from the origin,
   10420             : !>        given the current position (position) in the direction (direction)
   10421             : !> \param step_size_out ...
   10422             : !> \param metric_in ...
   10423             : !> \param position_in ...
   10424             : !> \param direction_in ...
   10425             : !> \param trust_radius_in ...
   10426             : !> \param quench_t_in ...
   10427             : !> \param eps_filter_in ...
   10428             : !> \par History
   10429             : !>       2019.12 created [Rustam Z Khaliullin]
   10430             : !> \author Rustam Z Khaliullin
   10431             : ! **************************************************************************************************
   10432          36 :    SUBROUTINE step_size_to_border(step_size_out, metric_in, position_in, &
   10433          36 :                                   direction_in, trust_radius_in, quench_t_in, eps_filter_in)
   10434             : 
   10435             :       REAL(KIND=dp), INTENT(INOUT)                       :: step_size_out
   10436             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: metric_in, position_in, direction_in
   10437             :       REAL(KIND=dp), INTENT(IN)                          :: trust_radius_in
   10438             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: quench_t_in
   10439             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter_in
   10440             : 
   10441             :       INTEGER                                            :: isol, ispin, nsolutions, &
   10442             :                                                             nsolutions_found, nspins
   10443          36 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
   10444             :       REAL(KIND=dp)                                      :: discrim_sign, discriminant, solution, &
   10445             :                                                             spin_factor, temp_real
   10446             :       REAL(KIND=dp), DIMENSION(3)                        :: coef
   10447          36 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no
   10448             : 
   10449          36 :       step_size_out = 0.0_dp
   10450             : 
   10451          36 :       nspins = SIZE(position_in)
   10452          36 :       IF (nspins == 1) THEN
   10453             :          spin_factor = 2.0_dp
   10454             :       ELSE
   10455           0 :          spin_factor = 1.0_dp
   10456             :       END IF
   10457             : 
   10458         108 :       ALLOCATE (nocc(nspins))
   10459         144 :       ALLOCATE (m_temp_no(nspins))
   10460             : 
   10461          36 :       coef(:) = 0.0_dp
   10462          72 :       DO ispin = 1, nspins
   10463             : 
   10464             :          CALL dbcsr_create(m_temp_no(ispin), &
   10465          36 :                            template=direction_in(ispin))
   10466             : 
   10467             :          CALL dbcsr_get_info(direction_in(ispin), &
   10468          36 :                              nfullcols_total=nocc(ispin))
   10469             : 
   10470          36 :          CALL dbcsr_copy(m_temp_no(ispin), quench_t_in(ispin))
   10471             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10472             :                              metric_in(1), &
   10473             :                              position_in(ispin), &
   10474             :                              0.0_dp, m_temp_no(ispin), &
   10475          36 :                              retain_sparsity=.TRUE.)
   10476          36 :          CALL dbcsr_filter(m_temp_no(ispin), eps_filter_in)
   10477          36 :          CALL dbcsr_dot(position_in(ispin), m_temp_no(ispin), temp_real)
   10478          36 :          coef(3) = coef(3) + temp_real/nocc(ispin)
   10479          36 :          CALL dbcsr_dot(direction_in(ispin), m_temp_no(ispin), temp_real)
   10480          36 :          coef(2) = coef(2) + 2.0_dp*temp_real/nocc(ispin)
   10481          36 :          CALL dbcsr_copy(m_temp_no(ispin), quench_t_in(ispin))
   10482             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10483             :                              metric_in(1), &
   10484             :                              direction_in(ispin), &
   10485             :                              0.0_dp, m_temp_no(ispin), &
   10486          36 :                              retain_sparsity=.TRUE.)
   10487          36 :          CALL dbcsr_filter(m_temp_no(ispin), eps_filter_in)
   10488          36 :          CALL dbcsr_dot(direction_in(ispin), m_temp_no(ispin), temp_real)
   10489          36 :          coef(1) = coef(1) + temp_real/nocc(ispin)
   10490             : 
   10491         108 :          CALL dbcsr_release(m_temp_no(ispin))
   10492             : 
   10493             :       END DO !ispin
   10494             : 
   10495          36 :       DEALLOCATE (nocc)
   10496          36 :       DEALLOCATE (m_temp_no)
   10497             : 
   10498         144 :       coef(:) = coef(:)*spin_factor
   10499          36 :       coef(3) = coef(3) - trust_radius_in*trust_radius_in
   10500             : 
   10501             :       ! solve the quadratic equation
   10502          36 :       discriminant = coef(2)*coef(2) - 4.0_dp*coef(1)*coef(3)
   10503          36 :       IF (discriminant .GT. TINY(discriminant)) THEN
   10504             :          nsolutions = 2
   10505           0 :       ELSE IF (discriminant .LT. 0.0_dp) THEN
   10506           0 :          nsolutions = 0
   10507           0 :          CPABORT("Step to border: no solutions")
   10508             :       ELSE
   10509             :          nsolutions = 1
   10510             :       END IF
   10511             : 
   10512          36 :       discrim_sign = 1.0_dp
   10513          36 :       nsolutions_found = 0
   10514         108 :       DO isol = 1, nsolutions
   10515          72 :          solution = (-coef(2) + discrim_sign*SQRT(discriminant))/(2.0_dp*coef(1))
   10516          72 :          IF (solution .GT. 0.0_dp) THEN
   10517          36 :             nsolutions_found = nsolutions_found + 1
   10518          36 :             step_size_out = solution
   10519             :          END IF
   10520         108 :          discrim_sign = -discrim_sign
   10521             :       END DO
   10522             : 
   10523          36 :       IF (nsolutions_found == 0) THEN
   10524           0 :          CPABORT("Step to border: no positive solutions")
   10525          36 :       ELSE IF (nsolutions_found == 2) THEN
   10526           0 :          CPABORT("Two positive border steps possible!")
   10527             :       END IF
   10528             : 
   10529          36 :    END SUBROUTINE step_size_to_border
   10530             : 
   10531             : ! **************************************************************************************************
   10532             : !> \brief Computes a norm of a contravariant NBasis x Occ matrix using proper metric
   10533             : !> \param norm_out ...
   10534             : !> \param matrix_in ...
   10535             : !> \param metric_in ...
   10536             : !> \param quench_t_in ...
   10537             : !> \param eps_filter_in ...
   10538             : !> \par History
   10539             : !>       2019.12 created [Rustam Z Khaliullin]
   10540             : !> \author Rustam Z Khaliullin
   10541             : ! **************************************************************************************************
   10542         758 :    SUBROUTINE contravariant_matrix_norm(norm_out, matrix_in, metric_in, &
   10543         758 :                                         quench_t_in, eps_filter_in)
   10544             : 
   10545             :       REAL(KIND=dp), INTENT(OUT)                         :: norm_out
   10546             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: matrix_in, metric_in, quench_t_in
   10547             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter_in
   10548             : 
   10549             :       INTEGER                                            :: ispin, nspins
   10550         758 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
   10551             :       REAL(KIND=dp)                                      :: my_norm, spin_factor, temp_real
   10552         758 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no
   10553             : 
   10554             :       ! Frist thing: assign the output value to avoid norms being undefined
   10555         758 :       norm_out = 0.0_dp
   10556             : 
   10557         758 :       nspins = SIZE(matrix_in)
   10558         758 :       IF (nspins == 1) THEN
   10559             :          spin_factor = 2.0_dp
   10560             :       ELSE
   10561           0 :          spin_factor = 1.0_dp
   10562             :       END IF
   10563             : 
   10564        2274 :       ALLOCATE (nocc(nspins))
   10565        3032 :       ALLOCATE (m_temp_no(nspins))
   10566             : 
   10567         758 :       my_norm = 0.0_dp
   10568        1516 :       DO ispin = 1, nspins
   10569             : 
   10570         758 :          CALL dbcsr_create(m_temp_no(ispin), template=matrix_in(ispin))
   10571             : 
   10572             :          CALL dbcsr_get_info(matrix_in(ispin), &
   10573         758 :                              nfullcols_total=nocc(ispin))
   10574             : 
   10575         758 :          CALL dbcsr_copy(m_temp_no(ispin), quench_t_in(ispin))
   10576             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10577             :                              metric_in(1), &
   10578             :                              matrix_in(ispin), &
   10579             :                              0.0_dp, m_temp_no(ispin), &
   10580         758 :                              retain_sparsity=.TRUE.)
   10581         758 :          CALL dbcsr_filter(m_temp_no(ispin), eps_filter_in)
   10582         758 :          CALL dbcsr_dot(matrix_in(ispin), m_temp_no(ispin), temp_real)
   10583             : 
   10584         758 :          my_norm = my_norm + temp_real/nocc(ispin)
   10585             : 
   10586        1516 :          CALL dbcsr_release(m_temp_no(ispin))
   10587             : 
   10588             :       END DO !ispin
   10589             : 
   10590         758 :       DEALLOCATE (nocc)
   10591         758 :       DEALLOCATE (m_temp_no)
   10592             : 
   10593         758 :       my_norm = my_norm*spin_factor
   10594         758 :       norm_out = SQRT(my_norm)
   10595             : 
   10596         758 :    END SUBROUTINE contravariant_matrix_norm
   10597             : 
   10598             : ! **************************************************************************************************
   10599             : !> \brief Loss reduction for a given step is estimated using
   10600             : !>        gradient and hessian
   10601             : !> \param reduction_out ...
   10602             : !> \param grad_in ...
   10603             : !> \param step_in ...
   10604             : !> \param hess_in ...
   10605             : !> \param hess_submatrix_in ...
   10606             : !> \param quench_t_in ...
   10607             : !> \param special_case ...
   10608             : !> \param eps_filter ...
   10609             : !> \param domain_map ...
   10610             : !> \param cpu_of_domain ...
   10611             : !> \par History
   10612             : !>       2019.12 created [Rustam Z Khaliullin]
   10613             : !> \author Rustam Z Khaliullin
   10614             : ! **************************************************************************************************
   10615         408 :    SUBROUTINE predicted_reduction(reduction_out, grad_in, step_in, hess_in, &
   10616         408 :                                   hess_submatrix_in, quench_t_in, special_case, eps_filter, domain_map, &
   10617         408 :                                   cpu_of_domain)
   10618             : 
   10619             :       !RZK-noncritical: can be formulated without submatrices
   10620             :       REAL(KIND=dp), INTENT(INOUT)                       :: reduction_out
   10621             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: grad_in, step_in, hess_in
   10622             :       TYPE(domain_submatrix_type), DIMENSION(:, :), &
   10623             :          INTENT(IN)                                      :: hess_submatrix_in
   10624             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: quench_t_in
   10625             :       INTEGER, INTENT(IN)                                :: special_case
   10626             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
   10627             :       TYPE(domain_map_type), DIMENSION(:), INTENT(IN)    :: domain_map
   10628             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
   10629             : 
   10630             :       INTEGER                                            :: ispin, nspins
   10631             :       REAL(KIND=dp)                                      :: my_reduction, spin_factor, temp_real
   10632         408 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no
   10633             : 
   10634         408 :       reduction_out = 0.0_dp
   10635             : 
   10636         408 :       nspins = SIZE(grad_in)
   10637         408 :       IF (nspins == 1) THEN
   10638             :          spin_factor = 2.0_dp
   10639             :       ELSE
   10640           0 :          spin_factor = 1.0_dp
   10641             :       END IF
   10642             : 
   10643        1632 :       ALLOCATE (m_temp_no(nspins))
   10644             : 
   10645         408 :       my_reduction = 0.0_dp
   10646         816 :       DO ispin = 1, nspins
   10647             : 
   10648         408 :          CALL dbcsr_create(m_temp_no(ispin), template=grad_in(ispin))
   10649             : 
   10650         408 :          CALL dbcsr_dot(step_in(ispin), grad_in(ispin), temp_real)
   10651         408 :          my_reduction = my_reduction + temp_real
   10652             : 
   10653             :          ! Get Hess.step
   10654         408 :          IF (special_case .EQ. xalmo_case_block_diag .OR. &
   10655             :              special_case .EQ. xalmo_case_fully_deloc) THEN
   10656             : 
   10657             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10658             :                                 hess_in(ispin), &
   10659             :                                 step_in(ispin), &
   10660             :                                 0.0_dp, m_temp_no(ispin), &
   10661          92 :                                 filter_eps=eps_filter)
   10662             : 
   10663             :          ELSE
   10664             : 
   10665             :             CALL apply_domain_operators( &
   10666             :                matrix_in=step_in(ispin), &
   10667             :                matrix_out=m_temp_no(ispin), &
   10668             :                operator1=hess_submatrix_in(:, ispin), &
   10669             :                dpattern=quench_t_in(ispin), &
   10670             :                map=domain_map(ispin), &
   10671             :                node_of_domain=cpu_of_domain, &
   10672             :                my_action=0, &
   10673         316 :                filter_eps=eps_filter)
   10674             : 
   10675             :          END IF ! special case
   10676             : 
   10677             :          ! Get y=step^T.Hess.step
   10678         408 :          CALL dbcsr_dot(step_in(ispin), m_temp_no(ispin), temp_real)
   10679         408 :          my_reduction = my_reduction + 0.5_dp*temp_real
   10680             : 
   10681        1224 :          CALL dbcsr_release(m_temp_no(ispin))
   10682             : 
   10683             :       END DO ! ispin
   10684             : 
   10685             :       !RZK-critical: do we need to multiply by the spin factor?
   10686         408 :       my_reduction = spin_factor*my_reduction
   10687             : 
   10688         408 :       reduction_out = my_reduction
   10689             : 
   10690         408 :       DEALLOCATE (m_temp_no)
   10691             : 
   10692         408 :    END SUBROUTINE predicted_reduction
   10693             : 
   10694             : ! **************************************************************************************************
   10695             : !> \brief Prints key quantities from the fixed-radius minimizer
   10696             : !> \param unit_nr ...
   10697             : !> \param iter_type ...
   10698             : !> \param iteration ...
   10699             : !> \param step_size ...
   10700             : !> \param border_reached ...
   10701             : !> \param curvature ...
   10702             : !> \param grad_norm_ratio ...
   10703             : !> \param predicted_reduction ...
   10704             : !> \param time ...
   10705             : !> \par History
   10706             : !>       2019.12 created [Rustam Z Khaliullin]
   10707             : !> \author Rustam Z Khaliullin
   10708             : ! **************************************************************************************************
   10709         898 :    SUBROUTINE fixed_r_report(unit_nr, iter_type, iteration, step_size, &
   10710             :                              border_reached, curvature, grad_norm_ratio, predicted_reduction, time)
   10711             : 
   10712             :       INTEGER, INTENT(IN)                                :: unit_nr, iter_type, iteration
   10713             :       REAL(KIND=dp), INTENT(IN)                          :: step_size
   10714             :       LOGICAL, INTENT(IN)                                :: border_reached
   10715             :       REAL(KIND=dp), INTENT(IN)                          :: curvature
   10716             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: grad_norm_ratio, predicted_reduction
   10717             :       REAL(KIND=dp), INTENT(IN)                          :: time
   10718             : 
   10719             :       CHARACTER(LEN=20)                                  :: iter_type_str
   10720             :       REAL(KIND=dp)                                      :: loss_or_grad_change
   10721             : 
   10722         898 :       loss_or_grad_change = 0.0_dp
   10723         898 :       IF (PRESENT(grad_norm_ratio)) THEN
   10724         898 :          loss_or_grad_change = grad_norm_ratio
   10725           0 :       ELSE IF (PRESENT(predicted_reduction)) THEN
   10726           0 :          loss_or_grad_change = predicted_reduction
   10727             :       ELSE
   10728           0 :          CPABORT("one argument is missing")
   10729             :       END IF
   10730             : 
   10731        1306 :       SELECT CASE (iter_type)
   10732             :       CASE (0)
   10733         408 :          iter_type_str = TRIM("Ignored")
   10734             :       CASE (1)
   10735          82 :          iter_type_str = TRIM("PCG")
   10736             :       CASE (2)
   10737           0 :          iter_type_str = TRIM("Neg. curvatr.")
   10738             :       CASE (3)
   10739          34 :          iter_type_str = TRIM("Step too long")
   10740             :       CASE (4)
   10741          26 :          iter_type_str = TRIM("Grad. reduced")
   10742             :       CASE (5)
   10743          80 :          iter_type_str = TRIM("Cauchy point")
   10744             :       CASE (6)
   10745         266 :          iter_type_str = TRIM("Full dogleg")
   10746             :       CASE (7)
   10747           2 :          iter_type_str = TRIM("Part. dogleg")
   10748             :       CASE DEFAULT
   10749         898 :          CPABORT("unknown report type")
   10750             :       END SELECT
   10751             : 
   10752         898 :       IF (unit_nr > 0) THEN
   10753             : 
   10754         204 :          SELECT CASE (iter_type)
   10755             :          CASE (0)
   10756             : 
   10757         204 :             WRITE (unit_nr, *)
   10758             :             WRITE (unit_nr, '(T4,A15,A6,A10,A10,A7,A20,A8)') &
   10759         204 :                "Action", &
   10760         204 :                "Iter", &
   10761         204 :                "Curv", &
   10762         204 :                "Step", &
   10763         204 :                "Edge?", &
   10764         204 :                "Grad/o.f. reduc", &
   10765         408 :                "Time"
   10766             : 
   10767             :          CASE DEFAULT
   10768             : 
   10769             :             WRITE (unit_nr, '(T4,A15,I6,F10.5,F10.5,L7,F20.10,F8.2)') &
   10770         245 :                iter_type_str, &
   10771         245 :                iteration, &
   10772         245 :                curvature, step_size, border_reached, &
   10773         245 :                loss_or_grad_change, &
   10774         694 :                time
   10775             : 
   10776             :          END SELECT
   10777             : 
   10778             :          ! epilogue
   10779         204 :          SELECT CASE (iter_type)
   10780             :          CASE (2, 3, 4, 5, 6, 7)
   10781             : 
   10782         449 :             WRITE (unit_nr, *)
   10783             : 
   10784             :          END SELECT
   10785             : 
   10786             :       END IF
   10787             : 
   10788         898 :    END SUBROUTINE fixed_r_report
   10789             : 
   10790             : ! **************************************************************************************************
   10791             : !> \brief Prints key quantities from the loop that tunes trust radius
   10792             : !> \param unit_nr ...
   10793             : !> \param iter_type ...
   10794             : !> \param iteration ...
   10795             : !> \param radius ...
   10796             : !> \param loss ...
   10797             : !> \param delta_loss ...
   10798             : !> \param grad_norm ...
   10799             : !> \param predicted_reduction ...
   10800             : !> \param rho ...
   10801             : !> \param new ...
   10802             : !> \param time ...
   10803             : !> \par History
   10804             : !>       2019.12 created [Rustam Z Khaliullin]
   10805             : !> \author Rustam Z Khaliullin
   10806             : ! **************************************************************************************************
   10807         843 :    SUBROUTINE trust_r_report(unit_nr, iter_type, iteration, radius, &
   10808             :                              loss, delta_loss, grad_norm, predicted_reduction, rho, new, time)
   10809             : 
   10810             :       INTEGER, INTENT(IN)                                :: unit_nr, iter_type, iteration
   10811             :       REAL(KIND=dp), INTENT(IN)                          :: radius, loss, delta_loss, grad_norm, &
   10812             :                                                             predicted_reduction, rho
   10813             :       LOGICAL, INTENT(IN)                                :: new
   10814             :       REAL(KIND=dp), INTENT(IN)                          :: time
   10815             : 
   10816             :       CHARACTER(LEN=20)                                  :: iter_status, iter_type_str
   10817             : 
   10818         852 :       SELECT CASE (iter_type)
   10819             :       CASE (0) ! header
   10820           9 :          iter_type_str = TRIM("Iter")
   10821           9 :          iter_status = TRIM("Stat")
   10822             :       CASE (1) ! first iteration, not all data is available yet
   10823         426 :          iter_type_str = TRIM("TR INI")
   10824         426 :          IF (new) THEN
   10825         426 :             iter_status = "  New" ! new point
   10826             :          ELSE
   10827           0 :             iter_status = " Redo" ! restarted
   10828             :          END IF
   10829             :       CASE (2) ! typical
   10830         408 :          iter_type_str = TRIM("TR FIN")
   10831         408 :          IF (new) THEN
   10832         408 :             iter_status = "  Acc" ! accepted
   10833             :          ELSE
   10834           0 :             iter_status = "  Rej" ! rejected
   10835             :          END IF
   10836             :       CASE DEFAULT
   10837         843 :          CPABORT("unknown report type")
   10838             :       END SELECT
   10839             : 
   10840         843 :       IF (unit_nr > 0) THEN
   10841             : 
   10842           9 :          SELECT CASE (iter_type)
   10843             :          CASE (0)
   10844             : 
   10845             :             WRITE (unit_nr, '(T2,A6,A5,A6,A22,A10,T67,A7,A6)') &
   10846           9 :                "Method", &
   10847           9 :                "Stat", &
   10848           9 :                "Iter", &
   10849           9 :                "Objective Function", &
   10850           9 :                "Conver", &!"Model Change", "Rho", &
   10851           9 :                "Radius", &
   10852          18 :                "Time"
   10853             :             WRITE (unit_nr, '(T41,A10,A10,A6)') &
   10854             :                !"Method", &
   10855             :                !"Iter", &
   10856             :                !"Objective Function", &
   10857           9 :                "Change", "Expct.", "Rho"
   10858             :             !"Radius", &
   10859             :             !"Time"
   10860             : 
   10861             :          CASE (1)
   10862             : 
   10863             :             WRITE (unit_nr, '(T2,A6,A5,I6,F22.10,ES10.2,T67,ES7.0,F6.1)') &
   10864         213 :                iter_type_str, &
   10865         213 :                iter_status, &
   10866         213 :                iteration, &
   10867         213 :                loss, &
   10868         213 :                grad_norm, & ! distinct
   10869         213 :                radius, &
   10870         426 :                time
   10871             : 
   10872             :          CASE (2)
   10873             : 
   10874             :             WRITE (unit_nr, '(T2,A6,A5,I6,F22.10,ES10.2,ES10.2,F6.1,ES7.0,F6.1)') &
   10875         204 :                iter_type_str, &
   10876         204 :                iter_status, &
   10877         204 :                iteration, &
   10878         204 :                loss, &
   10879         204 :                delta_loss, predicted_reduction, rho, & ! distinct
   10880         204 :                radius, &
   10881         630 :                time
   10882             : 
   10883             :          END SELECT
   10884             :       END IF
   10885             : 
   10886         843 :    END SUBROUTINE trust_r_report
   10887             : 
   10888             : ! **************************************************************************************************
   10889             : !> \brief ...
   10890             : !> \param unit_nr ...
   10891             : !> \param ref_energy ...
   10892             : !> \param energy_lowering ...
   10893             : ! **************************************************************************************************
   10894          26 :    SUBROUTINE energy_lowering_report(unit_nr, ref_energy, energy_lowering)
   10895             : 
   10896             :       INTEGER, INTENT(IN)                                :: unit_nr
   10897             :       REAL(KIND=dp), INTENT(IN)                          :: ref_energy, energy_lowering
   10898             : 
   10899             :       ! print out the energy lowering
   10900          26 :       IF (unit_nr > 0) THEN
   10901          13 :          WRITE (unit_nr, *)
   10902          13 :          WRITE (unit_nr, '(T2,A35,F25.10)') "ENERGY OF BLOCK-DIAGONAL ALMOs:", &
   10903          26 :             ref_energy
   10904          13 :          WRITE (unit_nr, '(T2,A35,F25.10)') "ENERGY LOWERING:", &
   10905          26 :             energy_lowering
   10906          13 :          WRITE (unit_nr, '(T2,A35,F25.10)') "CORRECTED ENERGY:", &
   10907          26 :             ref_energy + energy_lowering
   10908          13 :          WRITE (unit_nr, *)
   10909             :       END IF
   10910             : 
   10911          26 :    END SUBROUTINE energy_lowering_report
   10912             : 
   10913             :    ! post SCF-loop calculations
   10914             : ! **************************************************************************************************
   10915             : !> \brief ...
   10916             : !> \param qs_env ...
   10917             : !> \param almo_scf_env ...
   10918             : !> \param perturbation_in ...
   10919             : !> \param m_xalmo_in ...
   10920             : !> \param m_quench_in ...
   10921             : !> \param energy_inout ...
   10922             : ! **************************************************************************************************
   10923         104 :    SUBROUTINE wrap_up_xalmo_scf(qs_env, almo_scf_env, perturbation_in, &
   10924         104 :                                 m_xalmo_in, m_quench_in, energy_inout)
   10925             : 
   10926             :       TYPE(qs_environment_type), POINTER                 :: qs_env
   10927             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
   10928             :       LOGICAL, INTENT(IN)                                :: perturbation_in
   10929             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_xalmo_in, m_quench_in
   10930             :       REAL(KIND=dp), INTENT(INOUT)                       :: energy_inout
   10931             : 
   10932             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'wrap_up_xalmo_scf'
   10933             : 
   10934             :       INTEGER                                            :: eda_unit, handle, ispin, nspins, unit_nr
   10935             :       TYPE(cp_logger_type), POINTER                      :: logger
   10936         104 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no1, m_temp_no2
   10937             :       TYPE(section_vals_type), POINTER                   :: almo_print_section, input
   10938             : 
   10939         104 :       CALL timeset(routineN, handle)
   10940             : 
   10941             :       ! get a useful output_unit
   10942         104 :       logger => cp_get_default_logger()
   10943         104 :       IF (logger%para_env%is_source()) THEN
   10944          52 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
   10945             :       ELSE
   10946          52 :          unit_nr = -1
   10947             :       END IF
   10948             : 
   10949         104 :       nspins = almo_scf_env%nspins
   10950             : 
   10951             :       ! RZK-warning: must obtain MO coefficients from final theta
   10952             : 
   10953         104 :       IF (perturbation_in) THEN
   10954             : 
   10955          96 :          ALLOCATE (m_temp_no1(nspins))
   10956          72 :          ALLOCATE (m_temp_no2(nspins))
   10957             : 
   10958          48 :          DO ispin = 1, nspins
   10959          24 :             CALL dbcsr_create(m_temp_no1(ispin), template=m_xalmo_in(ispin))
   10960          48 :             CALL dbcsr_create(m_temp_no2(ispin), template=m_xalmo_in(ispin))
   10961             :          END DO
   10962             : 
   10963             :          ! return perturbed density to qs_env
   10964             :          CALL almo_dm_to_qs_env(qs_env, almo_scf_env%matrix_p, &
   10965          24 :                                 almo_scf_env%mat_distr_aos)
   10966             : 
   10967             :          ! compute energy correction and perform
   10968             :          ! detailed decomposition analysis (if requested)
   10969             :          ! reuse step and grad matrices to store decomposition results
   10970             :          CALL xalmo_analysis( &
   10971             :             detailed_analysis=almo_scf_env%almo_analysis%do_analysis, &
   10972             :             eps_filter=almo_scf_env%eps_filter, &
   10973             :             m_T_in=m_xalmo_in, &
   10974             :             m_T0_in=almo_scf_env%matrix_t_blk, &
   10975             :             m_siginv_in=almo_scf_env%matrix_sigma_inv, &
   10976             :             m_siginv0_in=almo_scf_env%matrix_sigma_inv_0deloc, &
   10977             :             m_S_in=almo_scf_env%matrix_s, &
   10978             :             m_KS0_in=almo_scf_env%matrix_ks_0deloc, &
   10979             :             m_quench_t_in=m_quench_in, &
   10980             :             energy_out=energy_inout, & ! get energy loewring
   10981             :             m_eda_out=m_temp_no1, &
   10982             :             m_cta_out=m_temp_no2 &
   10983          24 :             )
   10984             : 
   10985          24 :          IF (almo_scf_env%almo_analysis%do_analysis) THEN
   10986             : 
   10987           4 :             DO ispin = 1, nspins
   10988             : 
   10989             :                ! energy decomposition analysis (EDA)
   10990           2 :                IF (unit_nr > 0) THEN
   10991           1 :                   WRITE (unit_nr, '(T2,A)') "DECOMPOSITION OF THE DELOCALIZATION ENERGY"
   10992             :                END IF
   10993             : 
   10994             :                ! open the output file, print and close
   10995           2 :                CALL get_qs_env(qs_env, input=input)
   10996           2 :                almo_print_section => section_vals_get_subs_vals(input, "DFT%ALMO_SCF%ANALYSIS%PRINT")
   10997             :                eda_unit = cp_print_key_unit_nr(logger, almo_print_section, &
   10998           2 :                                                "ALMO_EDA_CT", extension=".dat", local=.TRUE.)
   10999           2 :                CALL print_block_sum(m_temp_no1(ispin), eda_unit)
   11000             :                CALL cp_print_key_finished_output(eda_unit, logger, almo_print_section, &
   11001           2 :                                                  "ALMO_EDA_CT", local=.TRUE.)
   11002             : 
   11003             :                ! charge transfer analysis (CTA)
   11004           2 :                IF (unit_nr > 0) THEN
   11005           1 :                   WRITE (unit_nr, '(T2,A)') "DECOMPOSITION OF CHARGE TRANSFER TERMS"
   11006             :                END IF
   11007             : 
   11008             :                eda_unit = cp_print_key_unit_nr(logger, almo_print_section, &
   11009           2 :                                                "ALMO_CTA", extension=".dat", local=.TRUE.)
   11010           2 :                CALL print_block_sum(m_temp_no2(ispin), eda_unit)
   11011             :                CALL cp_print_key_finished_output(eda_unit, logger, almo_print_section, &
   11012           4 :                                                  "ALMO_CTA", local=.TRUE.)
   11013             : 
   11014             :             END DO ! ispin
   11015             : 
   11016             :          END IF ! do ALMO EDA/CTA
   11017             : 
   11018             :          CALL energy_lowering_report( &
   11019             :             unit_nr=unit_nr, &
   11020             :             ref_energy=almo_scf_env%almo_scf_energy, &
   11021          24 :             energy_lowering=energy_inout)
   11022             :          CALL almo_scf_update_ks_energy(qs_env, &
   11023             :                                         energy=almo_scf_env%almo_scf_energy, &
   11024          24 :                                         energy_singles_corr=energy_inout)
   11025             : 
   11026          48 :          DO ispin = 1, nspins
   11027          24 :             CALL dbcsr_release(m_temp_no1(ispin))
   11028          48 :             CALL dbcsr_release(m_temp_no2(ispin))
   11029             :          END DO
   11030             : 
   11031          24 :          DEALLOCATE (m_temp_no1)
   11032          24 :          DEALLOCATE (m_temp_no2)
   11033             : 
   11034             :       ELSE ! non-perturbative
   11035             : 
   11036             :          CALL almo_scf_update_ks_energy(qs_env, &
   11037          80 :                                         energy=energy_inout)
   11038             : 
   11039             :       END IF ! if perturbation only
   11040             : 
   11041         104 :       CALL timestop(handle)
   11042             : 
   11043         104 :    END SUBROUTINE wrap_up_xalmo_scf
   11044             : 
   11045             : ! **************************************************************************************************
   11046             : !> \brief Computes tanh(alpha*x) of the matrix elements. Fails if |alpha*x| >= 1.
   11047             : !> \param matrix ...
   11048             : !> \param alpha ...
   11049             : !> \author Ole Schuett
   11050             : ! **************************************************************************************************
   11051           0 :    SUBROUTINE tanh_of_elements(matrix, alpha)
   11052             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
   11053             :       REAL(kind=dp), INTENT(IN)                          :: alpha
   11054             : 
   11055             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'tanh_of_elements'
   11056             : 
   11057             :       INTEGER                                            :: handle
   11058           0 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: block
   11059             :       TYPE(dbcsr_iterator_type)                          :: iter
   11060             : 
   11061           0 :       CALL timeset(routineN, handle)
   11062           0 :       CALL dbcsr_iterator_start(iter, matrix)
   11063           0 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
   11064           0 :          CALL dbcsr_iterator_next_block(iter, block=block)
   11065           0 :          block = TANH(alpha*block)
   11066             :       END DO
   11067           0 :       CALL dbcsr_iterator_stop(iter)
   11068           0 :       CALL timestop(handle)
   11069             : 
   11070           0 :    END SUBROUTINE tanh_of_elements
   11071             : 
   11072             : ! **************************************************************************************************
   11073             : !> \brief Computes d(tanh(alpha*x)) / dx of the matrix elements. Fails if |alpha*x| >= 1.
   11074             : !> \param matrix ...
   11075             : !> \param alpha ...
   11076             : !> \author Ole Schuett
   11077             : ! **************************************************************************************************
   11078           0 :    SUBROUTINE dtanh_of_elements(matrix, alpha)
   11079             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
   11080             :       REAL(kind=dp), INTENT(IN)                          :: alpha
   11081             : 
   11082             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'dtanh_of_elements'
   11083             : 
   11084             :       INTEGER                                            :: handle
   11085           0 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: block
   11086             :       TYPE(dbcsr_iterator_type)                          :: iter
   11087             : 
   11088           0 :       CALL timeset(routineN, handle)
   11089           0 :       CALL dbcsr_iterator_start(iter, matrix)
   11090           0 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
   11091           0 :          CALL dbcsr_iterator_next_block(iter, block=block)
   11092           0 :          block = alpha*(1.0_dp - TANH(block)**2)
   11093             :       END DO
   11094           0 :       CALL dbcsr_iterator_stop(iter)
   11095           0 :       CALL timestop(handle)
   11096             : 
   11097           0 :    END SUBROUTINE dtanh_of_elements
   11098             : 
   11099             : ! **************************************************************************************************
   11100             : !> \brief Computes 1/x of the matrix elements.
   11101             : !> \param matrix ...
   11102             : !> \author Ole Schuett
   11103             : ! **************************************************************************************************
   11104           0 :    SUBROUTINE inverse_of_elements(matrix)
   11105             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
   11106             : 
   11107             :       CHARACTER(len=*), PARAMETER :: routineN = 'inverse_of_elements'
   11108             : 
   11109             :       INTEGER                                            :: handle
   11110           0 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: block
   11111             :       TYPE(dbcsr_iterator_type)                          :: iter
   11112             : 
   11113           0 :       CALL timeset(routineN, handle)
   11114           0 :       CALL dbcsr_iterator_start(iter, matrix)
   11115           0 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
   11116           0 :          CALL dbcsr_iterator_next_block(iter, block=block)
   11117           0 :          block = 1.0_dp/block
   11118             :       END DO
   11119           0 :       CALL dbcsr_iterator_stop(iter)
   11120           0 :       CALL timestop(handle)
   11121             : 
   11122           0 :    END SUBROUTINE inverse_of_elements
   11123             : 
   11124             : ! **************************************************************************************************
   11125             : !> \brief Prints the sum of the elements for each block.
   11126             : !> \param matrix ...
   11127             : !> \param unit_nr ...
   11128             : ! **************************************************************************************************
   11129           4 :    SUBROUTINE print_block_sum(matrix, unit_nr)
   11130             :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix
   11131             :       INTEGER, INTENT(IN)                                :: unit_nr
   11132             : 
   11133             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'print_block_sum'
   11134             : 
   11135             :       INTEGER                                            :: col, handle, row
   11136           4 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block
   11137             :       TYPE(dbcsr_iterator_type)                          :: iter
   11138             : 
   11139           4 :       CALL timeset(routineN, handle)
   11140             : 
   11141           4 :       IF (unit_nr > 0) THEN
   11142           4 :          CALL dbcsr_iterator_readonly_start(iter, matrix)
   11143          34 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
   11144          30 :             CALL dbcsr_iterator_next_block(iter, row, col, block)
   11145        2914 :             WRITE (unit_nr, '(I6,I6,ES18.9)') row, col, SUM(block)
   11146             :          END DO
   11147           4 :          CALL dbcsr_iterator_stop(iter)
   11148             :       END IF
   11149             : 
   11150           4 :       CALL timestop(handle)
   11151           4 :    END SUBROUTINE print_block_sum
   11152             : 
   11153             : END MODULE almo_scf_optimizer
   11154             : 

Generated by: LCOV version 1.15