LCOV - code coverage report
Current view: top level - src - almo_scf_optimizer.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:1a29073) Lines: 1710 3112 54.9 %
Date: 2024-04-17 06:30:47 Functions: 21 30 70.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Optimization routines for all ALMO-based SCF methods
      10             : !> \par History
      11             : !>       2011.05 created [Rustam Z Khaliullin]
      12             : !>       2014.10 as a separate file [Rustam Z Khaliullin]
      13             : !> \author Rustam Z Khaliullin
      14             : ! **************************************************************************************************
      15             : MODULE almo_scf_optimizer
      16             :    USE almo_scf_diis_types,             ONLY: almo_scf_diis_extrapolate,&
      17             :                                               almo_scf_diis_init,&
      18             :                                               almo_scf_diis_push,&
      19             :                                               almo_scf_diis_release,&
      20             :                                               almo_scf_diis_type
      21             :    USE almo_scf_lbfgs_types,            ONLY: lbfgs_create,&
      22             :                                               lbfgs_get_direction,&
      23             :                                               lbfgs_history_type,&
      24             :                                               lbfgs_release,&
      25             :                                               lbfgs_seed
      26             :    USE almo_scf_methods,                ONLY: &
      27             :         almo_scf_ks_blk_to_tv_blk, almo_scf_ks_to_ks_blk, almo_scf_ks_to_ks_xx, &
      28             :         almo_scf_ks_xx_to_tv_xx, almo_scf_p_blk_to_t_blk, almo_scf_t_rescaling, &
      29             :         almo_scf_t_to_proj, apply_domain_operators, apply_projector, &
      30             :         construct_domain_preconditioner, construct_domain_r_down, construct_domain_s_inv, &
      31             :         construct_domain_s_sqrt, fill_matrix_with_ones, get_overlap, orthogonalize_mos, &
      32             :         pseudo_invert_diagonal_blk, xalmo_initial_guess
      33             :    USE almo_scf_qs,                     ONLY: almo_dm_to_almo_ks,&
      34             :                                               almo_dm_to_qs_env,&
      35             :                                               almo_scf_update_ks_energy,&
      36             :                                               matrix_qs_to_almo
      37             :    USE almo_scf_types,                  ONLY: almo_scf_env_type,&
      38             :                                               optimizer_options_type
      39             :    USE cell_types,                      ONLY: cell_type
      40             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      41             :    USE cp_dbcsr_cholesky,               ONLY: cp_dbcsr_cholesky_decompose,&
      42             :                                               cp_dbcsr_cholesky_invert,&
      43             :                                               cp_dbcsr_cholesky_restore
      44             :    USE cp_external_control,             ONLY: external_control
      45             :    USE cp_files,                        ONLY: close_file,&
      46             :                                               open_file
      47             :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      48             :                                               cp_logger_get_default_unit_nr,&
      49             :                                               cp_logger_type,&
      50             :                                               cp_to_string
      51             :    USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
      52             :                                               cp_print_key_unit_nr
      53             :    USE ct_methods,                      ONLY: analytic_line_search,&
      54             :                                               ct_step_execute,&
      55             :                                               diagonalize_diagonal_blocks
      56             :    USE ct_types,                        ONLY: ct_step_env_clean,&
      57             :                                               ct_step_env_get,&
      58             :                                               ct_step_env_init,&
      59             :                                               ct_step_env_set,&
      60             :                                               ct_step_env_type
      61             :    USE dbcsr_api,                       ONLY: &
      62             :         dbcsr_add, dbcsr_add_on_diag, dbcsr_copy, dbcsr_create, dbcsr_desymmetrize, &
      63             :         dbcsr_distribution_get, dbcsr_distribution_type, dbcsr_dot, dbcsr_filter, dbcsr_finalize, &
      64             :         dbcsr_frobenius_norm, dbcsr_func_dtanh, dbcsr_func_inverse, dbcsr_func_tanh, &
      65             :         dbcsr_function_of_elements, dbcsr_get_block_p, dbcsr_get_diag, dbcsr_get_info, &
      66             :         dbcsr_hadamard_product, dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, &
      67             :         dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, &
      68             :         dbcsr_nblkcols_total, dbcsr_nblkrows_total, dbcsr_norm, dbcsr_norm_maxabsnorm, &
      69             :         dbcsr_p_type, dbcsr_print_block_sum, dbcsr_release, dbcsr_reserve_block2d, dbcsr_scale, &
      70             :         dbcsr_set, dbcsr_set_diag, dbcsr_triu, dbcsr_type, dbcsr_type_no_symmetry, &
      71             :         dbcsr_work_create
      72             :    USE domain_submatrix_methods,        ONLY: add_submatrices,&
      73             :                                               construct_submatrices,&
      74             :                                               copy_submatrices,&
      75             :                                               init_submatrices,&
      76             :                                               maxnorm_submatrices,&
      77             :                                               release_submatrices
      78             :    USE domain_submatrix_types,          ONLY: domain_map_type,&
      79             :                                               domain_submatrix_type,&
      80             :                                               select_row
      81             :    USE input_constants,                 ONLY: &
      82             :         almo_scf_diag, almo_scf_dm_sign, cg_dai_yuan, cg_fletcher, cg_fletcher_reeves, &
      83             :         cg_hager_zhang, cg_hestenes_stiefel, cg_liu_storey, cg_polak_ribiere, cg_zero, &
      84             :         op_loc_berry, op_loc_pipek, trustr_cauchy, trustr_dogleg, virt_full, &
      85             :         xalmo_case_block_diag, xalmo_case_fully_deloc, xalmo_case_normal, xalmo_prec_domain, &
      86             :         xalmo_prec_full, xalmo_prec_zero
      87             :    USE input_section_types,             ONLY: section_vals_get_subs_vals,&
      88             :                                               section_vals_type
      89             :    USE iterate_matrix,                  ONLY: determinant,&
      90             :                                               invert_Hotelling,&
      91             :                                               matrix_sqrt_Newton_Schulz
      92             :    USE kinds,                           ONLY: dp
      93             :    USE machine,                         ONLY: m_flush,&
      94             :                                               m_walltime
      95             :    USE message_passing,                 ONLY: mp_comm_type,&
      96             :                                               mp_para_env_type
      97             :    USE particle_methods,                ONLY: get_particle_set
      98             :    USE particle_types,                  ONLY: particle_type
      99             :    USE qs_energy_types,                 ONLY: qs_energy_type
     100             :    USE qs_environment_types,            ONLY: get_qs_env,&
     101             :                                               qs_environment_type
     102             :    USE qs_kind_types,                   ONLY: qs_kind_type
     103             :    USE qs_loc_utils,                    ONLY: compute_berry_operator
     104             :    USE qs_localization_methods,         ONLY: initialize_weights
     105             : #include "./base/base_uses.f90"
     106             : 
     107             :    IMPLICIT NONE
     108             : 
     109             :    PRIVATE
     110             : 
     111             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'almo_scf_optimizer'
     112             : 
     113             :    PUBLIC :: almo_scf_block_diagonal, &
     114             :              almo_scf_xalmo_eigensolver, &
     115             :              almo_scf_xalmo_trustr, &
     116             :              almo_scf_xalmo_pcg, &
     117             :              almo_scf_construct_nlmos
     118             : 
     119             :    LOGICAL, PARAMETER :: debug_mode = .FALSE.
     120             :    LOGICAL, PARAMETER :: safe_mode = .FALSE.
     121             :    LOGICAL, PARAMETER :: almo_mathematica = .FALSE.
     122             :    INTEGER, PARAMETER :: hessian_path_reuse = 1, &
     123             :                          hessian_path_assemble = 2
     124             : 
     125             : CONTAINS
     126             : 
     127             : ! **************************************************************************************************
     128             : !> \brief An SCF procedure that optimizes block-diagonal ALMOs using DIIS
     129             : !> \param qs_env ...
     130             : !> \param almo_scf_env ...
     131             : !> \param optimizer ...
     132             : !> \par History
     133             : !>       2011.06 created [Rustam Z Khaliullin]
     134             : !>       2018.09 smearing support [Ruben Staub]
     135             : !> \author Rustam Z Khaliullin
     136             : ! **************************************************************************************************
     137          76 :    SUBROUTINE almo_scf_block_diagonal(qs_env, almo_scf_env, optimizer)
     138             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     139             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
     140             :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
     141             : 
     142             :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_block_diagonal'
     143             : 
     144             :       INTEGER                                            :: handle, iscf, ispin, nspin, unit_nr
     145          76 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: local_nocc_of_domain
     146             :       LOGICAL                                            :: converged, prepare_to_exit, should_stop, &
     147             :                                                             use_diis, use_prev_as_guess
     148             :       REAL(KIND=dp) :: density_rec, energy_diff, energy_new, energy_old, error_norm, &
     149             :          error_norm_ispin, kTS_sum, prev_error_norm, t1, t2, true_mixing_fraction
     150          76 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: local_mu
     151             :       TYPE(almo_scf_diis_type), ALLOCATABLE, &
     152          76 :          DIMENSION(:)                                    :: almo_diis
     153             :       TYPE(cp_logger_type), POINTER                      :: logger
     154          76 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: matrix_mixing_old_blk
     155             :       TYPE(qs_energy_type), POINTER                      :: qs_energy
     156             : 
     157          76 :       CALL timeset(routineN, handle)
     158             : 
     159             :       ! get a useful output_unit
     160          76 :       logger => cp_get_default_logger()
     161          76 :       IF (logger%para_env%is_source()) THEN
     162          38 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
     163             :       ELSE
     164             :          unit_nr = -1
     165             :       END IF
     166             : 
     167             :       ! use DIIS, it's superior to simple mixing
     168          76 :       use_diis = .TRUE.
     169          76 :       use_prev_as_guess = .FALSE.
     170             : 
     171          76 :       nspin = almo_scf_env%nspins
     172         228 :       ALLOCATE (local_mu(almo_scf_env%ndomains))
     173         228 :       ALLOCATE (local_nocc_of_domain(almo_scf_env%ndomains))
     174             : 
     175             :       ! init mixing matrices
     176         304 :       ALLOCATE (matrix_mixing_old_blk(nspin))
     177         304 :       ALLOCATE (almo_diis(nspin))
     178         152 :       DO ispin = 1, nspin
     179             :          CALL dbcsr_create(matrix_mixing_old_blk(ispin), &
     180          76 :                            template=almo_scf_env%matrix_ks_blk(ispin))
     181             :          CALL almo_scf_diis_init(diis_env=almo_diis(ispin), &
     182             :                                  sample_err=almo_scf_env%matrix_ks_blk(ispin), &
     183             :                                  sample_var=almo_scf_env%matrix_s_blk(1), &
     184             :                                  error_type=1, &
     185         152 :                                  max_length=optimizer%ndiis)
     186             :       END DO
     187             : 
     188          76 :       CALL get_qs_env(qs_env, energy=qs_energy)
     189          76 :       energy_old = qs_energy%total
     190             : 
     191          76 :       iscf = 0
     192          76 :       prepare_to_exit = .FALSE.
     193          76 :       true_mixing_fraction = 0.0_dp
     194          76 :       error_norm = 1.0E+10_dp ! arbitrary big step
     195             : 
     196          76 :       IF (unit_nr > 0) THEN
     197          38 :          WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
     198          76 :             " Optimization of block-diagonal ALMOs ", REPEAT("-", 21)
     199          38 :          WRITE (unit_nr, *)
     200          38 :          WRITE (unit_nr, '(T2,A13,A6,A23,A14,A14,A9)') "Method", "Iter", &
     201          76 :             "Total Energy", "Change", "Convergence", "Time"
     202          38 :          WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
     203             :       END IF
     204             : 
     205             :       ! the real SCF loop
     206          76 :       t1 = m_walltime()
     207         424 :       DO
     208             : 
     209         424 :          iscf = iscf + 1
     210             : 
     211             :          ! obtain projected KS matrix and the DIIS-error vector
     212         424 :          CALL almo_scf_ks_to_ks_blk(almo_scf_env)
     213             : 
     214             :          ! inform the DIIS handler about the new KS matrix and its error vector
     215             :          IF (use_diis) THEN
     216         848 :             DO ispin = 1, nspin
     217             :                CALL almo_scf_diis_push(diis_env=almo_diis(ispin), &
     218             :                                        var=almo_scf_env%matrix_ks_blk(ispin), &
     219         848 :                                        err=almo_scf_env%matrix_err_blk(ispin))
     220             :             END DO
     221             :          END IF
     222             : 
     223             :          ! get error_norm: choose the largest of the two spins
     224         848 :          prev_error_norm = error_norm
     225         848 :          DO ispin = 1, nspin
     226             :             !error_norm=dbcsr_frobenius_norm(almo_scf_env%matrix_err_blk(ispin))
     227             :             CALL dbcsr_norm(almo_scf_env%matrix_err_blk(ispin), &
     228             :                             dbcsr_norm_maxabsnorm, &
     229         424 :                             norm_scalar=error_norm_ispin)
     230         424 :             IF (ispin .EQ. 1) error_norm = error_norm_ispin
     231         424 :             IF (ispin .GT. 1 .AND. error_norm_ispin .GT. error_norm) &
     232         424 :                error_norm = error_norm_ispin
     233             :          END DO
     234             : 
     235         424 :          IF (error_norm .LT. almo_scf_env%eps_prev_guess) THEN
     236           0 :             use_prev_as_guess = .TRUE.
     237             :          ELSE
     238         424 :             use_prev_as_guess = .FALSE.
     239             :          END IF
     240             : 
     241             :          ! check convergence
     242         424 :          converged = .TRUE.
     243         424 :          IF (error_norm .GT. optimizer%eps_error) converged = .FALSE.
     244             : 
     245             :          ! check other exit criteria: max SCF steps and timing
     246             :          CALL external_control(should_stop, "SCF", &
     247             :                                start_time=qs_env%start_time, &
     248         424 :                                target_time=qs_env%target_time)
     249         424 :          IF (should_stop .OR. iscf >= optimizer%max_iter .OR. converged) THEN
     250          76 :             prepare_to_exit = .TRUE.
     251          76 :             IF (iscf == 1) energy_new = energy_old
     252             :          END IF
     253             : 
     254             :          ! if early stopping is on do at least one iteration
     255         424 :          IF (optimizer%early_stopping_on .AND. iscf .EQ. 1) &
     256             :             prepare_to_exit = .FALSE.
     257             : 
     258         424 :          IF (.NOT. prepare_to_exit) THEN ! update the ALMOs and density matrix
     259             : 
     260             :             ! perform mixing of KS matrices
     261         348 :             IF (iscf .NE. 1) THEN
     262             :                IF (use_diis) THEN ! use diis instead of mixing
     263         544 :                   DO ispin = 1, nspin
     264             :                      CALL almo_scf_diis_extrapolate(diis_env=almo_diis(ispin), &
     265         544 :                                                     extr_var=almo_scf_env%matrix_ks_blk(ispin))
     266             :                   END DO
     267             :                ELSE ! use mixing
     268             :                   true_mixing_fraction = almo_scf_env%mixing_fraction
     269             :                   DO ispin = 1, nspin
     270             :                      CALL dbcsr_add(almo_scf_env%matrix_ks_blk(ispin), &
     271             :                                     matrix_mixing_old_blk(ispin), &
     272             :                                     true_mixing_fraction, &
     273             :                                     1.0_dp - true_mixing_fraction)
     274             :                   END DO
     275             :                END IF
     276             :             END IF
     277             :             ! save the new matrix for the future mixing
     278         696 :             DO ispin = 1, nspin
     279             :                CALL dbcsr_copy(matrix_mixing_old_blk(ispin), &
     280         696 :                                almo_scf_env%matrix_ks_blk(ispin))
     281             :             END DO
     282             : 
     283             :             ! obtain ALMOs from the new KS matrix
     284         696 :             SELECT CASE (almo_scf_env%almo_update_algorithm)
     285             :             CASE (almo_scf_diag)
     286             : 
     287         348 :                CALL almo_scf_ks_blk_to_tv_blk(almo_scf_env)
     288             : 
     289             :             CASE (almo_scf_dm_sign)
     290             : 
     291             :                ! update the density matrix
     292           0 :                DO ispin = 1, nspin
     293             : 
     294           0 :                   local_nocc_of_domain(:) = almo_scf_env%nocc_of_domain(:, ispin)
     295           0 :                   local_mu(:) = almo_scf_env%mu_of_domain(:, ispin)
     296             :                   ! RZK UPDATE! the update algorithm is removed because
     297             :                   ! RZK UPDATE! it requires updating core LS_SCF routines
     298             :                   ! RZK UPDATE! (the code exists in the CVS version)
     299           0 :                   CPABORT("Density_matrix_sign has not been tested yet")
     300             :                   ! RZK UPDATE!  CALL density_matrix_sign(almo_scf_env%matrix_p_blk(ispin),&
     301             :                   ! RZK UPDATE!          local_mu,&
     302             :                   ! RZK UPDATE!          almo_scf_env%fixed_mu,&
     303             :                   ! RZK UPDATE!          almo_scf_env%matrix_ks_blk(ispin),&
     304             :                   ! RZK UPDATE!          !matrix_mixing_old_blk(ispin),&
     305             :                   ! RZK UPDATE!          almo_scf_env%matrix_s_blk(1), &
     306             :                   ! RZK UPDATE!          almo_scf_env%matrix_s_blk_inv(1), &
     307             :                   ! RZK UPDATE!          local_nocc_of_domain,&
     308             :                   ! RZK UPDATE!          almo_scf_env%eps_filter,&
     309             :                   ! RZK UPDATE!          almo_scf_env%domain_index_of_ao)
     310             :                   ! RZK UPDATE!
     311           0 :                   almo_scf_env%mu_of_domain(:, ispin) = local_mu(:)
     312             : 
     313             :                END DO
     314             : 
     315             :                ! obtain ALMOs from matrix_p_blk: T_new = P_blk S_blk T_old
     316           0 :                CALL almo_scf_p_blk_to_t_blk(almo_scf_env, ionic=.FALSE.)
     317             : 
     318         348 :                DO ispin = 1, almo_scf_env%nspins
     319             : 
     320             :                   CALL orthogonalize_mos(ket=almo_scf_env%matrix_t_blk(ispin), &
     321             :                                          overlap=almo_scf_env%matrix_sigma_blk(ispin), &
     322             :                                          metric=almo_scf_env%matrix_s_blk(1), &
     323             :                                          retain_locality=.TRUE., &
     324             :                                          only_normalize=.FALSE., &
     325             :                                          nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
     326             :                                          eps_filter=almo_scf_env%eps_filter, &
     327             :                                          order_lanczos=almo_scf_env%order_lanczos, &
     328             :                                          eps_lanczos=almo_scf_env%eps_lanczos, &
     329           0 :                                          max_iter_lanczos=almo_scf_env%max_iter_lanczos)
     330             : 
     331             :                END DO
     332             : 
     333             :             END SELECT
     334             : 
     335             :             ! obtain density matrix from ALMOs
     336         696 :             DO ispin = 1, almo_scf_env%nspins
     337             : 
     338             :                !! Application of an occupation-rescaling trick for smearing, if requested
     339         348 :                IF (almo_scf_env%smear) THEN
     340             :                   CALL almo_scf_t_rescaling(matrix_t=almo_scf_env%matrix_t_blk(ispin), &
     341             :                                             mo_energies=almo_scf_env%mo_energies(:, ispin), &
     342             :                                             mu_of_domain=almo_scf_env%mu_of_domain(:, ispin), &
     343             :                                             real_ne_of_domain=almo_scf_env%real_ne_of_domain(:, ispin), &
     344             :                                             spin_kTS=almo_scf_env%kTS(ispin), &
     345             :                                             smear_e_temp=almo_scf_env%smear_e_temp, &
     346             :                                             ndomains=almo_scf_env%ndomains, &
     347          16 :                                             nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin))
     348             :                END IF
     349             : 
     350             :                CALL almo_scf_t_to_proj(t=almo_scf_env%matrix_t_blk(ispin), &
     351             :                                        p=almo_scf_env%matrix_p(ispin), &
     352             :                                        eps_filter=almo_scf_env%eps_filter, &
     353             :                                        orthog_orbs=.FALSE., &
     354             :                                        nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
     355             :                                        s=almo_scf_env%matrix_s(1), &
     356             :                                        sigma=almo_scf_env%matrix_sigma(ispin), &
     357             :                                        sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
     358             :                                        use_guess=use_prev_as_guess, &
     359             :                                        smear=almo_scf_env%smear, &
     360             :                                        algorithm=almo_scf_env%sigma_inv_algorithm, &
     361             :                                        inverse_accelerator=almo_scf_env%order_lanczos, &
     362             :                                        inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
     363             :                                        eps_lanczos=almo_scf_env%eps_lanczos, &
     364             :                                        max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
     365             :                                        para_env=almo_scf_env%para_env, &
     366         696 :                                        blacs_env=almo_scf_env%blacs_env)
     367             : 
     368             :             END DO
     369             : 
     370         348 :             IF (almo_scf_env%nspins == 1) THEN
     371         348 :                CALL dbcsr_scale(almo_scf_env%matrix_p(1), 2.0_dp)
     372             :                !! Rescaling electronic entropy contribution by spin_factor
     373         348 :                IF (almo_scf_env%smear) THEN
     374          16 :                   almo_scf_env%kTS(1) = almo_scf_env%kTS(1)*2.0_dp
     375             :                END IF
     376             :             END IF
     377             : 
     378         348 :             IF (almo_scf_env%smear) THEN
     379          32 :                kTS_sum = SUM(almo_scf_env%kTS)
     380             :             ELSE
     381         332 :                kTS_sum = 0.0_dp
     382             :             END IF
     383             : 
     384             :             ! compute the new KS matrix and new energy
     385             :             CALL almo_dm_to_almo_ks(qs_env, &
     386             :                                     almo_scf_env%matrix_p, &
     387             :                                     almo_scf_env%matrix_ks, &
     388             :                                     energy_new, &
     389             :                                     almo_scf_env%eps_filter, &
     390             :                                     almo_scf_env%mat_distr_aos, &
     391             :                                     smear=almo_scf_env%smear, &
     392         348 :                                     kTS_sum=kTS_sum)
     393             : 
     394             :          END IF ! prepare_to_exit
     395             : 
     396         424 :          energy_diff = energy_new - energy_old
     397         424 :          energy_old = energy_new
     398         424 :          almo_scf_env%almo_scf_energy = energy_new
     399             : 
     400         424 :          t2 = m_walltime()
     401             :          ! brief report on the current SCF loop
     402         424 :          IF (unit_nr > 0) THEN
     403         212 :             WRITE (unit_nr, '(T2,A13,I6,F23.10,E14.5,F14.9,F9.2)') "ALMO SCF DIIS", &
     404         212 :                iscf, &
     405         424 :                energy_new, energy_diff, error_norm, t2 - t1
     406             :          END IF
     407         424 :          t1 = m_walltime()
     408             : 
     409         424 :          IF (prepare_to_exit) EXIT
     410             : 
     411             :       END DO ! end scf cycle
     412             : 
     413             :       !! Print number of electrons recovered if smearing was requested
     414          76 :       IF (almo_scf_env%smear) THEN
     415           8 :          DO ispin = 1, nspin
     416           4 :             CALL dbcsr_dot(almo_scf_env%matrix_p(ispin), almo_scf_env%matrix_s(1), density_rec)
     417           8 :             IF (unit_nr > 0) THEN
     418           2 :                WRITE (unit_nr, '(T2,A20,F23.10)') "Electrons recovered:", density_rec
     419             :             END IF
     420             :          END DO
     421             :       END IF
     422             : 
     423          76 :       IF (.NOT. converged .AND. (.NOT. optimizer%early_stopping_on)) THEN
     424           0 :          IF (unit_nr > 0) THEN
     425           0 :             CPABORT("SCF for block-diagonal ALMOs not converged!")
     426             :          END IF
     427             :       END IF
     428             : 
     429         152 :       DO ispin = 1, nspin
     430          76 :          CALL dbcsr_release(matrix_mixing_old_blk(ispin))
     431         152 :          CALL almo_scf_diis_release(diis_env=almo_diis(ispin))
     432             :       END DO
     433         152 :       DEALLOCATE (almo_diis)
     434          76 :       DEALLOCATE (matrix_mixing_old_blk)
     435          76 :       DEALLOCATE (local_mu)
     436          76 :       DEALLOCATE (local_nocc_of_domain)
     437             : 
     438          76 :       CALL timestop(handle)
     439             : 
     440          76 :    END SUBROUTINE almo_scf_block_diagonal
     441             : 
     442             : ! **************************************************************************************************
     443             : !> \brief An eigensolver-based SCF to optimize extended ALMOs (i.e. ALMOs on
     444             : !>        overlapping domains)
     445             : !> \param qs_env ...
     446             : !> \param almo_scf_env ...
     447             : !> \param optimizer ...
     448             : !> \par History
     449             : !>       2013.03 created [Rustam Z Khaliullin]
     450             : !>       2018.09 smearing support [Ruben Staub]
     451             : !> \author Rustam Z Khaliullin
     452             : ! **************************************************************************************************
     453           2 :    SUBROUTINE almo_scf_xalmo_eigensolver(qs_env, almo_scf_env, optimizer)
     454             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     455             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
     456             :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
     457             : 
     458             :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_xalmo_eigensolver'
     459             : 
     460             :       INTEGER                                            :: handle, iscf, ispin, nspin, unit_nr
     461             :       LOGICAL                                            :: converged, prepare_to_exit, should_stop
     462             :       REAL(KIND=dp) :: denergy_tot, density_rec, energy_diff, energy_new, energy_old, error_norm, &
     463             :          error_norm_0, kTS_sum, spin_factor, t1, t2
     464             :       REAL(KIND=dp), DIMENSION(2)                        :: denergy_spin
     465             :       TYPE(almo_scf_diis_type), ALLOCATABLE, &
     466           2 :          DIMENSION(:)                                    :: almo_diis
     467             :       TYPE(cp_logger_type), POINTER                      :: logger
     468             :       TYPE(dbcsr_type)                                   :: matrix_p_almo_scf_converged
     469             :       TYPE(domain_submatrix_type), ALLOCATABLE, &
     470           2 :          DIMENSION(:, :)                                 :: submatrix_mixing_old_blk
     471             : 
     472           2 :       CALL timeset(routineN, handle)
     473             : 
     474             :       ! get a useful output_unit
     475           2 :       logger => cp_get_default_logger()
     476           2 :       IF (logger%para_env%is_source()) THEN
     477           1 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
     478             :       ELSE
     479           1 :          unit_nr = -1
     480             :       END IF
     481             : 
     482           2 :       nspin = almo_scf_env%nspins
     483           2 :       IF (nspin == 1) THEN
     484           2 :          spin_factor = 2.0_dp
     485             :       ELSE
     486           0 :          spin_factor = 1.0_dp
     487             :       END IF
     488             : 
     489             :       ! RZK-warning domain_s_sqrt and domain_s_sqrt_inv do not have spin
     490             :       ! components yet (may be used later)
     491           2 :       ispin = 1
     492             :       CALL construct_domain_s_sqrt( &
     493             :          matrix_s=almo_scf_env%matrix_s(1), &
     494             :          subm_s_sqrt=almo_scf_env%domain_s_sqrt(:, ispin), &
     495             :          subm_s_sqrt_inv=almo_scf_env%domain_s_sqrt_inv(:, ispin), &
     496             :          dpattern=almo_scf_env%quench_t(ispin), &
     497             :          map=almo_scf_env%domain_map(ispin), &
     498           2 :          node_of_domain=almo_scf_env%cpu_of_domain)
     499             :       ! TRY: construct s_inv
     500             :       !CALL construct_domain_s_inv(&
     501             :       !       matrix_s=almo_scf_env%matrix_s(1),&
     502             :       !       subm_s_inv=almo_scf_env%domain_s_inv(:,ispin),&
     503             :       !       dpattern=almo_scf_env%quench_t(ispin),&
     504             :       !       map=almo_scf_env%domain_map(ispin),&
     505             :       !       node_of_domain=almo_scf_env%cpu_of_domain)
     506             : 
     507             :       ! construct the domain template for the occupied orbitals
     508           4 :       DO ispin = 1, nspin
     509             :          ! RZK-warning we need only the matrix structure, not data
     510             :          ! replace construct_submatrices with lighter procedure with
     511             :          ! no heavy communications
     512             :          CALL construct_submatrices( &
     513             :             matrix=almo_scf_env%quench_t(ispin), &
     514             :             submatrix=almo_scf_env%domain_t(:, ispin), &
     515             :             distr_pattern=almo_scf_env%quench_t(ispin), &
     516             :             domain_map=almo_scf_env%domain_map(ispin), &
     517             :             node_of_domain=almo_scf_env%cpu_of_domain, &
     518           4 :             job_type=select_row)
     519             :       END DO
     520             : 
     521             :       ! init mixing matrices
     522          20 :       ALLOCATE (submatrix_mixing_old_blk(almo_scf_env%ndomains, nspin))
     523           2 :       CALL init_submatrices(submatrix_mixing_old_blk)
     524           8 :       ALLOCATE (almo_diis(nspin))
     525             : 
     526             :       ! TRY: construct block-projector
     527             :       !ALLOCATE(submatrix_tmp(almo_scf_env%ndomains))
     528             :       !DO ispin=1,nspin
     529             :       !   CALL init_submatrices(submatrix_tmp)
     530             :       !   CALL construct_domain_r_down(&
     531             :       !           matrix_t=almo_scf_env%matrix_t_blk(ispin),&
     532             :       !           matrix_sigma_inv=almo_scf_env%matrix_sigma_inv(ispin),&
     533             :       !           matrix_s=almo_scf_env%matrix_s(1),&
     534             :       !           subm_r_down=submatrix_tmp(:),&
     535             :       !           dpattern=almo_scf_env%quench_t(ispin),&
     536             :       !           map=almo_scf_env%domain_map(ispin),&
     537             :       !           node_of_domain=almo_scf_env%cpu_of_domain,&
     538             :       !           filter_eps=almo_scf_env%eps_filter)
     539             :       !   CALL multiply_submatrices('N','N',1.0_dp,&
     540             :       !           submatrix_tmp(:),&
     541             :       !           almo_scf_env%domain_s_inv(:,1),0.0_dp,&
     542             :       !           almo_scf_env%domain_r_down_up(:,ispin))
     543             :       !   CALL release_submatrices(submatrix_tmp)
     544             :       !ENDDO
     545             :       !DEALLOCATE(submatrix_tmp)
     546             : 
     547           4 :       DO ispin = 1, nspin
     548             :          ! use s_sqrt since they are already properly constructed
     549             :          ! and have the same distributions as domain_err and domain_ks_xx
     550             :          CALL almo_scf_diis_init(diis_env=almo_diis(ispin), &
     551             :                                  sample_err=almo_scf_env%domain_s_sqrt(:, ispin), &
     552             :                                  error_type=1, &
     553           4 :                                  max_length=optimizer%ndiis)
     554             :       END DO
     555             : 
     556           2 :       denergy_tot = 0.0_dp
     557           2 :       energy_old = 0.0_dp
     558           2 :       iscf = 0
     559           2 :       prepare_to_exit = .FALSE.
     560             : 
     561             :       ! the SCF loop
     562           2 :       t1 = m_walltime()
     563           2 :       DO
     564             : 
     565           2 :          iscf = iscf + 1
     566             : 
     567             :          ! obtain projected KS matrix and the DIIS-error vector
     568           2 :          CALL almo_scf_ks_to_ks_xx(almo_scf_env)
     569             : 
     570             :          ! inform the DIIS handler about the new KS matrix and its error vector
     571           4 :          DO ispin = 1, nspin
     572             :             CALL almo_scf_diis_push(diis_env=almo_diis(ispin), &
     573             :                                     d_var=almo_scf_env%domain_ks_xx(:, ispin), &
     574           4 :                                     d_err=almo_scf_env%domain_err(:, ispin))
     575             :          END DO
     576             : 
     577             :          ! check convergence
     578           2 :          converged = .TRUE.
     579           2 :          DO ispin = 1, nspin
     580             :             !error_norm=dbcsr_frobenius_norm(almo_scf_env%matrix_err_blk(ispin))
     581             :             CALL dbcsr_norm(almo_scf_env%matrix_err_xx(ispin), &
     582             :                             dbcsr_norm_maxabsnorm, &
     583           2 :                             norm_scalar=error_norm)
     584             :             CALL maxnorm_submatrices(almo_scf_env%domain_err(:, ispin), &
     585           2 :                                      norm=error_norm_0)
     586           2 :             IF (error_norm .GT. optimizer%eps_error) THEN
     587             :                converged = .FALSE.
     588             :                EXIT ! no need to check the other spin
     589             :             END IF
     590             :          END DO
     591             :          ! check other exit criteria: max SCF steps and timing
     592             :          CALL external_control(should_stop, "SCF", &
     593             :                                start_time=qs_env%start_time, &
     594           2 :                                target_time=qs_env%target_time)
     595           2 :          IF (should_stop .OR. iscf >= optimizer%max_iter .OR. converged) THEN
     596           0 :             prepare_to_exit = .TRUE.
     597             :          END IF
     598             : 
     599             :          ! if early stopping is on do at least one iteration
     600           2 :          IF (optimizer%early_stopping_on .AND. iscf .EQ. 1) &
     601             :             prepare_to_exit = .FALSE.
     602             : 
     603           2 :          IF (.NOT. prepare_to_exit) THEN ! update the ALMOs and density matrix
     604             : 
     605             :             ! perform mixing of KS matrices
     606           2 :             IF (iscf .NE. 1) THEN
     607             :                IF (.FALSE.) THEN ! use diis instead of mixing
     608             :                   DO ispin = 1, nspin
     609             :                      CALL add_submatrices( &
     610             :                         almo_scf_env%mixing_fraction, &
     611             :                         almo_scf_env%domain_ks_xx(:, ispin), &
     612             :                         1.0_dp - almo_scf_env%mixing_fraction, &
     613             :                         submatrix_mixing_old_blk(:, ispin), &
     614             :                         'N')
     615             :                   END DO
     616             :                ELSE
     617           0 :                   DO ispin = 1, nspin
     618             :                      CALL almo_scf_diis_extrapolate(diis_env=almo_diis(ispin), &
     619           0 :                                                     d_extr_var=almo_scf_env%domain_ks_xx(:, ispin))
     620             :                   END DO
     621             :                END IF
     622             :             END IF
     623             :             ! save the new matrix for the future mixing
     624           4 :             DO ispin = 1, nspin
     625             :                CALL copy_submatrices( &
     626             :                   almo_scf_env%domain_ks_xx(:, ispin), &
     627             :                   submatrix_mixing_old_blk(:, ispin), &
     628           4 :                   copy_data=.TRUE.)
     629             :             END DO
     630             : 
     631             :             ! obtain a new set of ALMOs from the updated KS matrix
     632           2 :             CALL almo_scf_ks_xx_to_tv_xx(almo_scf_env)
     633             : 
     634             :             ! update the density matrix
     635           4 :             DO ispin = 1, nspin
     636             : 
     637             :                ! save the initial density matrix (to get the perturbative energy lowering)
     638           2 :                IF (iscf .EQ. 1) THEN
     639             :                   CALL dbcsr_create(matrix_p_almo_scf_converged, &
     640           2 :                                     template=almo_scf_env%matrix_p(ispin))
     641             :                   CALL dbcsr_copy(matrix_p_almo_scf_converged, &
     642           2 :                                   almo_scf_env%matrix_p(ispin))
     643             :                END IF
     644             : 
     645             :                !! Application of an occupation-rescaling trick for smearing, if requested
     646           2 :                IF (almo_scf_env%smear) THEN
     647             :                   CALL almo_scf_t_rescaling(matrix_t=almo_scf_env%matrix_t_blk(ispin), &
     648             :                                             mo_energies=almo_scf_env%mo_energies(:, ispin), &
     649             :                                             mu_of_domain=almo_scf_env%mu_of_domain(:, ispin), &
     650             :                                             real_ne_of_domain=almo_scf_env%real_ne_of_domain(:, ispin), &
     651             :                                             spin_kTS=almo_scf_env%kTS(ispin), &
     652             :                                             smear_e_temp=almo_scf_env%smear_e_temp, &
     653             :                                             ndomains=almo_scf_env%ndomains, &
     654           0 :                                             nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin))
     655             :                END IF
     656             : 
     657             :                ! update now
     658             :                CALL almo_scf_t_to_proj( &
     659             :                   t=almo_scf_env%matrix_t(ispin), &
     660             :                   p=almo_scf_env%matrix_p(ispin), &
     661             :                   eps_filter=almo_scf_env%eps_filter, &
     662             :                   orthog_orbs=.FALSE., &
     663             :                   nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
     664             :                   s=almo_scf_env%matrix_s(1), &
     665             :                   sigma=almo_scf_env%matrix_sigma(ispin), &
     666             :                   sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
     667             :                   use_guess=.TRUE., &
     668             :                   smear=almo_scf_env%smear, &
     669             :                   algorithm=almo_scf_env%sigma_inv_algorithm, &
     670             :                   inverse_accelerator=almo_scf_env%order_lanczos, &
     671             :                   inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
     672             :                   eps_lanczos=almo_scf_env%eps_lanczos, &
     673             :                   max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
     674             :                   para_env=almo_scf_env%para_env, &
     675           2 :                   blacs_env=almo_scf_env%blacs_env)
     676           2 :                CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), spin_factor)
     677             :                !! Rescaling electronic entropy contribution by spin_factor
     678           2 :                IF (almo_scf_env%smear) THEN
     679           0 :                   almo_scf_env%kTS(ispin) = almo_scf_env%kTS(ispin)*spin_factor
     680             :                END IF
     681             : 
     682             :                ! obtain perturbative estimate (at no additional cost)
     683             :                ! of the energy lowering relative to the block-diagonal ALMOs
     684           4 :                IF (iscf .EQ. 1) THEN
     685             : 
     686             :                   CALL dbcsr_add(matrix_p_almo_scf_converged, &
     687           2 :                                  almo_scf_env%matrix_p(ispin), -1.0_dp, 1.0_dp)
     688             :                   CALL dbcsr_dot(almo_scf_env%matrix_ks_0deloc(ispin), &
     689             :                                  matrix_p_almo_scf_converged, &
     690           2 :                                  denergy_spin(ispin))
     691             : 
     692           2 :                   CALL dbcsr_release(matrix_p_almo_scf_converged)
     693             : 
     694             :                   !! RS-WARNING: If smearing ALMO is requested, electronic entropy contribution should probably be included here
     695             : 
     696           2 :                   denergy_tot = denergy_tot + denergy_spin(ispin)
     697             : 
     698             :                   ! RZK-warning Energy correction can be evaluated using matrix_x
     699             :                   ! as shown in the attempt below and in the PCG procedure.
     700             :                   ! Using matrix_x allows immediate decomposition of the energy
     701             :                   ! lowering into 2-body components for EDA. However, it does not
     702             :                   ! work here because the diagonalization routine does not necessarily
     703             :                   ! produce orbitals with the same sign as the block-diagonal ALMOs
     704             :                   ! Any fixes?!
     705             : 
     706             :                   !CALL dbcsr_init(matrix_x)
     707             :                   !CALL dbcsr_create(matrix_x,&
     708             :                   !        template=almo_scf_env%matrix_t(ispin))
     709             :                   !
     710             :                   !CALL dbcsr_init(matrix_tmp_no)
     711             :                   !CALL dbcsr_create(matrix_tmp_no,&
     712             :                   !        template=almo_scf_env%matrix_t(ispin))
     713             :                   !
     714             :                   !CALL dbcsr_copy(matrix_x,&
     715             :                   !        almo_scf_env%matrix_t_blk(ispin))
     716             :                   !CALL dbcsr_add(matrix_x,almo_scf_env%matrix_t(ispin),&
     717             :                   !        -1.0_dp,1.0_dp)
     718             : 
     719             :                   !CALL dbcsr_dot(matrix_x, almo_scf_env%matrix_err_xx(ispin),denergy)
     720             : 
     721             :                   !denergy=denergy*spin_factor
     722             : 
     723             :                   !IF (unit_nr>0) THEN
     724             :                   !   WRITE(unit_nr,*) "_ENERGY-0: ", almo_scf_env%almo_scf_energy
     725             :                   !   WRITE(unit_nr,*) "_ENERGY-D: ", denergy
     726             :                   !   WRITE(unit_nr,*) "_ENERGY-F: ", almo_scf_env%almo_scf_energy+denergy
     727             :                   !ENDIF
     728             :                   !! RZK-warning update will not work since the energy is overwritten almost immediately
     729             :                   !!CALL almo_scf_update_ks_energy(qs_env,&
     730             :                   !!        almo_scf_env%almo_scf_energy+denergy)
     731             :                   !!
     732             : 
     733             :                   !! print out the results of the decomposition analysis
     734             :                   !CALL dbcsr_hadamard_product(matrix_x,&
     735             :                   !        almo_scf_env%matrix_err_xx(ispin),&
     736             :                   !        matrix_tmp_no)
     737             :                   !CALL dbcsr_scale(matrix_tmp_no,spin_factor)
     738             :                   !CALL dbcsr_filter(matrix_tmp_no,almo_scf_env%eps_filter)
     739             :                   !
     740             :                   !IF (unit_nr>0) THEN
     741             :                   !   WRITE(unit_nr,*)
     742             :                   !   WRITE(unit_nr,'(T2,A)') "DECOMPOSITION OF THE DELOCALIZATION ENERGY"
     743             :                   !ENDIF
     744             : 
     745             :                   !mynode=dbcsr_mp_mynode(dbcsr_distribution_mp(&
     746             :                   !   dbcsr_distribution(matrix_tmp_no)))
     747             :                   !WRITE(mynodestr,'(I6.6)') mynode
     748             :                   !mylogfile='EDA.'//TRIM(ADJUSTL(mynodestr))
     749             :                   !OPEN (iunit,file=mylogfile,status='REPLACE')
     750             :                   !CALL dbcsr_print_block_sum(matrix_tmp_no,iunit)
     751             :                   !CLOSE(iunit)
     752             :                   !
     753             :                   !CALL dbcsr_release(matrix_tmp_no)
     754             :                   !CALL dbcsr_release(matrix_x)
     755             : 
     756             :                END IF ! iscf.eq.1
     757             : 
     758             :             END DO
     759             : 
     760             :             ! print out the energy lowering
     761           2 :             IF (iscf .EQ. 1) THEN
     762             :                CALL energy_lowering_report( &
     763             :                   unit_nr=unit_nr, &
     764             :                   ref_energy=almo_scf_env%almo_scf_energy, &
     765           2 :                   energy_lowering=denergy_tot)
     766             :                CALL almo_scf_update_ks_energy(qs_env, &
     767             :                                               energy=almo_scf_env%almo_scf_energy, &
     768           2 :                                               energy_singles_corr=denergy_tot)
     769             :             END IF
     770             : 
     771             :             ! compute the new KS matrix and new energy
     772           2 :             IF (.NOT. almo_scf_env%perturbative_delocalization) THEN
     773             : 
     774           0 :                IF (almo_scf_env%smear) THEN
     775           0 :                   kTS_sum = SUM(almo_scf_env%kTS)
     776             :                ELSE
     777           0 :                   kTS_sum = 0.0_dp
     778             :                END IF
     779             : 
     780             :                CALL almo_dm_to_almo_ks(qs_env, &
     781             :                                        almo_scf_env%matrix_p, &
     782             :                                        almo_scf_env%matrix_ks, &
     783             :                                        energy_new, &
     784             :                                        almo_scf_env%eps_filter, &
     785             :                                        almo_scf_env%mat_distr_aos, &
     786             :                                        smear=almo_scf_env%smear, &
     787           0 :                                        kTS_sum=kTS_sum)
     788             :             END IF
     789             : 
     790             :          END IF ! prepare_to_exit
     791             : 
     792           2 :          IF (almo_scf_env%perturbative_delocalization) THEN
     793             : 
     794             :             ! exit after the first step if we do not need the SCF procedure
     795           2 :             CALL almo_dm_to_qs_env(qs_env, almo_scf_env%matrix_p, almo_scf_env%mat_distr_aos)
     796           2 :             converged = .TRUE.
     797           2 :             prepare_to_exit = .TRUE.
     798             : 
     799             :          ELSE ! not a perturbative treatment
     800             : 
     801           0 :             energy_diff = energy_new - energy_old
     802           0 :             energy_old = energy_new
     803           0 :             almo_scf_env%almo_scf_energy = energy_new
     804             : 
     805           0 :             t2 = m_walltime()
     806             :             ! brief report on the current SCF loop
     807           0 :             IF (unit_nr > 0) THEN
     808           0 :                WRITE (unit_nr, '(T2,A,I6,F20.9,E11.3,E11.3,E11.3,F8.2)') "ALMO SCF", &
     809           0 :                   iscf, &
     810           0 :                   energy_new, energy_diff, error_norm, error_norm_0, t2 - t1
     811             :             END IF
     812           0 :             t1 = m_walltime()
     813             : 
     814             :          END IF
     815             : 
     816           2 :          IF (prepare_to_exit) EXIT
     817             : 
     818             :       END DO ! end scf cycle
     819             : 
     820             :       !! Print number of electrons recovered if smearing was requested
     821           2 :       IF (almo_scf_env%smear) THEN
     822           0 :          DO ispin = 1, nspin
     823           0 :             CALL dbcsr_dot(almo_scf_env%matrix_p(ispin), almo_scf_env%matrix_s(1), density_rec)
     824           0 :             IF (unit_nr > 0) THEN
     825           0 :                WRITE (unit_nr, '(T2,A20,F23.10)') "Electrons recovered:", density_rec
     826             :             END IF
     827             :          END DO
     828             :       END IF
     829             : 
     830           2 :       IF (.NOT. converged .AND. .NOT. optimizer%early_stopping_on) THEN
     831           0 :          CPABORT("SCF for ALMOs on overlapping domains not converged! ")
     832             :       END IF
     833             : 
     834           4 :       DO ispin = 1, nspin
     835           2 :          CALL release_submatrices(submatrix_mixing_old_blk(:, ispin))
     836           4 :          CALL almo_scf_diis_release(diis_env=almo_diis(ispin))
     837             :       END DO
     838           4 :       DEALLOCATE (almo_diis)
     839          12 :       DEALLOCATE (submatrix_mixing_old_blk)
     840             : 
     841           2 :       CALL timestop(handle)
     842             : 
     843           2 :    END SUBROUTINE almo_scf_xalmo_eigensolver
     844             : 
     845             : ! **************************************************************************************************
     846             : !> \brief Optimization of ALMOs using PCG-like minimizers
     847             : !> \param qs_env ...
     848             : !> \param almo_scf_env ...
     849             : !> \param optimizer   controls the optimization algorithm
     850             : !> \param quench_t ...
     851             : !> \param matrix_t_in ...
     852             : !> \param matrix_t_out ...
     853             : !> \param assume_t0_q0x - since it is extremely difficult to converge the iterative
     854             : !>                        procedure using T as an optimized variable, assume
     855             : !>                        T = T_0 + (1-R_0)*X and optimize X
     856             : !>                        T_0 is assumed to be the zero-delocalization reference
     857             : !> \param perturbation_only - perturbative (do not update Hamiltonian)
     858             : !> \param special_case   to reduce the overhead special cases are implemented:
     859             : !>                       xalmo_case_normal - no special case (i.e. xALMOs)
     860             : !>                       xalmo_case_block_diag
     861             : !>                       xalmo_case_fully_deloc
     862             : !> \par History
     863             : !>       2011.11 created [Rustam Z Khaliullin]
     864             : !> \author Rustam Z Khaliullin
     865             : ! **************************************************************************************************
     866          86 :    SUBROUTINE almo_scf_xalmo_pcg(qs_env, almo_scf_env, optimizer, quench_t, &
     867             :                                  matrix_t_in, matrix_t_out, assume_t0_q0x, perturbation_only, &
     868             :                                  special_case)
     869             : 
     870             :       TYPE(qs_environment_type), POINTER                 :: qs_env
     871             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
     872             :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
     873             :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:), &
     874             :          INTENT(INOUT)                                   :: quench_t, matrix_t_in, matrix_t_out
     875             :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, perturbation_only
     876             :       INTEGER, INTENT(IN), OPTIONAL                      :: special_case
     877             : 
     878             :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_xalmo_pcg'
     879             : 
     880             :       CHARACTER(LEN=20)                                  :: iter_type
     881             :       INTEGER :: cg_iteration, dim_op, fixed_line_search_niter, handle, idim0, ielem, ispin, &
     882             :          iteration, line_search_iteration, max_iter, my_special_case, ndomains, nmo, nspins, &
     883             :          outer_iteration, outer_max_iter, para_group_handle, prec_type, reim, unit_nr
     884          86 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
     885             :       LOGICAL :: blissful_neglect, converged, just_started, line_search, normalize_orbitals, &
     886             :          optimize_theta, outer_prepare_to_exit, penalty_occ_local, penalty_occ_vol, &
     887             :          prepare_to_exit, reset_conjugator, skip_grad, use_guess
     888          86 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: reim_diag, weights, z2
     889             :       REAL(kind=dp) :: appr_sec_der, beta, denom, denom2, e0, e1, energy_coeff, energy_diff, &
     890             :          energy_new, energy_old, eps_skip_gradients, fval, g0, g1, grad_norm, grad_norm_frob, &
     891             :          line_search_error, localiz_coeff, localization_obj_function, next_step_size_guess, &
     892             :          penalty_amplitude, penalty_func_new, spin_factor, step_size, t1, t2, tempreal
     893          86 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: grad_norm_spin, &
     894          86 :                                                             penalty_occ_vol_g_prefactor, &
     895          86 :                                                             penalty_occ_vol_h_prefactor
     896             :       TYPE(cell_type), POINTER                           :: cell
     897             :       TYPE(cp_logger_type), POINTER                      :: logger
     898          86 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: qs_matrix_s
     899          86 :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: op_sm_set_almo, op_sm_set_qs
     900          86 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: FTsiginv, grad, m_sig_sqrti_ii, m_t_in_local, &
     901          86 :          m_theta, prec_vv, prev_grad, prev_minus_prec_grad, prev_step, siginvTFTsiginv, ST, step, &
     902          86 :          STsiginv_0, tempNOcc, tempNOcc_1, tempOccOcc
     903             :       TYPE(domain_submatrix_type), ALLOCATABLE, &
     904          86 :          DIMENSION(:, :)                                 :: bad_modes_projector_down, domain_r_down
     905             :       TYPE(mp_comm_type)                                 :: para_group
     906             : 
     907          86 :       CALL timeset(routineN, handle)
     908             : 
     909          86 :       my_special_case = xalmo_case_normal
     910          86 :       IF (PRESENT(special_case)) my_special_case = special_case
     911             : 
     912             :       ! get a useful output_unit
     913          86 :       logger => cp_get_default_logger()
     914          86 :       IF (logger%para_env%is_source()) THEN
     915          43 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
     916             :       ELSE
     917             :          unit_nr = -1
     918             :       END IF
     919             : 
     920          86 :       nspins = almo_scf_env%nspins
     921             : 
     922             :       ! if unprojected XALMOs are optimized
     923             :       ! then we must use the "blissful_neglect" procedure
     924          86 :       blissful_neglect = .FALSE.
     925          86 :       IF (my_special_case .EQ. xalmo_case_normal .AND. .NOT. assume_t0_q0x) THEN
     926          14 :          blissful_neglect = .TRUE.
     927             :       END IF
     928             : 
     929          86 :       IF (unit_nr > 0) THEN
     930          43 :          WRITE (unit_nr, *)
     931           2 :          SELECT CASE (my_special_case)
     932             :          CASE (xalmo_case_block_diag)
     933           2 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
     934           4 :                " Optimization of block-diagonal ALMOs ", REPEAT("-", 21)
     935             :          CASE (xalmo_case_fully_deloc)
     936          22 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
     937          44 :                " Optimization of fully delocalized MOs ", REPEAT("-", 20)
     938             :          CASE (xalmo_case_normal)
     939          43 :             IF (blissful_neglect) THEN
     940           7 :                WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 25), &
     941          14 :                   " LCP optimization of XALMOs ", REPEAT("-", 26)
     942             :             ELSE
     943          12 :                WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 27), &
     944          24 :                   " Optimization of XALMOs ", REPEAT("-", 28)
     945             :             END IF
     946             :          END SELECT
     947          43 :          WRITE (unit_nr, *)
     948          43 :          WRITE (unit_nr, '(T2,A13,A6,A23,A14,A14,A9)') "Method", "Iter", &
     949          86 :             "Objective Function", "Change", "Convergence", "Time"
     950          43 :          WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
     951             :       END IF
     952             : 
     953             :       ! set local parameters using developer's keywords
     954             :       ! RZK-warning: change to normal keywords later
     955          86 :       optimize_theta = almo_scf_env%logical05
     956          86 :       eps_skip_gradients = almo_scf_env%real01
     957             : 
     958             :       ! penalty amplitude adjusts the strength of volume conservation
     959          86 :       energy_coeff = 1.0_dp !optimizer%opt_penalty%energy_coeff
     960          86 :       localiz_coeff = 0.0_dp !optimizer%opt_penalty%occ_loc_coeff
     961          86 :       penalty_amplitude = 0.0_dp !optimizer%opt_penalty%occ_vol_coeff
     962          86 :       penalty_occ_vol = .FALSE. !( optimizer%opt_penalty%occ_vol_method &
     963             :       !.NE. penalty_type_none .AND. my_special_case .EQ. xalmo_case_fully_deloc )
     964          86 :       penalty_occ_local = .FALSE. !( optimizer%opt_penalty%occ_loc_method &
     965             :       !.NE. penalty_type_none .AND. my_special_case .EQ. xalmo_case_fully_deloc )
     966          86 :       normalize_orbitals = penalty_occ_vol .OR. penalty_occ_local
     967         258 :       ALLOCATE (penalty_occ_vol_g_prefactor(nspins))
     968         258 :       ALLOCATE (penalty_occ_vol_h_prefactor(nspins))
     969         172 :       penalty_occ_vol_g_prefactor(:) = 0.0_dp
     970         172 :       penalty_occ_vol_h_prefactor(:) = 0.0_dp
     971          86 :       penalty_func_new = 0.0_dp
     972             : 
     973             :       ! preconditioner control
     974          86 :       prec_type = optimizer%preconditioner
     975             : 
     976             :       ! control of the line search
     977          86 :       fixed_line_search_niter = 0 ! init to zero, change when eps is small enough
     978             : 
     979          86 :       IF (nspins == 1) THEN
     980          86 :          spin_factor = 2.0_dp
     981             :       ELSE
     982           0 :          spin_factor = 1.0_dp
     983             :       END IF
     984             : 
     985         258 :       ALLOCATE (grad_norm_spin(nspins))
     986         258 :       ALLOCATE (nocc(nspins))
     987             : 
     988             :       ! create a local copy of matrix_t_in because
     989             :       ! matrix_t_in and matrix_t_out can be the same matrix
     990             :       ! we need to make sure data in matrix_t_in is intact
     991             :       ! after we start writing to matrix_t_out
     992         344 :       ALLOCATE (m_t_in_local(nspins))
     993         172 :       DO ispin = 1, nspins
     994             :          CALL dbcsr_create(m_t_in_local(ispin), &
     995             :                            template=matrix_t_in(ispin), &
     996          86 :                            matrix_type=dbcsr_type_no_symmetry)
     997         172 :          CALL dbcsr_copy(m_t_in_local(ispin), matrix_t_in(ispin))
     998             :       END DO
     999             : 
    1000             :       ! m_theta contains a set of variational parameters
    1001             :       ! that define one-electron orbitals (simple, projected, etc.)
    1002         344 :       ALLOCATE (m_theta(nspins))
    1003         172 :       DO ispin = 1, nspins
    1004             :          CALL dbcsr_create(m_theta(ispin), &
    1005             :                            template=matrix_t_out(ispin), &
    1006         172 :                            matrix_type=dbcsr_type_no_symmetry)
    1007             :       END DO
    1008             : 
    1009             :       ! Compute localization matrices
    1010             :       IF (penalty_occ_local) THEN
    1011             : 
    1012             :          CALL get_qs_env(qs_env=qs_env, &
    1013             :                          matrix_s=qs_matrix_s, &
    1014             :                          cell=cell)
    1015             : 
    1016             :          IF (cell%orthorhombic) THEN
    1017             :             dim_op = 3
    1018             :          ELSE
    1019             :             dim_op = 6
    1020             :          END IF
    1021             :          ALLOCATE (weights(6))
    1022             :          weights = 0.0_dp
    1023             : 
    1024             :          CALL initialize_weights(cell, weights)
    1025             : 
    1026             :          ALLOCATE (op_sm_set_qs(2, dim_op))
    1027             :          ALLOCATE (op_sm_set_almo(2, dim_op))
    1028             : 
    1029             :          DO idim0 = 1, dim_op
    1030             :             DO reim = 1, SIZE(op_sm_set_qs, 1)
    1031             :                NULLIFY (op_sm_set_qs(reim, idim0)%matrix)
    1032             :                ALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    1033             :                CALL dbcsr_copy(op_sm_set_qs(reim, idim0)%matrix, qs_matrix_s(1)%matrix, &
    1034             :                              name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
    1035             :                CALL dbcsr_set(op_sm_set_qs(reim, idim0)%matrix, 0.0_dp)
    1036             :                NULLIFY (op_sm_set_almo(reim, idim0)%matrix)
    1037             :                ALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    1038             :                CALL dbcsr_copy(op_sm_set_almo(reim, idim0)%matrix, almo_scf_env%matrix_s(1), &
    1039             :                              name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
    1040             :                CALL dbcsr_set(op_sm_set_almo(reim, idim0)%matrix, 0.0_dp)
    1041             :             END DO
    1042             :          END DO
    1043             : 
    1044             :          CALL compute_berry_operator(qs_env, cell, op_sm_set_qs, dim_op)
    1045             : 
    1046             :          !CALL matrix_qs_to_almo(op_sm_set_qs, op_sm_set_almo, &
    1047             :          !                       almo_scf_env%mat_distr_aos, .FALSE.)
    1048             : 
    1049             :       END IF
    1050             : 
    1051             :       ! create initial guess from the initial orbitals
    1052             :       CALL xalmo_initial_guess(m_guess=m_theta, &
    1053             :                                m_t_in=m_t_in_local, &
    1054             :                                m_t0=almo_scf_env%matrix_t_blk, &
    1055             :                                m_quench_t=quench_t, &
    1056             :                                m_overlap=almo_scf_env%matrix_s(1), &
    1057             :                                m_sigma_tmpl=almo_scf_env%matrix_sigma_inv, &
    1058             :                                nspins=nspins, &
    1059             :                                xalmo_history=almo_scf_env%xalmo_history, &
    1060             :                                assume_t0_q0x=assume_t0_q0x, &
    1061             :                                optimize_theta=optimize_theta, &
    1062             :                                envelope_amplitude=almo_scf_env%envelope_amplitude, &
    1063             :                                eps_filter=almo_scf_env%eps_filter, &
    1064             :                                order_lanczos=almo_scf_env%order_lanczos, &
    1065             :                                eps_lanczos=almo_scf_env%eps_lanczos, &
    1066             :                                max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
    1067          86 :                                nocc_of_domain=almo_scf_env%nocc_of_domain)
    1068             : 
    1069          86 :       ndomains = almo_scf_env%ndomains
    1070        1028 :       ALLOCATE (domain_r_down(ndomains, nspins))
    1071          86 :       CALL init_submatrices(domain_r_down)
    1072        1028 :       ALLOCATE (bad_modes_projector_down(ndomains, nspins))
    1073          86 :       CALL init_submatrices(bad_modes_projector_down)
    1074             : 
    1075         344 :       ALLOCATE (prec_vv(nspins))
    1076         344 :       ALLOCATE (siginvTFTsiginv(nspins))
    1077         344 :       ALLOCATE (STsiginv_0(nspins))
    1078         344 :       ALLOCATE (FTsiginv(nspins))
    1079         344 :       ALLOCATE (ST(nspins))
    1080         344 :       ALLOCATE (prev_grad(nspins))
    1081         344 :       ALLOCATE (grad(nspins))
    1082         344 :       ALLOCATE (prev_step(nspins))
    1083         344 :       ALLOCATE (step(nspins))
    1084         344 :       ALLOCATE (prev_minus_prec_grad(nspins))
    1085         344 :       ALLOCATE (m_sig_sqrti_ii(nspins))
    1086         344 :       ALLOCATE (tempNOcc(nspins))
    1087         344 :       ALLOCATE (tempNOcc_1(nspins))
    1088         344 :       ALLOCATE (tempOccOcc(nspins))
    1089         172 :       DO ispin = 1, nspins
    1090             : 
    1091             :          ! init temporary storage
    1092             :          CALL dbcsr_create(prec_vv(ispin), &
    1093             :                            template=almo_scf_env%matrix_ks(ispin), &
    1094          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1095             :          CALL dbcsr_create(siginvTFTsiginv(ispin), &
    1096             :                            template=almo_scf_env%matrix_sigma(ispin), &
    1097          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1098             :          CALL dbcsr_create(STsiginv_0(ispin), &
    1099             :                            template=matrix_t_out(ispin), &
    1100          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1101             :          CALL dbcsr_create(FTsiginv(ispin), &
    1102             :                            template=matrix_t_out(ispin), &
    1103          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1104             :          CALL dbcsr_create(ST(ispin), &
    1105             :                            template=matrix_t_out(ispin), &
    1106          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1107             :          CALL dbcsr_create(prev_grad(ispin), &
    1108             :                            template=matrix_t_out(ispin), &
    1109          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1110             :          CALL dbcsr_create(grad(ispin), &
    1111             :                            template=matrix_t_out(ispin), &
    1112          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1113             :          CALL dbcsr_create(prev_step(ispin), &
    1114             :                            template=matrix_t_out(ispin), &
    1115          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1116             :          CALL dbcsr_create(step(ispin), &
    1117             :                            template=matrix_t_out(ispin), &
    1118          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1119             :          CALL dbcsr_create(prev_minus_prec_grad(ispin), &
    1120             :                            template=matrix_t_out(ispin), &
    1121          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1122             :          CALL dbcsr_create(m_sig_sqrti_ii(ispin), &
    1123             :                            template=almo_scf_env%matrix_sigma_inv(ispin), &
    1124          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1125             :          CALL dbcsr_create(tempNOcc(ispin), &
    1126             :                            template=matrix_t_out(ispin), &
    1127          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1128             :          CALL dbcsr_create(tempNOcc_1(ispin), &
    1129             :                            template=matrix_t_out(ispin), &
    1130          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1131             :          CALL dbcsr_create(tempOccOcc(ispin), &
    1132             :                            template=almo_scf_env%matrix_sigma_inv(ispin), &
    1133          86 :                            matrix_type=dbcsr_type_no_symmetry)
    1134             : 
    1135          86 :          CALL dbcsr_set(step(ispin), 0.0_dp)
    1136          86 :          CALL dbcsr_set(prev_step(ispin), 0.0_dp)
    1137             : 
    1138             :          CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), &
    1139          86 :                              nfullrows_total=nocc(ispin))
    1140             : 
    1141             :          ! invert S domains if necessary
    1142             :          ! Note: domains for alpha and beta electrons might be different
    1143             :          ! that is why the inversion of the AO overlap is inside the spin loop
    1144          86 :          IF (my_special_case .EQ. xalmo_case_normal) THEN
    1145             :             CALL construct_domain_s_inv( &
    1146             :                matrix_s=almo_scf_env%matrix_s(1), &
    1147             :                subm_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    1148             :                dpattern=quench_t(ispin), &
    1149             :                map=almo_scf_env%domain_map(ispin), &
    1150          38 :                node_of_domain=almo_scf_env%cpu_of_domain)
    1151             : 
    1152             :             CALL construct_domain_s_sqrt( &
    1153             :                matrix_s=almo_scf_env%matrix_s(1), &
    1154             :                subm_s_sqrt=almo_scf_env%domain_s_sqrt(:, ispin), &
    1155             :                subm_s_sqrt_inv=almo_scf_env%domain_s_sqrt_inv(:, ispin), &
    1156             :                dpattern=almo_scf_env%quench_t(ispin), &
    1157             :                map=almo_scf_env%domain_map(ispin), &
    1158          38 :                node_of_domain=almo_scf_env%cpu_of_domain)
    1159             : 
    1160             :          END IF
    1161             : 
    1162          86 :          IF (assume_t0_q0x) THEN
    1163             : 
    1164             :             ! save S.T_0.siginv_0
    1165          42 :             IF (my_special_case .EQ. xalmo_case_fully_deloc) THEN
    1166             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1167             :                                    almo_scf_env%matrix_s(1), &
    1168             :                                    almo_scf_env%matrix_t_blk(ispin), &
    1169             :                                    0.0_dp, ST(ispin), &
    1170          18 :                                    filter_eps=almo_scf_env%eps_filter)
    1171             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1172             :                                    ST(ispin), &
    1173             :                                    almo_scf_env%matrix_sigma_inv_0deloc(ispin), &
    1174             :                                    0.0_dp, STsiginv_0(ispin), &
    1175          18 :                                    filter_eps=almo_scf_env%eps_filter)
    1176             :             END IF
    1177             : 
    1178             :             ! construct domain-projector
    1179          42 :             IF (my_special_case .EQ. xalmo_case_normal) THEN
    1180             :                CALL construct_domain_r_down( &
    1181             :                   matrix_t=almo_scf_env%matrix_t_blk(ispin), &
    1182             :                   matrix_sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
    1183             :                   matrix_s=almo_scf_env%matrix_s(1), &
    1184             :                   subm_r_down=domain_r_down(:, ispin), &
    1185             :                   dpattern=quench_t(ispin), &
    1186             :                   map=almo_scf_env%domain_map(ispin), &
    1187             :                   node_of_domain=almo_scf_env%cpu_of_domain, &
    1188          24 :                   filter_eps=almo_scf_env%eps_filter)
    1189             :             END IF
    1190             : 
    1191             :          END IF ! assume_t0_q0x
    1192             : 
    1193             :          ! localization functional
    1194         172 :          IF (penalty_occ_local) THEN
    1195             : 
    1196             :             ! compute S.R0.B.R0.S
    1197             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1198             :                                 almo_scf_env%matrix_s(1), &
    1199             :                                 matrix_t_in(ispin), &
    1200             :                                 0.0_dp, tempNOcc(ispin), &
    1201           0 :                                 filter_eps=almo_scf_env%eps_filter)
    1202             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1203             :                                 tempNOcc(ispin), &
    1204             :                                 almo_scf_env%matrix_sigma_inv(ispin), &
    1205             :                                 0.0_dp, tempNOCC_1(ispin), &
    1206           0 :                                 filter_eps=almo_scf_env%eps_filter)
    1207             : 
    1208           0 :             DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
    1209           0 :                DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
    1210             : 
    1211             :                   CALL matrix_qs_to_almo(op_sm_set_qs(reim, idim0)%matrix, op_sm_set_almo(reim, idim0)%matrix, &
    1212           0 :                                          almo_scf_env%mat_distr_aos, .FALSE.)
    1213             : 
    1214             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1215             :                                       op_sm_set_almo(reim, idim0)%matrix, &
    1216             :                                       matrix_t_in(ispin), &
    1217             :                                       0.0_dp, tempNOcc(ispin), &
    1218           0 :                                       filter_eps=almo_scf_env%eps_filter)
    1219             : 
    1220             :                   CALL dbcsr_multiply("T", "N", 1.0_dp, &
    1221             :                                       matrix_t_in(ispin), &
    1222             :                                       tempNOcc(ispin), &
    1223             :                                       0.0_dp, tempOccOcc(ispin), &
    1224           0 :                                       filter_eps=almo_scf_env%eps_filter)
    1225             : 
    1226             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1227             :                                       tempNOCC_1(ispin), &
    1228             :                                       tempOccOcc(ispin), &
    1229             :                                       0.0_dp, tempNOcc(ispin), &
    1230           0 :                                       filter_eps=almo_scf_env%eps_filter)
    1231             : 
    1232             :                   CALL dbcsr_multiply("N", "T", 1.0_dp, &
    1233             :                                       tempNOcc(ispin), &
    1234             :                                       tempNOcc_1(ispin), &
    1235             :                                       0.0_dp, op_sm_set_almo(reim, idim0)%matrix, &
    1236           0 :                                       filter_eps=almo_scf_env%eps_filter)
    1237             : 
    1238             :                END DO
    1239             :             END DO ! end loop over idim0
    1240             : 
    1241             :          END IF !penalty_occ_local
    1242             : 
    1243             :       END DO ! ispin
    1244             : 
    1245             :       ! start the outer SCF loop
    1246          86 :       outer_max_iter = optimizer%max_iter_outer_loop
    1247          86 :       outer_prepare_to_exit = .FALSE.
    1248          86 :       outer_iteration = 0
    1249          86 :       grad_norm = 0.0_dp
    1250          86 :       grad_norm_frob = 0.0_dp
    1251          86 :       use_guess = .FALSE.
    1252             : 
    1253             :       DO
    1254             : 
    1255             :          ! start the inner SCF loop
    1256          92 :          max_iter = optimizer%max_iter
    1257          92 :          prepare_to_exit = .FALSE.
    1258          92 :          line_search = .FALSE.
    1259          92 :          converged = .FALSE.
    1260          92 :          iteration = 0
    1261          92 :          cg_iteration = 0
    1262          92 :          line_search_iteration = 0
    1263             :          energy_new = 0.0_dp
    1264          92 :          energy_old = 0.0_dp
    1265          92 :          energy_diff = 0.0_dp
    1266          92 :          localization_obj_function = 0.0_dp
    1267          92 :          line_search_error = 0.0_dp
    1268             : 
    1269          92 :          t1 = m_walltime()
    1270             : 
    1271        1048 :          DO
    1272             : 
    1273        1048 :             just_started = (iteration .EQ. 0) .AND. (outer_iteration .EQ. 0)
    1274             : 
    1275             :             CALL main_var_to_xalmos_and_loss_func( &
    1276             :                almo_scf_env=almo_scf_env, &
    1277             :                qs_env=qs_env, &
    1278             :                m_main_var_in=m_theta, &
    1279             :                m_t_out=matrix_t_out, &
    1280             :                m_sig_sqrti_ii_out=m_sig_sqrti_ii, &
    1281             :                energy_out=energy_new, &
    1282             :                penalty_out=penalty_func_new, &
    1283             :                m_FTsiginv_out=FTsiginv, &
    1284             :                m_siginvTFTsiginv_out=siginvTFTsiginv, &
    1285             :                m_ST_out=ST, &
    1286             :                m_STsiginv0_in=STsiginv_0, &
    1287             :                m_quench_t_in=quench_t, &
    1288             :                domain_r_down_in=domain_r_down, &
    1289             :                assume_t0_q0x=assume_t0_q0x, &
    1290             :                just_started=just_started, &
    1291             :                optimize_theta=optimize_theta, &
    1292             :                normalize_orbitals=normalize_orbitals, &
    1293             :                perturbation_only=perturbation_only, &
    1294             :                do_penalty=penalty_occ_vol, &
    1295        1048 :                special_case=my_special_case)
    1296        1048 :             IF (penalty_occ_vol) THEN
    1297             :                ! this is not pure energy anymore
    1298           0 :                energy_new = energy_new + penalty_func_new
    1299             :             END IF
    1300        2096 :             DO ispin = 1, nspins
    1301        2096 :                IF (penalty_occ_vol) THEN
    1302             :                   penalty_occ_vol_g_prefactor(ispin) = &
    1303           0 :                      -2.0_dp*penalty_amplitude*spin_factor*nocc(ispin)
    1304           0 :                   penalty_occ_vol_h_prefactor(ispin) = 0.0_dp
    1305             :                END IF
    1306             :             END DO
    1307             : 
    1308        1048 :             localization_obj_function = 0.0_dp
    1309             :             ! RZK-warning: This block must be combined with the loss function
    1310        1048 :             IF (penalty_occ_local) THEN
    1311           0 :                DO ispin = 1, nspins
    1312             : 
    1313             :                   ! LzL insert localization penalty
    1314           0 :                   localization_obj_function = 0.0_dp
    1315           0 :                   CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), nfullrows_total=nmo)
    1316           0 :                   ALLOCATE (z2(nmo))
    1317           0 :                   ALLOCATE (reim_diag(nmo))
    1318             : 
    1319           0 :                   CALL dbcsr_get_info(tempOccOcc(ispin), group=para_group_handle)
    1320           0 :                   CALL para_group%set_handle(para_group_handle)
    1321             : 
    1322           0 :                   DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
    1323             : 
    1324           0 :                      z2(:) = 0.0_dp
    1325             : 
    1326           0 :                      DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
    1327             : 
    1328             :                         !CALL matrix_qs_to_almo(op_sm_set_qs(reim, idim0)%matrix, op_sm_set_almo(reim, idim0)%matrix, &
    1329             :                         !                       almo_scf_env%mat_distr_aos, .FALSE.)
    1330             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1331             :                                             op_sm_set_almo(reim, idim0)%matrix, &
    1332             :                                             matrix_t_out(ispin), &
    1333             :                                             0.0_dp, tempNOcc(ispin), &
    1334           0 :                                             filter_eps=almo_scf_env%eps_filter)
    1335             :                         !warning - save time by computing only the diagonal elements
    1336             :                         CALL dbcsr_multiply("T", "N", 1.0_dp, &
    1337             :                                             matrix_t_out(ispin), &
    1338             :                                             tempNOcc(ispin), &
    1339             :                                             0.0_dp, tempOccOcc(ispin), &
    1340           0 :                                             filter_eps=almo_scf_env%eps_filter)
    1341             : 
    1342           0 :                         reim_diag = 0.0_dp
    1343           0 :                         CALL dbcsr_get_diag(tempOccOcc(ispin), reim_diag)
    1344           0 :                         CALL para_group%sum(reim_diag)
    1345           0 :                         z2(:) = z2(:) + reim_diag(:)*reim_diag(:)
    1346             : 
    1347             :                      END DO
    1348             : 
    1349           0 :                      DO ielem = 1, nmo
    1350             :                         SELECT CASE (2) ! allows for selection of different spread functionals
    1351             :                         CASE (1) ! functional =  -W_I * log( |z_I|^2 )
    1352           0 :                            fval = -weights(idim0)*LOG(ABS(z2(ielem)))
    1353             :                         CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
    1354           0 :                            fval = weights(idim0) - weights(idim0)*ABS(z2(ielem))
    1355             :                         CASE (3) ! functional =  W_I * ( 1 - |z_I| )
    1356             :                            fval = weights(idim0) - weights(idim0)*SQRT(ABS(z2(ielem)))
    1357             :                         END SELECT
    1358           0 :                         localization_obj_function = localization_obj_function + fval
    1359             :                      END DO
    1360             : 
    1361             :                   END DO ! end loop over idim0
    1362             : 
    1363           0 :                   DEALLOCATE (z2)
    1364           0 :                   DEALLOCATE (reim_diag)
    1365             : 
    1366           0 :                   energy_new = energy_new + localiz_coeff*localization_obj_function
    1367             : 
    1368             :                END DO ! ispin
    1369             :             END IF ! penalty_occ_local
    1370             : 
    1371        2096 :             DO ispin = 1, nspins
    1372             : 
    1373             :                IF (just_started .AND. almo_mathematica) THEN
    1374             :                   IF (ispin .GT. 1) CPWARN("Mathematica files will be overwritten")
    1375             :                   CALL print_mathematica_matrix(almo_scf_env%matrix_s(1), "matrixS.dat")
    1376             :                   CALL print_mathematica_matrix(almo_scf_env%matrix_ks(ispin), "matrixF.dat")
    1377             :                   CALL print_mathematica_matrix(matrix_t_out(ispin), "matrixT.dat")
    1378             :                   CALL print_mathematica_matrix(quench_t(ispin), "matrixQ.dat")
    1379             :                END IF
    1380             : 
    1381             :                ! save the previous gradient to compute beta
    1382             :                ! do it only if the previous grad was computed
    1383             :                ! for .NOT.line_search
    1384        1048 :                IF (line_search_iteration .EQ. 0 .AND. iteration .NE. 0) &
    1385        1542 :                   CALL dbcsr_copy(prev_grad(ispin), grad(ispin))
    1386             : 
    1387             :             END DO ! ispin
    1388             : 
    1389             :             ! compute the energy gradient if necessary
    1390             :             skip_grad = (iteration .GT. 0 .AND. &
    1391             :                          fixed_line_search_niter .NE. 0 .AND. &
    1392        1048 :                          line_search_iteration .NE. fixed_line_search_niter)
    1393             : 
    1394             :             IF (.NOT. skip_grad) THEN
    1395             : 
    1396        2096 :                DO ispin = 1, nspins
    1397             : 
    1398             :                   CALL compute_gradient( &
    1399             :                      m_grad_out=grad(ispin), &
    1400             :                      m_ks=almo_scf_env%matrix_ks(ispin), &
    1401             :                      m_s=almo_scf_env%matrix_s(1), &
    1402             :                      m_t=matrix_t_out(ispin), &
    1403             :                      m_t0=almo_scf_env%matrix_t_blk(ispin), &
    1404             :                      m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    1405             :                      m_quench_t=quench_t(ispin), &
    1406             :                      m_FTsiginv=FTsiginv(ispin), &
    1407             :                      m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    1408             :                      m_ST=ST(ispin), &
    1409             :                      m_STsiginv0=STsiginv_0(ispin), &
    1410             :                      m_theta=m_theta(ispin), &
    1411             :                      m_sig_sqrti_ii=m_sig_sqrti_ii(ispin), &
    1412             :                      domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    1413             :                      domain_r_down=domain_r_down(:, ispin), &
    1414             :                      cpu_of_domain=almo_scf_env%cpu_of_domain, &
    1415             :                      domain_map=almo_scf_env%domain_map(ispin), &
    1416             :                      assume_t0_q0x=assume_t0_q0x, &
    1417             :                      optimize_theta=optimize_theta, &
    1418             :                      normalize_orbitals=normalize_orbitals, &
    1419             :                      penalty_occ_vol=penalty_occ_vol, &
    1420             :                      penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    1421             :                      envelope_amplitude=almo_scf_env%envelope_amplitude, &
    1422             :                      eps_filter=almo_scf_env%eps_filter, &
    1423             :                      spin_factor=spin_factor, &
    1424             :                      special_case=my_special_case, &
    1425             :                      penalty_occ_local=penalty_occ_local, &
    1426             :                      op_sm_set=op_sm_set_almo, &
    1427             :                      weights=weights, &
    1428             :                      energy_coeff=energy_coeff, &
    1429        2096 :                      localiz_coeff=localiz_coeff)
    1430             : 
    1431             :                END DO ! ispin
    1432             : 
    1433             :             END IF ! skip_grad
    1434             : 
    1435             :             ! if unprojected XALMOs are optimized then compute both
    1436             :             ! HessianInv/preconditioner and the "bad-mode" projector
    1437             : 
    1438        1048 :             IF (blissful_neglect) THEN
    1439         460 :                DO ispin = 1, nspins
    1440             :                   !compute the prec only for the first step,
    1441             :                   !but project the gradient every step
    1442         230 :                   IF (iteration .EQ. 0) THEN
    1443             :                      CALL compute_preconditioner( &
    1444             :                         domain_prec_out=almo_scf_env%domain_preconditioner(:, ispin), &
    1445             :                         bad_modes_projector_down_out=bad_modes_projector_down(:, ispin), &
    1446             :                         m_prec_out=prec_vv(ispin), &
    1447             :                         m_ks=almo_scf_env%matrix_ks(ispin), &
    1448             :                         m_s=almo_scf_env%matrix_s(1), &
    1449             :                         m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    1450             :                         m_quench_t=quench_t(ispin), &
    1451             :                         m_FTsiginv=FTsiginv(ispin), &
    1452             :                         m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    1453             :                         m_ST=ST(ispin), &
    1454             :                         para_env=almo_scf_env%para_env, &
    1455             :                         blacs_env=almo_scf_env%blacs_env, &
    1456             :                         nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    1457             :                         domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    1458             :                         domain_s_inv_half=almo_scf_env%domain_s_sqrt_inv(:, ispin), &
    1459             :                         domain_s_half=almo_scf_env%domain_s_sqrt(:, ispin), &
    1460             :                         domain_r_down=domain_r_down(:, ispin), &
    1461             :                         cpu_of_domain=almo_scf_env%cpu_of_domain, &
    1462             :                         domain_map=almo_scf_env%domain_map(ispin), &
    1463             :                         assume_t0_q0x=assume_t0_q0x, &
    1464             :                         penalty_occ_vol=penalty_occ_vol, &
    1465             :                         penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    1466             :                         eps_filter=almo_scf_env%eps_filter, &
    1467             :                         neg_thr=optimizer%neglect_threshold, &
    1468             :                         spin_factor=spin_factor, &
    1469             :                         skip_inversion=.FALSE., &
    1470          18 :                         special_case=my_special_case)
    1471             :                   END IF
    1472             :                   ! remove bad modes from the gradient
    1473             :                   CALL apply_domain_operators( &
    1474             :                      matrix_in=grad(ispin), &
    1475             :                      matrix_out=grad(ispin), &
    1476             :                      operator1=almo_scf_env%domain_s_inv(:, ispin), &
    1477             :                      operator2=bad_modes_projector_down(:, ispin), &
    1478             :                      dpattern=quench_t(ispin), &
    1479             :                      map=almo_scf_env%domain_map(ispin), &
    1480             :                      node_of_domain=almo_scf_env%cpu_of_domain, &
    1481             :                      my_action=1, &
    1482         460 :                      filter_eps=almo_scf_env%eps_filter)
    1483             : 
    1484             :                END DO ! ispin
    1485             : 
    1486             :             END IF ! blissful neglect
    1487             : 
    1488             :             ! check convergence and other exit criteria
    1489        2096 :             DO ispin = 1, nspins
    1490             :                CALL dbcsr_norm(grad(ispin), dbcsr_norm_maxabsnorm, &
    1491        2096 :                                norm_scalar=grad_norm_spin(ispin))
    1492             :             END DO ! ispin
    1493        3144 :             grad_norm = MAXVAL(grad_norm_spin)
    1494             : 
    1495        1048 :             converged = (grad_norm .LE. optimizer%eps_error)
    1496        1048 :             IF (converged .OR. (iteration .GE. max_iter)) THEN
    1497          92 :                prepare_to_exit = .TRUE.
    1498             :             END IF
    1499             :             ! if early stopping is on do at least one iteration
    1500        1048 :             IF (optimizer%early_stopping_on .AND. just_started) &
    1501           0 :                prepare_to_exit = .FALSE.
    1502             : 
    1503             :             IF (grad_norm .LT. almo_scf_env%eps_prev_guess) &
    1504        1048 :                use_guess = .TRUE.
    1505             : 
    1506             :             ! it is not time to exit just yet
    1507        1048 :             IF (.NOT. prepare_to_exit) THEN
    1508             : 
    1509             :                ! check the gradient along the step direction
    1510             :                ! and decide whether to switch to the line-search mode
    1511             :                ! do not do this in the first iteration
    1512         956 :                IF (iteration .NE. 0) THEN
    1513             : 
    1514         864 :                   IF (fixed_line_search_niter .EQ. 0) THEN
    1515             : 
    1516             :                      ! enforce at least one line search
    1517             :                      ! without even checking the error
    1518         864 :                      IF (.NOT. line_search) THEN
    1519             : 
    1520         422 :                         line_search = .TRUE.
    1521         422 :                         line_search_iteration = line_search_iteration + 1
    1522             : 
    1523             :                      ELSE
    1524             : 
    1525             :                         ! check the line-search error and decide whether to
    1526             :                         ! change the direction
    1527             :                         line_search_error = 0.0_dp
    1528             :                         denom = 0.0_dp
    1529             :                         denom2 = 0.0_dp
    1530             : 
    1531         884 :                         DO ispin = 1, nspins
    1532             : 
    1533         442 :                            CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    1534         442 :                            line_search_error = line_search_error + tempreal
    1535         442 :                            CALL dbcsr_dot(grad(ispin), grad(ispin), tempreal)
    1536         442 :                            denom = denom + tempreal
    1537         442 :                            CALL dbcsr_dot(step(ispin), step(ispin), tempreal)
    1538         884 :                            denom2 = denom2 + tempreal
    1539             : 
    1540             :                         END DO ! ispin
    1541             : 
    1542             :                         ! cosine of the angle between the step and grad
    1543             :                         ! (must be close to zero at convergence)
    1544         442 :                         line_search_error = line_search_error/SQRT(denom)/SQRT(denom2)
    1545             : 
    1546         442 :                         IF (ABS(line_search_error) .GT. optimizer%lin_search_eps_error) THEN
    1547          40 :                            line_search = .TRUE.
    1548          40 :                            line_search_iteration = line_search_iteration + 1
    1549             :                         ELSE
    1550         402 :                            line_search = .FALSE.
    1551         402 :                            line_search_iteration = 0
    1552         402 :                            IF (grad_norm .LT. eps_skip_gradients) THEN
    1553           0 :                               fixed_line_search_niter = ABS(almo_scf_env%integer04)
    1554             :                            END IF
    1555             :                         END IF
    1556             : 
    1557             :                      END IF
    1558             : 
    1559             :                   ELSE ! decision for fixed_line_search_niter
    1560             : 
    1561           0 :                      IF (.NOT. line_search) THEN
    1562           0 :                         line_search = .TRUE.
    1563           0 :                         line_search_iteration = line_search_iteration + 1
    1564             :                      ELSE
    1565           0 :                         IF (line_search_iteration .EQ. fixed_line_search_niter) THEN
    1566           0 :                            line_search = .FALSE.
    1567             :                            line_search_iteration = 0
    1568           0 :                            line_search_iteration = line_search_iteration + 1
    1569             :                         END IF
    1570             :                      END IF
    1571             : 
    1572             :                   END IF ! fixed_line_search_niter fork
    1573             : 
    1574             :                END IF ! iteration.ne.0
    1575             : 
    1576         956 :                IF (line_search) THEN
    1577         462 :                   energy_diff = 0.0_dp
    1578             :                ELSE
    1579         494 :                   energy_diff = energy_new - energy_old
    1580         494 :                   energy_old = energy_new
    1581             :                END IF
    1582             : 
    1583             :                ! update the step direction
    1584         956 :                IF (.NOT. line_search) THEN
    1585             : 
    1586             :                   !IF (unit_nr>0) THEN
    1587             :                   !   WRITE(unit_nr,*) "....updating step direction...."
    1588             :                   !ENDIF
    1589             : 
    1590         988 :                   cg_iteration = cg_iteration + 1
    1591             : 
    1592             :                   ! save the previous step
    1593         988 :                   DO ispin = 1, nspins
    1594         988 :                      CALL dbcsr_copy(prev_step(ispin), step(ispin))
    1595             :                   END DO ! ispin
    1596             : 
    1597             :                   ! compute the new step (apply preconditioner if available)
    1598           0 :                   SELECT CASE (prec_type)
    1599             :                   CASE (xalmo_prec_full)
    1600             : 
    1601             :                      ! solving approximate Newton eq in the full (linearized) space
    1602             :                      CALL newton_grad_to_step( &
    1603             :                         optimizer=almo_scf_env%opt_xalmo_newton_pcg_solver, &
    1604             :                         m_grad=grad(:), &
    1605             :                         m_delta=step(:), &
    1606             :                         m_s=almo_scf_env%matrix_s(:), &
    1607             :                         m_ks=almo_scf_env%matrix_ks(:), &
    1608             :                         m_siginv=almo_scf_env%matrix_sigma_inv(:), &
    1609             :                         m_quench_t=quench_t(:), &
    1610             :                         m_FTsiginv=FTsiginv(:), &
    1611             :                         m_siginvTFTsiginv=siginvTFTsiginv(:), &
    1612             :                         m_ST=ST(:), &
    1613             :                         m_t=matrix_t_out(:), &
    1614             :                         m_sig_sqrti_ii=m_sig_sqrti_ii(:), &
    1615             :                         domain_s_inv=almo_scf_env%domain_s_inv(:, :), &
    1616             :                         domain_r_down=domain_r_down(:, :), &
    1617             :                         domain_map=almo_scf_env%domain_map(:), &
    1618             :                         cpu_of_domain=almo_scf_env%cpu_of_domain, &
    1619             :                         nocc_of_domain=almo_scf_env%nocc_of_domain(:, :), &
    1620             :                         para_env=almo_scf_env%para_env, &
    1621             :                         blacs_env=almo_scf_env%blacs_env, &
    1622             :                         eps_filter=almo_scf_env%eps_filter, &
    1623             :                         optimize_theta=optimize_theta, &
    1624             :                         penalty_occ_vol=penalty_occ_vol, &
    1625             :                         normalize_orbitals=normalize_orbitals, &
    1626             :                         penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(:), &
    1627             :                         penalty_occ_vol_pf2=penalty_occ_vol_h_prefactor(:), &
    1628             :                         special_case=my_special_case &
    1629           0 :                         )
    1630             : 
    1631             :                   CASE (xalmo_prec_domain)
    1632             : 
    1633             :                      ! compute and invert preconditioner?
    1634         494 :                      IF (.NOT. blissful_neglect .AND. &
    1635             :                          ((just_started .AND. perturbation_only) .OR. &
    1636             :                           (iteration .EQ. 0 .AND. (.NOT. perturbation_only))) &
    1637             :                          ) THEN
    1638             : 
    1639             :                         ! computing preconditioner
    1640         148 :                         DO ispin = 1, nspins
    1641             :                            CALL compute_preconditioner( &
    1642             :                               domain_prec_out=almo_scf_env%domain_preconditioner(:, ispin), &
    1643             :                               m_prec_out=prec_vv(ispin), &
    1644             :                               m_ks=almo_scf_env%matrix_ks(ispin), &
    1645             :                               m_s=almo_scf_env%matrix_s(1), &
    1646             :                               m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    1647             :                               m_quench_t=quench_t(ispin), &
    1648             :                               m_FTsiginv=FTsiginv(ispin), &
    1649             :                               m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    1650             :                               m_ST=ST(ispin), &
    1651             :                               para_env=almo_scf_env%para_env, &
    1652             :                               blacs_env=almo_scf_env%blacs_env, &
    1653             :                               nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    1654             :                               domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    1655             :                               domain_r_down=domain_r_down(:, ispin), &
    1656             :                               cpu_of_domain=almo_scf_env%cpu_of_domain, &
    1657             :                               domain_map=almo_scf_env%domain_map(ispin), &
    1658             :                               assume_t0_q0x=assume_t0_q0x, &
    1659             :                               penalty_occ_vol=penalty_occ_vol, &
    1660             :                               penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    1661             :                               eps_filter=almo_scf_env%eps_filter, &
    1662             :                               neg_thr=0.5_dp, &
    1663             :                               spin_factor=spin_factor, &
    1664             :                               skip_inversion=.FALSE., &
    1665         568 :                               special_case=my_special_case)
    1666             :                         END DO ! ispin
    1667             :                      END IF ! compute_prec
    1668             : 
    1669             :                      !IF (unit_nr>0) THEN
    1670             :                      !   WRITE(unit_nr,*) "....applying precomputed preconditioner...."
    1671             :                      !ENDIF
    1672             : 
    1673         494 :                      IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
    1674             :                          my_special_case .EQ. xalmo_case_fully_deloc) THEN
    1675             : 
    1676         488 :                         DO ispin = 1, nspins
    1677             : 
    1678             :                            CALL dbcsr_multiply("N", "N", -1.0_dp, &
    1679             :                                                prec_vv(ispin), &
    1680             :                                                grad(ispin), &
    1681             :                                                0.0_dp, step(ispin), &
    1682         488 :                                                filter_eps=almo_scf_env%eps_filter)
    1683             : 
    1684             :                         END DO ! ispin
    1685             : 
    1686             :                      ELSE
    1687             : 
    1688             :                         !!! RZK-warning Currently for non-theta only
    1689         250 :                         IF (optimize_theta) THEN
    1690           0 :                            CPABORT("theta is NYI")
    1691             :                         END IF
    1692             : 
    1693         500 :                         DO ispin = 1, nspins
    1694             : 
    1695             :                            CALL apply_domain_operators( &
    1696             :                               matrix_in=grad(ispin), &
    1697             :                               matrix_out=step(ispin), &
    1698             :                               operator1=almo_scf_env%domain_preconditioner(:, ispin), &
    1699             :                               dpattern=quench_t(ispin), &
    1700             :                               map=almo_scf_env%domain_map(ispin), &
    1701             :                               node_of_domain=almo_scf_env%cpu_of_domain, &
    1702             :                               my_action=0, &
    1703         250 :                               filter_eps=almo_scf_env%eps_filter)
    1704         500 :                            CALL dbcsr_scale(step(ispin), -1.0_dp)
    1705             : 
    1706             :                            !CALL dbcsr_copy(m_tmp_no_3,&
    1707             :                            !        quench_t(ispin))
    1708             :                            !CALL dbcsr_function_of_elements(m_tmp_no_3,&
    1709             :                            !        func=dbcsr_func_inverse,&
    1710             :                            !        a0=0.0_dp,&
    1711             :                            !        a1=1.0_dp)
    1712             :                            !CALL dbcsr_copy(m_tmp_no_2,step)
    1713             :                            !CALL dbcsr_hadamard_product(&
    1714             :                            !        m_tmp_no_2,&
    1715             :                            !        m_tmp_no_3,&
    1716             :                            !        step)
    1717             :                            !CALL dbcsr_copy(m_tmp_no_3,quench_t(ispin))
    1718             : 
    1719             :                         END DO ! ispin
    1720             : 
    1721             :                      END IF ! special case
    1722             : 
    1723             :                   CASE (xalmo_prec_zero)
    1724             : 
    1725             :                      ! no preconditioner
    1726         494 :                      DO ispin = 1, nspins
    1727             : 
    1728           0 :                         CALL dbcsr_copy(step(ispin), grad(ispin))
    1729           0 :                         CALL dbcsr_scale(step(ispin), -1.0_dp)
    1730             : 
    1731             :                      END DO ! ispin
    1732             : 
    1733             :                   END SELECT ! preconditioner type fork
    1734             : 
    1735             :                   ! check whether we need to reset conjugate directions
    1736         494 :                   IF (iteration .EQ. 0) THEN
    1737          92 :                      reset_conjugator = .TRUE.
    1738             :                   END IF
    1739             : 
    1740             :                   ! compute the conjugation coefficient - beta
    1741         494 :                   IF (.NOT. reset_conjugator) THEN
    1742             : 
    1743             :                      CALL compute_cg_beta( &
    1744             :                         beta=beta, &
    1745             :                         reset_conjugator=reset_conjugator, &
    1746             :                         conjugator=optimizer%conjugator, &
    1747             :                         grad=grad(:), &
    1748             :                         prev_grad=prev_grad(:), &
    1749             :                         step=step(:), &
    1750             :                         prev_step=prev_step(:), &
    1751             :                         prev_minus_prec_grad=prev_minus_prec_grad(:) &
    1752         402 :                         )
    1753             : 
    1754             :                   END IF
    1755             : 
    1756         494 :                   IF (reset_conjugator) THEN
    1757             : 
    1758          92 :                      beta = 0.0_dp
    1759          92 :                      IF (unit_nr > 0 .AND. (.NOT. just_started)) THEN
    1760           3 :                         WRITE (unit_nr, '(T2,A35)') "Re-setting conjugator to zero"
    1761             :                      END IF
    1762          92 :                      reset_conjugator = .FALSE.
    1763             : 
    1764             :                   END IF
    1765             : 
    1766             :                   ! save the preconditioned gradient (useful for beta)
    1767         988 :                   DO ispin = 1, nspins
    1768             : 
    1769         494 :                      CALL dbcsr_copy(prev_minus_prec_grad(ispin), step(ispin))
    1770             : 
    1771             :                      !IF (unit_nr>0) THEN
    1772             :                      !   WRITE(unit_nr,*) "....final beta....", beta
    1773             :                      !ENDIF
    1774             : 
    1775             :                      ! conjugate the step direction
    1776         988 :                      CALL dbcsr_add(step(ispin), prev_step(ispin), 1.0_dp, beta)
    1777             : 
    1778             :                   END DO ! ispin
    1779             : 
    1780             :                END IF ! update the step direction
    1781             : 
    1782             :                ! estimate the step size
    1783         956 :                IF (.NOT. line_search) THEN
    1784             :                   ! we just changed the direction and
    1785             :                   ! we have only E and grad from the current step
    1786             :                   ! it is not enouhg to compute step_size - just guess it
    1787         494 :                   e0 = energy_new
    1788         494 :                   g0 = 0.0_dp
    1789         988 :                   DO ispin = 1, nspins
    1790         494 :                      CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    1791         988 :                      g0 = g0 + tempreal
    1792             :                   END DO ! ispin
    1793         494 :                   IF (iteration .EQ. 0) THEN
    1794          92 :                      step_size = optimizer%lin_search_step_size_guess
    1795             :                   ELSE
    1796         402 :                      IF (next_step_size_guess .LE. 0.0_dp) THEN
    1797           2 :                         step_size = optimizer%lin_search_step_size_guess
    1798             :                      ELSE
    1799             :                         ! take the last value
    1800         400 :                         step_size = next_step_size_guess*1.05_dp
    1801             :                      END IF
    1802             :                   END IF
    1803             :                   !IF (unit_nr > 0) THEN
    1804             :                   !   WRITE (unit_nr, '(A2,3F12.5)') &
    1805             :                   !      "EG", e0, g0, step_size
    1806             :                   !ENDIF
    1807         494 :                   next_step_size_guess = step_size
    1808             :                ELSE
    1809         462 :                   IF (fixed_line_search_niter .EQ. 0) THEN
    1810         462 :                      e1 = energy_new
    1811         462 :                      g1 = 0.0_dp
    1812         924 :                      DO ispin = 1, nspins
    1813         462 :                         CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    1814         924 :                         g1 = g1 + tempreal
    1815             :                      END DO ! ispin
    1816             :                      ! we have accumulated some points along this direction
    1817             :                      ! use only the most recent g0 (quadratic approximation)
    1818         462 :                      appr_sec_der = (g1 - g0)/step_size
    1819             :                      !IF (unit_nr > 0) THEN
    1820             :                      !   WRITE (unit_nr, '(A2,7F12.5)') &
    1821             :                      !      "EG", e0, e1, g0, g1, appr_sec_der, step_size, -g1/appr_sec_der
    1822             :                      !ENDIF
    1823         462 :                      step_size = -g1/appr_sec_der
    1824         462 :                      e0 = e1
    1825         462 :                      g0 = g1
    1826             :                   ELSE
    1827             :                      ! use e0, g0 and e1 to compute g1 and make a step
    1828             :                      ! if the next iteration is also line_search
    1829             :                      ! use e1 and the calculated g1 as e0 and g0
    1830           0 :                      e1 = energy_new
    1831           0 :                      appr_sec_der = 2.0*((e1 - e0)/step_size - g0)/step_size
    1832           0 :                      g1 = appr_sec_der*step_size + g0
    1833             :                      !IF (unit_nr > 0) THEN
    1834             :                      !   WRITE (unit_nr, '(A2,7F12.5)') &
    1835             :                      !      "EG", e0, e1, g0, g1, appr_sec_der, step_size, -g1/appr_sec_der
    1836             :                      !ENDIF
    1837             :                      !appr_sec_der=(g1-g0)/step_size
    1838           0 :                      step_size = -g1/appr_sec_der
    1839           0 :                      e0 = e1
    1840           0 :                      g0 = g1
    1841             :                   END IF
    1842         462 :                   next_step_size_guess = next_step_size_guess + step_size
    1843             :                END IF
    1844             : 
    1845             :                ! update theta
    1846        1912 :                DO ispin = 1, nspins
    1847        1912 :                   CALL dbcsr_add(m_theta(ispin), step(ispin), 1.0_dp, step_size)
    1848             :                END DO ! ispin
    1849             : 
    1850             :             END IF ! not.prepare_to_exit
    1851             : 
    1852        1048 :             IF (line_search) THEN
    1853         482 :                iter_type = "LS"
    1854             :             ELSE
    1855         566 :                iter_type = "CG"
    1856             :             END IF
    1857             : 
    1858        1048 :             t2 = m_walltime()
    1859        1048 :             IF (unit_nr > 0) THEN
    1860         524 :                iter_type = TRIM("ALMO SCF "//iter_type)
    1861             :                WRITE (unit_nr, '(T2,A13,I6,F23.10,E14.5,F14.9,F9.2)') &
    1862         524 :                   iter_type, iteration, &
    1863         524 :                   energy_new, energy_diff, grad_norm, &
    1864        1048 :                   t2 - t1
    1865         524 :                IF (penalty_occ_local .OR. penalty_occ_vol) THEN
    1866             :                   WRITE (unit_nr, '(T2,A25,F23.10)') &
    1867           0 :                      "Energy component:", (energy_new - penalty_func_new - localization_obj_function)
    1868             :                END IF
    1869         524 :                IF (penalty_occ_local) THEN
    1870             :                   WRITE (unit_nr, '(T2,A25,F23.10)') &
    1871           0 :                      "Localization component:", localization_obj_function
    1872             :                END IF
    1873         524 :                IF (penalty_occ_vol) THEN
    1874             :                   WRITE (unit_nr, '(T2,A25,F23.10)') &
    1875           0 :                      "Penalty component:", penalty_func_new
    1876             :                END IF
    1877             :             END IF
    1878             : 
    1879        1048 :             IF (my_special_case .EQ. xalmo_case_block_diag) THEN
    1880          46 :                IF (penalty_occ_vol) THEN
    1881           0 :                   almo_scf_env%almo_scf_energy = energy_new - penalty_func_new - localization_obj_function
    1882             :                ELSE
    1883          46 :                   almo_scf_env%almo_scf_energy = energy_new - localization_obj_function
    1884             :                END IF
    1885             :             END IF
    1886             : 
    1887        1048 :             t1 = m_walltime()
    1888             : 
    1889        1048 :             iteration = iteration + 1
    1890        1048 :             IF (prepare_to_exit) EXIT
    1891             : 
    1892             :          END DO ! inner SCF loop
    1893             : 
    1894          92 :          IF (converged .OR. (outer_iteration .GE. outer_max_iter)) THEN
    1895             :             outer_prepare_to_exit = .TRUE.
    1896             :          END IF
    1897             : 
    1898          86 :          outer_iteration = outer_iteration + 1
    1899           6 :          IF (outer_prepare_to_exit) EXIT
    1900             : 
    1901             :       END DO ! outer SCF loop
    1902             : 
    1903         172 :       DO ispin = 1, nspins
    1904          86 :          IF (converged .AND. almo_mathematica) THEN
    1905             :             IF (ispin .GT. 1) CPWARN("Mathematica files will be overwritten")
    1906             :             CALL print_mathematica_matrix(matrix_t_out(ispin), "matrixTf.dat")
    1907             :          END IF
    1908             :       END DO ! ispin
    1909             : 
    1910             :       ! post SCF-loop calculations
    1911          86 :       IF (converged) THEN
    1912             : 
    1913             :          CALL wrap_up_xalmo_scf( &
    1914             :             qs_env=qs_env, &
    1915             :             almo_scf_env=almo_scf_env, &
    1916             :             perturbation_in=perturbation_only, &
    1917             :             m_xalmo_in=matrix_t_out, &
    1918             :             m_quench_in=quench_t, &
    1919          86 :             energy_inout=energy_new)
    1920             : 
    1921             :       END IF ! if converged
    1922             : 
    1923         172 :       DO ispin = 1, nspins
    1924          86 :          CALL dbcsr_release(prec_vv(ispin))
    1925          86 :          CALL dbcsr_release(STsiginv_0(ispin))
    1926          86 :          CALL dbcsr_release(ST(ispin))
    1927          86 :          CALL dbcsr_release(FTsiginv(ispin))
    1928          86 :          CALL dbcsr_release(siginvTFTsiginv(ispin))
    1929          86 :          CALL dbcsr_release(prev_grad(ispin))
    1930          86 :          CALL dbcsr_release(prev_step(ispin))
    1931          86 :          CALL dbcsr_release(grad(ispin))
    1932          86 :          CALL dbcsr_release(step(ispin))
    1933          86 :          CALL dbcsr_release(prev_minus_prec_grad(ispin))
    1934          86 :          CALL dbcsr_release(m_theta(ispin))
    1935          86 :          CALL dbcsr_release(m_t_in_local(ispin))
    1936          86 :          CALL dbcsr_release(m_sig_sqrti_ii(ispin))
    1937          86 :          CALL release_submatrices(domain_r_down(:, ispin))
    1938          86 :          CALL release_submatrices(bad_modes_projector_down(:, ispin))
    1939          86 :          CALL dbcsr_release(tempNOcc(ispin))
    1940          86 :          CALL dbcsr_release(tempNOcc_1(ispin))
    1941         172 :          CALL dbcsr_release(tempOccOcc(ispin))
    1942             :       END DO ! ispin
    1943             : 
    1944          86 :       DEALLOCATE (tempNOcc)
    1945          86 :       DEALLOCATE (tempNOcc_1)
    1946          86 :       DEALLOCATE (tempOccOcc)
    1947          86 :       DEALLOCATE (prec_vv)
    1948          86 :       DEALLOCATE (siginvTFTsiginv)
    1949          86 :       DEALLOCATE (STsiginv_0)
    1950          86 :       DEALLOCATE (FTsiginv)
    1951          86 :       DEALLOCATE (ST)
    1952          86 :       DEALLOCATE (prev_grad)
    1953          86 :       DEALLOCATE (grad)
    1954          86 :       DEALLOCATE (prev_step)
    1955          86 :       DEALLOCATE (step)
    1956          86 :       DEALLOCATE (prev_minus_prec_grad)
    1957          86 :       DEALLOCATE (m_sig_sqrti_ii)
    1958             : 
    1959         684 :       DEALLOCATE (domain_r_down)
    1960         684 :       DEALLOCATE (bad_modes_projector_down)
    1961             : 
    1962          86 :       DEALLOCATE (penalty_occ_vol_g_prefactor)
    1963          86 :       DEALLOCATE (penalty_occ_vol_h_prefactor)
    1964          86 :       DEALLOCATE (grad_norm_spin)
    1965          86 :       DEALLOCATE (nocc)
    1966             : 
    1967          86 :       DEALLOCATE (m_theta, m_t_in_local)
    1968          86 :       IF (penalty_occ_local) THEN
    1969           0 :          DO idim0 = 1, dim_op
    1970           0 :             DO reim = 1, SIZE(op_sm_set_qs, 1)
    1971           0 :                DEALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    1972           0 :                DEALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    1973             :             END DO
    1974             :          END DO
    1975           0 :          DEALLOCATE (op_sm_set_qs)
    1976           0 :          DEALLOCATE (op_sm_set_almo)
    1977           0 :          DEALLOCATE (weights)
    1978             :       END IF
    1979             : 
    1980          86 :       IF (.NOT. converged .AND. .NOT. optimizer%early_stopping_on) THEN
    1981           0 :          CPABORT("Optimization not converged! ")
    1982             :       END IF
    1983             : 
    1984          86 :       CALL timestop(handle)
    1985             : 
    1986         172 :    END SUBROUTINE almo_scf_xalmo_pcg
    1987             : 
    1988             : ! **************************************************************************************************
    1989             : !> \brief Optimization of NLMOs using PCG minimizers
    1990             : !> \param qs_env ...
    1991             : !> \param optimizer   controls the optimization algorithm
    1992             : !> \param matrix_s - AO overlap (NAOs x NAOs)
    1993             : !> \param matrix_mo_in - initial MOs (NAOs x NMOs)
    1994             : !> \param matrix_mo_out - final MOs (NAOs x NMOs)
    1995             : !> \param template_matrix_sigma - template (NMOs x NMOs)
    1996             : !> \param overlap_determinant - the determinant of the MOs overlap
    1997             : !> \param mat_distr_aos - info on the distribution of AOs
    1998             : !> \param virtuals ...
    1999             : !> \param eps_filter ...
    2000             : !> \par History
    2001             : !>       2018.10 created [Rustam Z Khaliullin]
    2002             : !> \author Rustam Z Khaliullin
    2003             : ! **************************************************************************************************
    2004           8 :    SUBROUTINE almo_scf_construct_nlmos(qs_env, optimizer, &
    2005             :                                        matrix_s, matrix_mo_in, matrix_mo_out, &
    2006             :                                        template_matrix_sigma, overlap_determinant, &
    2007             :                                        mat_distr_aos, virtuals, eps_filter)
    2008             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    2009             :       TYPE(optimizer_options_type), INTENT(INOUT)        :: optimizer
    2010             :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix_s
    2011             :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:), &
    2012             :          INTENT(INOUT)                                   :: matrix_mo_in, matrix_mo_out
    2013             :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:), &
    2014             :          INTENT(IN)                                      :: template_matrix_sigma
    2015             :       REAL(KIND=dp), INTENT(INOUT)                       :: overlap_determinant
    2016             :       INTEGER, INTENT(IN)                                :: mat_distr_aos
    2017             :       LOGICAL, INTENT(IN)                                :: virtuals
    2018             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    2019             : 
    2020             :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_construct_nlmos'
    2021             : 
    2022             :       CHARACTER(LEN=30)                                  :: iter_type, print_string
    2023             :       INTEGER :: cg_iteration, dim_op, handle, iatom, idim0, isgf, ispin, iteration, &
    2024             :          line_search_iteration, linear_search_type, max_iter, natom, ncol, nspins, &
    2025             :          outer_iteration, outer_max_iter, para_group_handle, prec_type, reim, unit_nr
    2026          16 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: first_sgf, last_sgf, nocc, nsgf
    2027             :       LOGICAL                                            :: converged, d_bfgs, just_started, l_bfgs, &
    2028             :                                                             line_search, outer_prepare_to_exit, &
    2029             :                                                             prepare_to_exit, reset_conjugator
    2030             :       REAL(KIND=dp) :: appr_sec_der, beta, bfgs_rho, bfgs_sum, denom, denom2, e0, e1, g0, g0sign, &
    2031             :          g1, g1sign, grad_norm, line_search_error, localization_obj_function, &
    2032             :          localization_obj_function_ispin, next_step_size_guess, obj_function_ispin, objf_diff, &
    2033             :          objf_new, objf_old, penalty_amplitude, penalty_func_ispin, penalty_func_new, spin_factor, &
    2034             :          step_size, t1, t2, tempreal
    2035           8 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: diagonal, grad_norm_spin, &
    2036           8 :                                                             penalty_vol_prefactor, &
    2037           8 :                                                             suggested_vol_penalty, weights
    2038             :       TYPE(cell_type), POINTER                           :: cell
    2039             :       TYPE(cp_logger_type), POINTER                      :: logger
    2040           8 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: qs_matrix_s
    2041           8 :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: op_sm_set_almo, op_sm_set_qs
    2042           8 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: approx_inv_hessian, bfgs_s, bfgs_y, grad, &
    2043           8 :          m_S0, m_sig_sqrti_ii, m_siginv, m_sigma, m_t_mo_local, m_theta, m_theta_normalized, &
    2044           8 :          prev_grad, prev_m_theta, prev_minus_prec_grad, prev_step, step, tempNOcc1, tempOccOcc1, &
    2045           8 :          tempOccOcc2, tempOccOcc3
    2046           8 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:, :, :)  :: m_B0
    2047          24 :       TYPE(lbfgs_history_type)                           :: nlmo_lbfgs_history
    2048             :       TYPE(mp_comm_type)                                 :: para_group
    2049           8 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    2050           8 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    2051             : 
    2052           8 :       CALL timeset(routineN, handle)
    2053             : 
    2054             :       ! get a useful output_unit
    2055           8 :       logger => cp_get_default_logger()
    2056           8 :       IF (logger%para_env%is_source()) THEN
    2057           4 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    2058             :       ELSE
    2059             :          unit_nr = -1
    2060             :       END IF
    2061             : 
    2062           8 :       nspins = SIZE(matrix_mo_in)
    2063             : 
    2064           8 :       IF (unit_nr > 0) THEN
    2065           4 :          WRITE (unit_nr, *)
    2066           4 :          IF (.NOT. virtuals) THEN
    2067           4 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 24), &
    2068           8 :                " Optimization of occupied NLMOs ", REPEAT("-", 23)
    2069             :          ELSE
    2070           0 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 24), &
    2071           0 :                " Optimization of virtual NLMOs ", REPEAT("-", 24)
    2072             :          END IF
    2073           4 :          WRITE (unit_nr, *)
    2074           4 :          WRITE (unit_nr, '(T2,A13,A6,A23,A14,A14,A9)') "Method", "Iter", &
    2075           8 :             "Objective Function", "Change", "Convergence", "Time"
    2076           4 :          WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
    2077             :       END IF
    2078             : 
    2079           8 :       NULLIFY (particle_set)
    2080             : 
    2081             :       CALL get_qs_env(qs_env=qs_env, &
    2082             :                       matrix_s=qs_matrix_s, &
    2083             :                       cell=cell, &
    2084             :                       particle_set=particle_set, &
    2085           8 :                       qs_kind_set=qs_kind_set)
    2086             : 
    2087           8 :       natom = SIZE(particle_set, 1)
    2088          24 :       ALLOCATE (first_sgf(natom))
    2089          24 :       ALLOCATE (last_sgf(natom))
    2090          24 :       ALLOCATE (nsgf(natom))
    2091             :       !   construction of
    2092             :       CALL get_particle_set(particle_set, qs_kind_set, &
    2093           8 :                             first_sgf=first_sgf, last_sgf=last_sgf, nsgf=nsgf)
    2094             : 
    2095             :       ! m_theta contains a set of variational parameters
    2096             :       ! that define one-electron orbitals
    2097          32 :       ALLOCATE (m_theta(nspins))
    2098          16 :       DO ispin = 1, nspins
    2099             :          CALL dbcsr_create(m_theta(ispin), &
    2100             :                            template=template_matrix_sigma(ispin), &
    2101           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2102             :          ! create initial guess for the main variable - identity matrix
    2103           8 :          CALL dbcsr_set(m_theta(ispin), 0.0_dp)
    2104          16 :          CALL dbcsr_add_on_diag(m_theta(ispin), 1.0_dp)
    2105             :       END DO
    2106             : 
    2107           8 :       SELECT CASE (optimizer%opt_penalty%operator_type)
    2108             :       CASE (op_loc_berry)
    2109             : 
    2110           0 :          IF (cell%orthorhombic) THEN
    2111           0 :             dim_op = 3
    2112             :          ELSE
    2113           0 :             dim_op = 6
    2114             :          END IF
    2115           0 :          ALLOCATE (weights(6))
    2116           0 :          weights = 0.0_dp
    2117           0 :          CALL initialize_weights(cell, weights)
    2118           0 :          ALLOCATE (op_sm_set_qs(2, dim_op))
    2119           0 :          ALLOCATE (op_sm_set_almo(2, dim_op))
    2120             :          ! allocate space for T0^t.B.T0
    2121           0 :          ALLOCATE (m_B0(2, dim_op, nspins))
    2122           0 :          DO idim0 = 1, dim_op
    2123           0 :             DO reim = 1, SIZE(op_sm_set_qs, 1)
    2124           0 :                NULLIFY (op_sm_set_qs(reim, idim0)%matrix, op_sm_set_almo(reim, idim0)%matrix)
    2125           0 :                ALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    2126           0 :                ALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    2127             :                CALL dbcsr_copy(op_sm_set_qs(reim, idim0)%matrix, qs_matrix_s(1)%matrix, &
    2128           0 :                              name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
    2129           0 :                CALL dbcsr_set(op_sm_set_qs(reim, idim0)%matrix, 0.0_dp)
    2130             :                CALL dbcsr_copy(op_sm_set_almo(reim, idim0)%matrix, matrix_s, &
    2131           0 :                              name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
    2132           0 :                CALL dbcsr_set(op_sm_set_almo(reim, idim0)%matrix, 0.0_dp)
    2133           0 :                DO ispin = 1, nspins
    2134             :                   CALL dbcsr_create(m_B0(reim, idim0, ispin), &
    2135             :                                     template=m_theta(ispin), &
    2136           0 :                                     matrix_type=dbcsr_type_no_symmetry)
    2137           0 :                   CALL dbcsr_set(m_B0(reim, idim0, ispin), 0.0_dp)
    2138             :                END DO
    2139             :             END DO
    2140             :          END DO
    2141             : 
    2142           0 :          CALL compute_berry_operator(qs_env, cell, op_sm_set_qs, dim_op)
    2143             : 
    2144             :       CASE (op_loc_pipek)
    2145             : 
    2146           8 :          dim_op = natom
    2147          24 :          ALLOCATE (weights(dim_op))
    2148          80 :          weights = 1.0_dp
    2149             : 
    2150         184 :          ALLOCATE (m_B0(1, dim_op, nspins))
    2151             :          !m_B0 first dim is 1 now!
    2152          88 :          DO idim0 = 1, dim_op
    2153         152 :             DO reim = 1, 1 !SIZE(op_sm_set_qs, 1)
    2154         216 :                DO ispin = 1, nspins
    2155             :                   CALL dbcsr_create(m_B0(reim, idim0, ispin), &
    2156             :                                     template=m_theta(ispin), &
    2157          72 :                                     matrix_type=dbcsr_type_no_symmetry)
    2158         144 :                   CALL dbcsr_set(m_B0(reim, idim0, ispin), 0.0_dp)
    2159             :                END DO
    2160             :             END DO
    2161             :          END DO
    2162             : 
    2163             :       END SELECT
    2164             : 
    2165             :       ! penalty amplitude adjusts the strenght of volume conservation
    2166           8 :       penalty_amplitude = optimizer%opt_penalty%penalty_strength
    2167             :       !penalty_occ_vol = ( optimizer%opt_penalty%occ_vol_method .NE. penalty_type_none )
    2168             :       !penalty_local = ( optimizer%opt_penalty%occ_loc_method .NE. penalty_type_none )
    2169             : 
    2170             :       ! preconditioner control
    2171           8 :       prec_type = optimizer%preconditioner
    2172             : 
    2173             :       ! use diagonal BFGS if preconditioner is set
    2174           8 :       d_bfgs = .FALSE.
    2175           8 :       l_bfgs = .FALSE.
    2176           8 :       IF (prec_type .NE. xalmo_prec_zero) l_bfgs = .TRUE.
    2177           8 :       IF (l_bfgs .AND. (optimizer%conjugator .NE. cg_zero)) THEN
    2178           0 :          CPABORT("Cannot use conjugators with BFGS")
    2179             :       END IF
    2180           8 :       IF (l_bfgs) THEN
    2181           8 :          CALL lbfgs_create(nlmo_lbfgs_history, nspins, nstore=10)
    2182             :       END IF
    2183             : 
    2184             :       IF (nspins == 1) THEN
    2185             :          spin_factor = 2.0_dp
    2186             :       ELSE
    2187             :          spin_factor = 1.0_dp
    2188             :       END IF
    2189             : 
    2190          24 :       ALLOCATE (grad_norm_spin(nspins))
    2191          24 :       ALLOCATE (nocc(nspins))
    2192          24 :       ALLOCATE (penalty_vol_prefactor(nspins))
    2193          24 :       ALLOCATE (suggested_vol_penalty(nspins))
    2194             : 
    2195             :       ! create a local copy of matrix_mo_in because
    2196             :       ! matrix_mo_in and matrix_mo_out can be the same matrix
    2197             :       ! we need to make sure data in matrix_mo_in is intact
    2198             :       ! after we start writing to matrix_mo_out
    2199          32 :       ALLOCATE (m_t_mo_local(nspins))
    2200          16 :       DO ispin = 1, nspins
    2201             :          CALL dbcsr_create(m_t_mo_local(ispin), &
    2202             :                            template=matrix_mo_in(ispin), &
    2203           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2204          16 :          CALL dbcsr_copy(m_t_mo_local(ispin), matrix_mo_in(ispin))
    2205             :       END DO
    2206             : 
    2207          32 :       ALLOCATE (approx_inv_hessian(nspins))
    2208          32 :       ALLOCATE (m_theta_normalized(nspins))
    2209          32 :       ALLOCATE (prev_m_theta(nspins))
    2210          32 :       ALLOCATE (m_S0(nspins))
    2211          32 :       ALLOCATE (prev_grad(nspins))
    2212          32 :       ALLOCATE (grad(nspins))
    2213          32 :       ALLOCATE (prev_step(nspins))
    2214          32 :       ALLOCATE (step(nspins))
    2215          32 :       ALLOCATE (prev_minus_prec_grad(nspins))
    2216          32 :       ALLOCATE (m_sig_sqrti_ii(nspins))
    2217          32 :       ALLOCATE (m_sigma(nspins))
    2218          32 :       ALLOCATE (m_siginv(nspins))
    2219          32 :       ALLOCATE (tempNOcc1(nspins))
    2220          32 :       ALLOCATE (tempOccOcc1(nspins))
    2221          32 :       ALLOCATE (tempOccOcc2(nspins))
    2222          32 :       ALLOCATE (tempOccOcc3(nspins))
    2223          32 :       ALLOCATE (bfgs_y(nspins))
    2224          32 :       ALLOCATE (bfgs_s(nspins))
    2225             : 
    2226          16 :       DO ispin = 1, nspins
    2227             : 
    2228             :          ! init temporary storage
    2229             :          CALL dbcsr_create(tempNOcc1(ispin), &
    2230             :                            template=matrix_mo_out(ispin), &
    2231           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2232             :          CALL dbcsr_create(approx_inv_hessian(ispin), &
    2233             :                            template=m_theta(ispin), &
    2234           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2235             :          CALL dbcsr_create(m_theta_normalized(ispin), &
    2236             :                            template=m_theta(ispin), &
    2237           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2238             :          CALL dbcsr_create(prev_m_theta(ispin), &
    2239             :                            template=m_theta(ispin), &
    2240           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2241             :          CALL dbcsr_create(m_S0(ispin), &
    2242             :                            template=m_theta(ispin), &
    2243           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2244             :          CALL dbcsr_create(prev_grad(ispin), &
    2245             :                            template=m_theta(ispin), &
    2246           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2247             :          CALL dbcsr_create(grad(ispin), &
    2248             :                            template=m_theta(ispin), &
    2249           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2250             :          CALL dbcsr_create(prev_step(ispin), &
    2251             :                            template=m_theta(ispin), &
    2252           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2253             :          CALL dbcsr_create(step(ispin), &
    2254             :                            template=m_theta(ispin), &
    2255           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2256             :          CALL dbcsr_create(prev_minus_prec_grad(ispin), &
    2257             :                            template=m_theta(ispin), &
    2258           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2259             :          CALL dbcsr_create(m_sig_sqrti_ii(ispin), &
    2260             :                            template=m_theta(ispin), &
    2261           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2262             :          CALL dbcsr_create(m_sigma(ispin), &
    2263             :                            template=m_theta(ispin), &
    2264           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2265             :          CALL dbcsr_create(m_siginv(ispin), &
    2266             :                            template=m_theta(ispin), &
    2267           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2268             :          CALL dbcsr_create(tempOccOcc1(ispin), &
    2269             :                            template=m_theta(ispin), &
    2270           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2271             :          CALL dbcsr_create(tempOccOcc2(ispin), &
    2272             :                            template=m_theta(ispin), &
    2273           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2274             :          CALL dbcsr_create(tempOccOcc3(ispin), &
    2275             :                            template=m_theta(ispin), &
    2276           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2277             :          CALL dbcsr_create(bfgs_s(ispin), &
    2278             :                            template=m_theta(ispin), &
    2279           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2280             :          CALL dbcsr_create(bfgs_y(ispin), &
    2281             :                            template=m_theta(ispin), &
    2282           8 :                            matrix_type=dbcsr_type_no_symmetry)
    2283             : 
    2284           8 :          CALL dbcsr_set(step(ispin), 0.0_dp)
    2285           8 :          CALL dbcsr_set(prev_step(ispin), 0.0_dp)
    2286             : 
    2287             :          CALL dbcsr_get_info(template_matrix_sigma(ispin), &
    2288           8 :                              nfullrows_total=nocc(ispin))
    2289             : 
    2290           8 :          penalty_vol_prefactor(ispin) = -penalty_amplitude !KEEP: * spin_factor * nocc(ispin)
    2291             : 
    2292             :          ! compute m_S0=T0^t.S.T0
    2293             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2294             :                              matrix_s, &
    2295             :                              m_t_mo_local(ispin), &
    2296             :                              0.0_dp, tempNOcc1(ispin), &
    2297           8 :                              filter_eps=eps_filter)
    2298             :          CALL dbcsr_multiply("T", "N", 1.0_dp, &
    2299             :                              m_t_mo_local(ispin), &
    2300             :                              tempNOcc1(ispin), &
    2301             :                              0.0_dp, m_S0(ispin), &
    2302           8 :                              filter_eps=eps_filter)
    2303             : 
    2304           8 :          SELECT CASE (optimizer%opt_penalty%operator_type)
    2305             : 
    2306             :          CASE (op_loc_berry)
    2307             : 
    2308             :             ! compute m_B0=T0^t.B.T0
    2309           0 :             DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
    2310             : 
    2311           0 :                DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
    2312             : 
    2313             :                   CALL matrix_qs_to_almo(op_sm_set_qs(reim, idim0)%matrix, op_sm_set_almo(reim, idim0)%matrix, &
    2314           0 :                                          mat_distr_aos, .FALSE.)
    2315             : 
    2316             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2317             :                                       op_sm_set_almo(reim, idim0)%matrix, &
    2318             :                                       m_t_mo_local(ispin), &
    2319             :                                       0.0_dp, tempNOcc1(ispin), &
    2320           0 :                                       filter_eps=eps_filter)
    2321             : 
    2322             :                   CALL dbcsr_multiply("T", "N", 1.0_dp, &
    2323             :                                       m_t_mo_local(ispin), &
    2324             :                                       tempNOcc1(ispin), &
    2325             :                                       0.0_dp, m_B0(reim, idim0, ispin), &
    2326           0 :                                       filter_eps=eps_filter)
    2327             : 
    2328           0 :                   DEALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    2329           0 :                   DEALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    2330             : 
    2331             :                END DO
    2332             : 
    2333             :             END DO ! end loop over idim0
    2334             : 
    2335             :          CASE (op_loc_pipek)
    2336             : 
    2337             :             ! compute m_B0=T0^t.B.T0
    2338          80 :             DO iatom = 1, natom ! this loop is over "miller" ind
    2339             : 
    2340          72 :                isgf = first_sgf(iatom)
    2341          72 :                ncol = nsgf(iatom)
    2342             : 
    2343             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2344             :                                    matrix_s, &
    2345             :                                    m_t_mo_local(ispin), &
    2346             :                                    0.0_dp, tempNOcc1(ispin), &
    2347          72 :                                    filter_eps=eps_filter)
    2348             : 
    2349             :                CALL dbcsr_multiply("T", "N", 0.5_dp, &
    2350             :                                    m_t_mo_local(ispin), &
    2351             :                                    tempNOcc1(ispin), &
    2352             :                                    0.0_dp, m_B0(1, iatom, ispin), &
    2353             :                                    first_k=isgf, last_k=isgf + ncol - 1, &
    2354          72 :                                    filter_eps=eps_filter)
    2355             : 
    2356             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2357             :                                    matrix_s, &
    2358             :                                    m_t_mo_local(ispin), &
    2359             :                                    0.0_dp, tempNOcc1(ispin), &
    2360             :                                    first_k=isgf, last_k=isgf + ncol - 1, &
    2361          72 :                                    filter_eps=eps_filter)
    2362             : 
    2363             :                CALL dbcsr_multiply("T", "N", 0.5_dp, &
    2364             :                                    m_t_mo_local(ispin), &
    2365             :                                    tempNOcc1(ispin), &
    2366             :                                    1.0_dp, m_B0(1, iatom, ispin), &
    2367          80 :                                    filter_eps=eps_filter)
    2368             : 
    2369             :             END DO ! end loop over iatom
    2370             : 
    2371             :          END SELECT
    2372             : 
    2373             :       END DO ! ispin
    2374             : 
    2375           8 :       IF (optimizer%opt_penalty%operator_type .EQ. op_loc_berry) THEN
    2376           0 :          DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
    2377           0 :             DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
    2378           0 :                DEALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    2379           0 :                DEALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    2380             :             END DO
    2381             :          END DO
    2382           0 :          DEALLOCATE (op_sm_set_qs, op_sm_set_almo)
    2383             :       END IF
    2384             : 
    2385             :       ! start the outer SCF loop
    2386           8 :       outer_max_iter = optimizer%max_iter_outer_loop
    2387           8 :       outer_prepare_to_exit = .FALSE.
    2388           8 :       outer_iteration = 0
    2389           8 :       grad_norm = 0.0_dp
    2390           8 :       penalty_func_new = 0.0_dp
    2391           8 :       linear_search_type = 1 ! safe restart, no quadratic assumption, takes more steps
    2392             :       localization_obj_function = 0.0_dp
    2393             :       penalty_func_new = 0.0_dp
    2394             : 
    2395             :       DO
    2396             : 
    2397             :          ! start the inner SCF loop
    2398           8 :          max_iter = optimizer%max_iter
    2399           8 :          prepare_to_exit = .FALSE.
    2400           8 :          line_search = .FALSE.
    2401           8 :          converged = .FALSE.
    2402           8 :          iteration = 0
    2403           8 :          cg_iteration = 0
    2404           8 :          line_search_iteration = 0
    2405           8 :          obj_function_ispin = 0.0_dp
    2406           8 :          objf_new = 0.0_dp
    2407           8 :          objf_old = 0.0_dp
    2408           8 :          objf_diff = 0.0_dp
    2409           8 :          line_search_error = 0.0_dp
    2410           8 :          t1 = m_walltime()
    2411           8 :          next_step_size_guess = 0.0_dp
    2412             : 
    2413             :          DO
    2414             : 
    2415          82 :             just_started = (iteration .EQ. 0) .AND. (outer_iteration .EQ. 0)
    2416             : 
    2417         164 :             DO ispin = 1, nspins
    2418             : 
    2419          82 :                CALL dbcsr_get_info(m_sig_sqrti_ii(ispin), group=para_group_handle)
    2420          82 :                CALL para_group%set_handle(para_group_handle)
    2421             : 
    2422             :                ! compute diagonal (a^t.sigma0.a)^(-1/2)
    2423             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2424             :                                    m_S0(ispin), m_theta(ispin), 0.0_dp, &
    2425             :                                    tempOccOcc1(ispin), &
    2426          82 :                                    filter_eps=eps_filter)
    2427          82 :                CALL dbcsr_set(m_sig_sqrti_ii(ispin), 0.0_dp)
    2428          82 :                CALL dbcsr_add_on_diag(m_sig_sqrti_ii(ispin), 1.0_dp)
    2429             :                CALL dbcsr_multiply("T", "N", 1.0_dp, &
    2430             :                                    m_theta(ispin), tempOccOcc1(ispin), 0.0_dp, &
    2431             :                                    m_sig_sqrti_ii(ispin), &
    2432          82 :                                    retain_sparsity=.TRUE.)
    2433         246 :                ALLOCATE (diagonal(nocc(ispin)))
    2434          82 :                CALL dbcsr_get_diag(m_sig_sqrti_ii(ispin), diagonal)
    2435          82 :                CALL para_group%sum(diagonal)
    2436             :                ! TODO: works for zero diagonal elements?
    2437        1368 :                diagonal(:) = 1.0_dp/SQRT(diagonal(:))
    2438          82 :                CALL dbcsr_set(m_sig_sqrti_ii(ispin), 0.0_dp)
    2439          82 :                CALL dbcsr_set_diag(m_sig_sqrti_ii(ispin), diagonal)
    2440          82 :                DEALLOCATE (diagonal)
    2441             : 
    2442             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2443             :                                    m_theta(ispin), &
    2444             :                                    m_sig_sqrti_ii(ispin), &
    2445             :                                    0.0_dp, m_theta_normalized(ispin), &
    2446          82 :                                    filter_eps=eps_filter)
    2447             : 
    2448             :                ! compute new orbitals
    2449             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2450             :                                    m_t_mo_local(ispin), &
    2451             :                                    m_theta_normalized(ispin), &
    2452             :                                    0.0_dp, matrix_mo_out(ispin), &
    2453         246 :                                    filter_eps=eps_filter)
    2454             : 
    2455             :             END DO
    2456             : 
    2457             :             ! compute objective function
    2458          82 :             localization_obj_function = 0.0_dp
    2459          82 :             penalty_func_new = 0.0_dp
    2460         164 :             DO ispin = 1, nspins
    2461             : 
    2462             :                CALL compute_obj_nlmos( &
    2463             :                   !obj_function_ispin=obj_function_ispin, &
    2464             :                   localization_obj_function_ispin=localization_obj_function_ispin, &
    2465             :                   penalty_func_ispin=penalty_func_ispin, &
    2466             :                   overlap_determinant=overlap_determinant, &
    2467             :                   m_sigma=m_sigma(ispin), &
    2468             :                   nocc=nocc(ispin), &
    2469             :                   m_B0=m_B0(:, :, ispin), &
    2470             :                   m_theta_normalized=m_theta_normalized(ispin), &
    2471             :                   template_matrix_mo=matrix_mo_out(ispin), &
    2472             :                   weights=weights, &
    2473             :                   m_S0=m_S0(ispin), &
    2474             :                   just_started=just_started, &
    2475             :                   penalty_vol_prefactor=penalty_vol_prefactor(ispin), &
    2476             :                   penalty_amplitude=penalty_amplitude, &
    2477          82 :                   eps_filter=eps_filter)
    2478             : 
    2479          82 :                localization_obj_function = localization_obj_function + localization_obj_function_ispin
    2480         164 :                penalty_func_new = penalty_func_new + penalty_func_ispin
    2481             : 
    2482             :             END DO ! ispin
    2483          82 :             objf_new = penalty_func_new + localization_obj_function
    2484             : 
    2485         164 :             DO ispin = 1, nspins
    2486             :                ! save the previous gradient to compute beta
    2487             :                ! do it only if the previous grad was computed
    2488             :                ! for .NOT.line_search
    2489         164 :                IF (line_search_iteration .EQ. 0 .AND. iteration .NE. 0) THEN
    2490          30 :                   CALL dbcsr_copy(prev_grad(ispin), grad(ispin))
    2491             :                END IF
    2492             : 
    2493             :             END DO ! ispin
    2494             : 
    2495             :             ! compute the gradient
    2496         164 :             DO ispin = 1, nspins
    2497             : 
    2498             :                CALL invert_Hotelling( &
    2499             :                   matrix_inverse=m_siginv(ispin), &
    2500             :                   matrix=m_sigma(ispin), &
    2501             :                   threshold=eps_filter*10.0_dp, &
    2502             :                   filter_eps=eps_filter, &
    2503          82 :                   silent=.FALSE.)
    2504             : 
    2505             :                CALL compute_gradient_nlmos( &
    2506             :                   m_grad_out=grad(ispin), &
    2507             :                   m_B0=m_B0(:, :, ispin), &
    2508             :                   weights=weights, &
    2509             :                   m_S0=m_S0(ispin), &
    2510             :                   m_theta_normalized=m_theta_normalized(ispin), &
    2511             :                   m_siginv=m_siginv(ispin), &
    2512             :                   m_sig_sqrti_ii=m_sig_sqrti_ii(ispin), &
    2513             :                   penalty_vol_prefactor=penalty_vol_prefactor(ispin), &
    2514             :                   eps_filter=eps_filter, &
    2515         164 :                   suggested_vol_penalty=suggested_vol_penalty(ispin))
    2516             : 
    2517             :             END DO ! ispin
    2518             : 
    2519             :             ! check convergence and other exit criteria
    2520         164 :             DO ispin = 1, nspins
    2521             :                CALL dbcsr_norm(grad(ispin), dbcsr_norm_maxabsnorm, &
    2522         164 :                                norm_scalar=grad_norm_spin(ispin))
    2523             :             END DO ! ispin
    2524         246 :             grad_norm = MAXVAL(grad_norm_spin)
    2525             : 
    2526          82 :             converged = (grad_norm .LE. optimizer%eps_error)
    2527          82 :             IF (converged .OR. (iteration .GE. max_iter)) THEN
    2528             :                prepare_to_exit = .TRUE.
    2529             :             END IF
    2530             : 
    2531             :             ! it is not time to exit just yet
    2532          74 :             IF (.NOT. prepare_to_exit) THEN
    2533             : 
    2534             :                ! check the gradient along the step direction
    2535             :                ! and decide whether to switch to the line-search mode
    2536             :                ! do not do this in the first iteration
    2537          74 :                IF (iteration .NE. 0) THEN
    2538             : 
    2539             :                   ! enforce at least one line search
    2540             :                   ! without even checking the error
    2541          68 :                   IF (.NOT. line_search) THEN
    2542             : 
    2543          30 :                      line_search = .TRUE.
    2544          30 :                      line_search_iteration = line_search_iteration + 1
    2545             : 
    2546             :                   ELSE
    2547             : 
    2548             :                      ! check the line-search error and decide whether to
    2549             :                      ! change the direction
    2550             :                      line_search_error = 0.0_dp
    2551             :                      denom = 0.0_dp
    2552             :                      denom2 = 0.0_dp
    2553             : 
    2554          76 :                      DO ispin = 1, nspins
    2555             : 
    2556          38 :                         CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    2557          38 :                         line_search_error = line_search_error + tempreal
    2558          38 :                         CALL dbcsr_dot(grad(ispin), grad(ispin), tempreal)
    2559          38 :                         denom = denom + tempreal
    2560          38 :                         CALL dbcsr_dot(step(ispin), step(ispin), tempreal)
    2561          76 :                         denom2 = denom2 + tempreal
    2562             : 
    2563             :                      END DO ! ispin
    2564             : 
    2565             :                      ! cosine of the angle between the step and grad
    2566             :                      ! (must be close to zero at convergence)
    2567          38 :                      line_search_error = line_search_error/SQRT(denom)/SQRT(denom2)
    2568             : 
    2569          38 :                      IF (ABS(line_search_error) .GT. optimizer%lin_search_eps_error) THEN
    2570          14 :                         line_search = .TRUE.
    2571          14 :                         line_search_iteration = line_search_iteration + 1
    2572             :                      ELSE
    2573             :                         line_search = .FALSE.
    2574             :                         line_search_iteration = 0
    2575             :                      END IF
    2576             : 
    2577             :                   END IF
    2578             : 
    2579             :                END IF ! iteration.ne.0
    2580             : 
    2581           6 :                IF (line_search) THEN
    2582          44 :                   objf_diff = 0.0_dp
    2583             :                ELSE
    2584          30 :                   objf_diff = objf_new - objf_old
    2585          30 :                   objf_old = objf_new
    2586             :                END IF
    2587             : 
    2588             :                ! update the step direction
    2589          74 :                IF (.NOT. line_search) THEN
    2590             : 
    2591          60 :                   cg_iteration = cg_iteration + 1
    2592             : 
    2593             :                   ! save the previous step
    2594          60 :                   DO ispin = 1, nspins
    2595          60 :                      CALL dbcsr_copy(prev_step(ispin), step(ispin))
    2596             :                   END DO ! ispin
    2597             : 
    2598             :                   ! compute the new step:
    2599             :                   ! if available use second derivative info - bfgs, hessian, preconditioner
    2600          30 :                   IF (prec_type .EQ. xalmo_prec_zero) THEN ! no second derivatives
    2601             : 
    2602             :                      ! no preconditioner
    2603           0 :                      DO ispin = 1, nspins
    2604             : 
    2605           0 :                         CALL dbcsr_copy(step(ispin), grad(ispin))
    2606           0 :                         CALL dbcsr_scale(step(ispin), -1.0_dp)
    2607             : 
    2608             :                      END DO ! ispin
    2609             : 
    2610             :                   ELSE ! use second derivatives
    2611             : 
    2612             :                      ! compute and invert hessian/precond?
    2613          30 :                      IF (iteration .EQ. 0) THEN
    2614             : 
    2615             :                         IF (d_bfgs) THEN
    2616             : 
    2617             :                            ! create matrix filled with 1.0 here
    2618             :                            CALL fill_matrix_with_ones(approx_inv_hessian(1))
    2619             :                            IF (nspins .GT. 1) THEN
    2620             :                               DO ispin = 2, nspins
    2621             :                                  CALL dbcsr_copy(approx_inv_hessian(ispin), approx_inv_hessian(1))
    2622             :                               END DO
    2623             :                            END IF
    2624             : 
    2625           6 :                         ELSE IF (l_bfgs) THEN
    2626             : 
    2627           6 :                            CALL lbfgs_seed(nlmo_lbfgs_history, m_theta, grad)
    2628          12 :                            DO ispin = 1, nspins
    2629           6 :                               CALL dbcsr_copy(step(ispin), grad(ispin))
    2630          12 :                               CALL dbcsr_scale(step(ispin), -1.0_dp)
    2631             :                            END DO ! ispin
    2632             : 
    2633             :                         ELSE
    2634             : 
    2635             :                            ! computing preconditioner
    2636           0 :                            DO ispin = 1, nspins
    2637             : 
    2638             :                               ! TODO: write preconditioner code later
    2639             :                               ! For now, create matrix filled with 1.0 here
    2640           0 :                               CALL fill_matrix_with_ones(approx_inv_hessian(ispin))
    2641             :                               !CALL compute_preconditioner(&
    2642             :                               !       m_prec_out=approx_hessian(ispin),&
    2643             :                               !       m_ks=almo_scf_env%matrix_ks(ispin),&
    2644             :                               !       m_s=matrix_s,&
    2645             :                               !       m_siginv=almo_scf_env%template_matrix_sigma(ispin),&
    2646             :                               !       m_quench_t=quench_t(ispin),&
    2647             :                               !       m_FTsiginv=FTsiginv(ispin),&
    2648             :                               !       m_siginvTFTsiginv=siginvTFTsiginv(ispin),&
    2649             :                               !       m_ST=ST(ispin),&
    2650             :                               !       para_env=almo_scf_env%para_env,&
    2651             :                               !       blacs_env=almo_scf_env%blacs_env,&
    2652             :                               !       nocc_of_domain=almo_scf_env%nocc_of_domain(:,ispin),&
    2653             :                               !       domain_s_inv=almo_scf_env%domain_s_inv(:,ispin),&
    2654             :                               !       domain_r_down=domain_r_down(:,ispin),&
    2655             :                               !       cpu_of_domain=almo_scf_env%cpu_of_domain,&
    2656             :                               !       domain_map=almo_scf_env%domain_map(ispin),&
    2657             :                               !       assume_t0_q0x=assume_t0_q0x,&
    2658             :                               !       penalty_occ_vol=penalty_occ_vol,&
    2659             :                               !       penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin),&
    2660             :                               !       eps_filter=eps_filter,&
    2661             :                               !       neg_thr=0.5_dp,&
    2662             :                               !       spin_factor=spin_factor,&
    2663             :                               !       special_case=my_special_case)
    2664             :                               !CALL invert hessian
    2665             :                            END DO ! ispin
    2666             : 
    2667             :                         END IF
    2668             : 
    2669             :                      ELSE ! not iteration zero
    2670             : 
    2671             :                         ! update approx inverse hessian
    2672             :                         IF (d_bfgs) THEN ! diagonal BFGS
    2673             : 
    2674             :                            DO ispin = 1, nspins
    2675             : 
    2676             :                               ! compute s and y
    2677             :                               CALL dbcsr_copy(bfgs_y(ispin), grad(ispin))
    2678             :                               CALL dbcsr_add(bfgs_y(ispin), prev_grad(ispin), 1.0_dp, -1.0_dp)
    2679             :                               CALL dbcsr_copy(bfgs_s(ispin), m_theta(ispin))
    2680             :                               CALL dbcsr_add(bfgs_s(ispin), prev_m_theta(ispin), 1.0_dp, -1.0_dp)
    2681             : 
    2682             :                               ! compute rho
    2683             :                               CALL dbcsr_dot(grad(ispin), step(ispin), bfgs_rho)
    2684             :                               bfgs_rho = 1.0_dp/bfgs_rho
    2685             : 
    2686             :                               ! compute the sum of the squared elements of bfgs_y
    2687             :                               CALL dbcsr_dot(bfgs_y(ispin), bfgs_y(ispin), bfgs_sum)
    2688             : 
    2689             :                               ! first term: start collecting new inv hessian in this temp matrix
    2690             :                               CALL dbcsr_copy(tempOccOcc2(ispin), approx_inv_hessian(ispin))
    2691             : 
    2692             :                               ! second term: + rho * s * s
    2693             :                               CALL dbcsr_hadamard_product(bfgs_s(ispin), bfgs_s(ispin), tempOccOcc1(ispin))
    2694             :                               CALL dbcsr_add(tempOccOcc2(ispin), tempOccOcc1(ispin), 1.0_dp, bfgs_rho)
    2695             : 
    2696             :                               ! third term: + rho^2 * s * s * H * sum_(y * y)
    2697             :                               CALL dbcsr_hadamard_product(tempOccOcc1(ispin), &
    2698             :                                                           approx_inv_hessian(ispin), tempOccOcc3(ispin))
    2699             :                               CALL dbcsr_add(tempOccOcc2(ispin), tempOccOcc3(ispin), &
    2700             :                                              1.0_dp, bfgs_rho*bfgs_rho*bfgs_sum)
    2701             : 
    2702             :                               ! fourth term: - 2 * rho * s * y * H
    2703             :                               CALL dbcsr_hadamard_product(bfgs_y(ispin), &
    2704             :                                                           approx_inv_hessian(ispin), tempOccOcc1(ispin))
    2705             :                               CALL dbcsr_hadamard_product(bfgs_s(ispin), tempOccOcc1(ispin), tempOccOcc3(ispin))
    2706             :                               CALL dbcsr_add(tempOccOcc2(ispin), tempOccOcc3(ispin), &
    2707             :                                              1.0_dp, -2.0_dp*bfgs_rho)
    2708             : 
    2709             :                               CALL dbcsr_copy(approx_inv_hessian(ispin), tempOccOcc2(ispin))
    2710             : 
    2711             :                            END DO
    2712             : 
    2713          24 :                         ELSE IF (l_bfgs) THEN
    2714             : 
    2715          24 :                            CALL lbfgs_get_direction(nlmo_lbfgs_history, m_theta, grad, step)
    2716             : 
    2717             :                         END IF ! which method?
    2718             : 
    2719             :                      END IF ! compute approximate inverse hessian
    2720             : 
    2721          30 :                      IF (.NOT. l_bfgs) THEN
    2722             : 
    2723           0 :                         DO ispin = 1, nspins
    2724             : 
    2725             :                            CALL dbcsr_hadamard_product(approx_inv_hessian(ispin), &
    2726           0 :                                                        grad(ispin), step(ispin))
    2727           0 :                            CALL dbcsr_scale(step(ispin), -1.0_dp)
    2728             : 
    2729             :                         END DO ! ispin
    2730             : 
    2731             :                      END IF
    2732             : 
    2733             :                   END IF ! second derivative type fork
    2734             : 
    2735             :                   ! check whether we need to reset conjugate directions
    2736          30 :                   IF (iteration .EQ. 0) THEN
    2737           6 :                      reset_conjugator = .TRUE.
    2738             :                   END IF
    2739             : 
    2740             :                   ! compute the conjugation coefficient - beta
    2741          30 :                   IF (.NOT. reset_conjugator) THEN
    2742             :                      CALL compute_cg_beta( &
    2743             :                         beta=beta, &
    2744             :                         reset_conjugator=reset_conjugator, &
    2745             :                         conjugator=optimizer%conjugator, &
    2746             :                         grad=grad(:), &
    2747             :                         prev_grad=prev_grad(:), &
    2748             :                         step=step(:), &
    2749             :                         prev_step=prev_step(:), &
    2750             :                         prev_minus_prec_grad=prev_minus_prec_grad(:) &
    2751          24 :                         )
    2752             : 
    2753             :                   END IF
    2754             : 
    2755          30 :                   IF (reset_conjugator) THEN
    2756             : 
    2757           6 :                      beta = 0.0_dp
    2758           6 :                      IF (unit_nr > 0 .AND. (.NOT. just_started)) THEN
    2759           0 :                         WRITE (unit_nr, '(T2,A35)') "Re-setting conjugator to zero"
    2760             :                      END IF
    2761           6 :                      reset_conjugator = .FALSE.
    2762             : 
    2763             :                   END IF
    2764             : 
    2765             :                   ! save the preconditioned gradient (useful for beta)
    2766          60 :                   DO ispin = 1, nspins
    2767             : 
    2768          30 :                      CALL dbcsr_copy(prev_minus_prec_grad(ispin), step(ispin))
    2769             : 
    2770             :                      ! conjugate the step direction
    2771          60 :                      CALL dbcsr_add(step(ispin), prev_step(ispin), 1.0_dp, beta)
    2772             : 
    2773             :                   END DO ! ispin
    2774             : 
    2775             :                END IF ! update the step direction
    2776             : 
    2777             :                ! estimate the step size
    2778          74 :                IF (.NOT. line_search) THEN
    2779             :                   ! we just changed the direction and
    2780             :                   ! we have only E and grad from the current step
    2781             :                   ! it is not enough to compute step_size - just guess it
    2782          30 :                   e0 = objf_new
    2783          30 :                   g0 = 0.0_dp
    2784          60 :                   DO ispin = 1, nspins
    2785          30 :                      CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    2786          60 :                      g0 = g0 + tempreal
    2787             :                   END DO ! ispin
    2788             :                   g0sign = SIGN(1.0_dp, g0) ! sign of g0
    2789             :                   IF (linear_search_type .EQ. 1) THEN ! this is quadratic LS
    2790          30 :                      IF (iteration .EQ. 0) THEN
    2791           6 :                         step_size = optimizer%lin_search_step_size_guess
    2792             :                      ELSE
    2793          24 :                         IF (next_step_size_guess .LE. 0.0_dp) THEN
    2794           0 :                            step_size = optimizer%lin_search_step_size_guess
    2795             :                         ELSE
    2796             :                            ! take the last value
    2797          24 :                            step_size = optimizer%lin_search_step_size_guess
    2798             :                            !step_size = next_step_size_guess*1.05_dp
    2799             :                         END IF
    2800             :                      END IF
    2801             :                   ELSE IF (linear_search_type .EQ. 2) THEN ! this is cautious LS
    2802             :                      ! this LS type is designed not to trust quadratic appr
    2803             :                      ! so it always restarts from a safe step size
    2804             :                      step_size = optimizer%lin_search_step_size_guess
    2805             :                   END IF
    2806          30 :                   IF (unit_nr > 0) THEN
    2807          15 :                      WRITE (unit_nr, '(T21,3A19)') "Line position", "Line grad", "Next line step"
    2808          15 :                      WRITE (unit_nr, '(T2,A19,3F19.5)') "Line search", 0.0_dp, g0, step_size
    2809             :                   END IF
    2810          30 :                   next_step_size_guess = step_size
    2811             :                ELSE ! this is not the first line search
    2812          44 :                   e1 = objf_new
    2813          44 :                   g1 = 0.0_dp
    2814          88 :                   DO ispin = 1, nspins
    2815          44 :                      CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    2816          88 :                      g1 = g1 + tempreal
    2817             :                   END DO ! ispin
    2818          44 :                   g1sign = SIGN(1.0_dp, g1) ! sign of g1
    2819             :                   IF (linear_search_type .EQ. 1) THEN
    2820             :                      ! we have accumulated some points along this direction
    2821             :                      ! use only the most recent g0 (quadratic approximation)
    2822          44 :                      appr_sec_der = (g1 - g0)/step_size
    2823             :                      !IF (unit_nr > 0) THEN
    2824             :                      !   WRITE (unit_nr, '(A2,7F12.5)') &
    2825             :                      !      "DT", e0, e1, g0, g1, appr_sec_der, step_size, -g1/appr_sec_der
    2826             :                      !ENDIF
    2827          44 :                      step_size = -g1/appr_sec_der
    2828             :                   ELSE IF (linear_search_type .EQ. 2) THEN
    2829             :                      ! alternative method for finding step size
    2830             :                      ! do not use quadratic approximation, only gradient signs
    2831             :                      IF (g1sign .NE. g0sign) THEN
    2832             :                         step_size = -step_size/2.0; 
    2833             :                      ELSE
    2834             :                         step_size = step_size*1.5; 
    2835             :                      END IF
    2836             :                   END IF
    2837             :                   ! end alternative LS types
    2838          44 :                   IF (unit_nr > 0) THEN
    2839          22 :                      WRITE (unit_nr, '(T21,3A19)') "Line position", "Line grad", "Next line step"
    2840          22 :                      WRITE (unit_nr, '(T2,A19,3F19.5)') "Line search", next_step_size_guess, g1, step_size
    2841             :                   END IF
    2842          44 :                   e0 = e1
    2843          44 :                   g0 = g1
    2844             :                   g0sign = g1sign
    2845          44 :                   next_step_size_guess = next_step_size_guess + step_size
    2846             :                END IF
    2847             : 
    2848             :                ! update theta
    2849         148 :                DO ispin = 1, nspins
    2850          74 :                   IF (.NOT. line_search) THEN ! we prepared to perform the first line search
    2851             :                      ! "previous" refers to the previous CG step, not the previous LS step
    2852          30 :                      CALL dbcsr_copy(prev_m_theta(ispin), m_theta(ispin))
    2853             :                   END IF
    2854         148 :                   CALL dbcsr_add(m_theta(ispin), step(ispin), 1.0_dp, step_size)
    2855             :                END DO ! ispin
    2856             : 
    2857             :             END IF ! not.prepare_to_exit
    2858             : 
    2859          82 :             IF (line_search) THEN
    2860          50 :                iter_type = "LS"
    2861             :             ELSE
    2862          32 :                iter_type = "CG"
    2863             :             END IF
    2864             : 
    2865          82 :             t2 = m_walltime()
    2866          82 :             IF (unit_nr > 0) THEN
    2867          41 :                iter_type = TRIM("NLMO OPT "//iter_type)
    2868             :                WRITE (unit_nr, '(T2,A13,I6,F23.10,E14.5,F14.9,F9.2)') &
    2869          41 :                   iter_type, iteration, &
    2870          41 :                   objf_new, objf_diff, grad_norm, &
    2871          82 :                   t2 - t1
    2872             :                WRITE (unit_nr, '(T2,A19,F23.10)') &
    2873          41 :                   "Localization:", localization_obj_function
    2874             :                WRITE (unit_nr, '(T2,A19,F23.10)') &
    2875          41 :                   "Orthogonalization:", penalty_func_new
    2876             :             END IF
    2877          82 :             t1 = m_walltime()
    2878             : 
    2879          82 :             iteration = iteration + 1
    2880          82 :             IF (prepare_to_exit) EXIT
    2881             : 
    2882             :          END DO ! inner loop
    2883             : 
    2884           8 :          IF (converged .OR. (outer_iteration .GE. outer_max_iter)) THEN
    2885             :             outer_prepare_to_exit = .TRUE.
    2886             :          END IF
    2887             : 
    2888           8 :          outer_iteration = outer_iteration + 1
    2889           0 :          IF (outer_prepare_to_exit) EXIT
    2890             : 
    2891             :       END DO ! outer loop
    2892             : 
    2893             :       ! return the optimal determinant penalty
    2894           8 :       optimizer%opt_penalty%penalty_strength = 0.0_dp
    2895          16 :       DO ispin = 1, nspins
    2896             :          optimizer%opt_penalty%penalty_strength = optimizer%opt_penalty%penalty_strength + &
    2897          16 :                                                   (-1.0_dp)*penalty_vol_prefactor(ispin)
    2898             :       END DO
    2899           8 :       optimizer%opt_penalty%penalty_strength = optimizer%opt_penalty%penalty_strength/nspins
    2900             : 
    2901           8 :       IF (converged) THEN
    2902           8 :          iter_type = "Final"
    2903             :       ELSE
    2904           0 :          iter_type = "Unconverged"
    2905             :       END IF
    2906             : 
    2907           8 :       IF (unit_nr > 0) THEN
    2908           4 :          WRITE (unit_nr, '()')
    2909           4 :          print_string = TRIM(iter_type)//" localization:"
    2910             :          WRITE (unit_nr, '(T2,A29,F30.10)') &
    2911           4 :             print_string, localization_obj_function
    2912           4 :          print_string = TRIM(iter_type)//" determinant:"
    2913             :          WRITE (unit_nr, '(T2,A29,F30.10)') &
    2914           4 :             print_string, overlap_determinant
    2915           4 :          print_string = TRIM(iter_type)//" penalty strength:"
    2916             :          WRITE (unit_nr, '(T2,A29,F30.10)') &
    2917           4 :             print_string, optimizer%opt_penalty%penalty_strength
    2918             :       END IF
    2919             : 
    2920             :       ! clean up
    2921           8 :       IF (l_bfgs) THEN
    2922           8 :          CALL lbfgs_release(nlmo_lbfgs_history)
    2923             :       END IF
    2924          16 :       DO ispin = 1, nspins
    2925          80 :          DO idim0 = 1, SIZE(m_B0, 2)
    2926         152 :             DO reim = 1, SIZE(m_B0, 1)
    2927         144 :                CALL dbcsr_release(m_B0(reim, idim0, ispin))
    2928             :             END DO
    2929             :          END DO
    2930           8 :          CALL dbcsr_release(m_theta(ispin))
    2931           8 :          CALL dbcsr_release(m_t_mo_local(ispin))
    2932           8 :          CALL dbcsr_release(tempNOcc1(ispin))
    2933           8 :          CALL dbcsr_release(approx_inv_hessian(ispin))
    2934           8 :          CALL dbcsr_release(prev_m_theta(ispin))
    2935           8 :          CALL dbcsr_release(m_theta_normalized(ispin))
    2936           8 :          CALL dbcsr_release(m_S0(ispin))
    2937           8 :          CALL dbcsr_release(prev_grad(ispin))
    2938           8 :          CALL dbcsr_release(grad(ispin))
    2939           8 :          CALL dbcsr_release(prev_step(ispin))
    2940           8 :          CALL dbcsr_release(step(ispin))
    2941           8 :          CALL dbcsr_release(prev_minus_prec_grad(ispin))
    2942           8 :          CALL dbcsr_release(m_sig_sqrti_ii(ispin))
    2943           8 :          CALL dbcsr_release(m_sigma(ispin))
    2944           8 :          CALL dbcsr_release(m_siginv(ispin))
    2945           8 :          CALL dbcsr_release(tempOccOcc1(ispin))
    2946           8 :          CALL dbcsr_release(tempOccOcc2(ispin))
    2947           8 :          CALL dbcsr_release(tempOccOcc3(ispin))
    2948           8 :          CALL dbcsr_release(bfgs_y(ispin))
    2949          16 :          CALL dbcsr_release(bfgs_s(ispin))
    2950             :       END DO ! ispin
    2951             : 
    2952           8 :       DEALLOCATE (grad_norm_spin)
    2953           8 :       DEALLOCATE (nocc)
    2954           8 :       DEALLOCATE (penalty_vol_prefactor)
    2955           8 :       DEALLOCATE (suggested_vol_penalty)
    2956             : 
    2957           8 :       DEALLOCATE (approx_inv_hessian)
    2958           8 :       DEALLOCATE (prev_m_theta)
    2959           8 :       DEALLOCATE (m_theta_normalized)
    2960           8 :       DEALLOCATE (m_S0)
    2961           8 :       DEALLOCATE (prev_grad)
    2962           8 :       DEALLOCATE (grad)
    2963           8 :       DEALLOCATE (prev_step)
    2964           8 :       DEALLOCATE (step)
    2965           8 :       DEALLOCATE (prev_minus_prec_grad)
    2966           8 :       DEALLOCATE (m_sig_sqrti_ii)
    2967           8 :       DEALLOCATE (m_sigma)
    2968           8 :       DEALLOCATE (m_siginv)
    2969           8 :       DEALLOCATE (tempNOcc1)
    2970           8 :       DEALLOCATE (tempOccOcc1)
    2971           8 :       DEALLOCATE (tempOccOcc2)
    2972           8 :       DEALLOCATE (tempOccOcc3)
    2973           8 :       DEALLOCATE (bfgs_y)
    2974           8 :       DEALLOCATE (bfgs_s)
    2975             : 
    2976           8 :       DEALLOCATE (m_theta, m_t_mo_local)
    2977           8 :       DEALLOCATE (m_B0)
    2978           8 :       DEALLOCATE (weights)
    2979           8 :       DEALLOCATE (first_sgf, last_sgf, nsgf)
    2980             : 
    2981           8 :       IF (.NOT. converged) THEN
    2982           0 :          CPABORT("Optimization not converged! ")
    2983             :       END IF
    2984             : 
    2985           8 :       CALL timestop(handle)
    2986             : 
    2987          16 :    END SUBROUTINE almo_scf_construct_nlmos
    2988             : 
    2989             : ! **************************************************************************************************
    2990             : !> \brief Analysis of the orbitals
    2991             : !> \param detailed_analysis ...
    2992             : !> \param eps_filter ...
    2993             : !> \param m_T_in ...
    2994             : !> \param m_T0_in ...
    2995             : !> \param m_siginv_in ...
    2996             : !> \param m_siginv0_in ...
    2997             : !> \param m_S_in ...
    2998             : !> \param m_KS0_in ...
    2999             : !> \param m_quench_t_in ...
    3000             : !> \param energy_out ...
    3001             : !> \param m_eda_out ...
    3002             : !> \param m_cta_out ...
    3003             : !> \par History
    3004             : !>       2017.07 created [Rustam Z Khaliullin]
    3005             : !> \author Rustam Z Khaliullin
    3006             : ! **************************************************************************************************
    3007          24 :    SUBROUTINE xalmo_analysis(detailed_analysis, eps_filter, m_T_in, m_T0_in, &
    3008          24 :                              m_siginv_in, m_siginv0_in, m_S_in, m_KS0_in, m_quench_t_in, energy_out, &
    3009          24 :                              m_eda_out, m_cta_out)
    3010             : 
    3011             :       LOGICAL, INTENT(IN)                                :: detailed_analysis
    3012             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    3013             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_T_in, m_T0_in, m_siginv_in, &
    3014             :                                                             m_siginv0_in, m_S_in, m_KS0_in, &
    3015             :                                                             m_quench_t_in
    3016             :       REAL(KIND=dp), INTENT(INOUT)                       :: energy_out
    3017             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_eda_out, m_cta_out
    3018             : 
    3019             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'xalmo_analysis'
    3020             : 
    3021             :       INTEGER                                            :: handle, ispin, nspins
    3022             :       REAL(KIND=dp)                                      :: energy_ispin, spin_factor
    3023             :       TYPE(dbcsr_type)                                   :: FTsiginv0, Fvo0, m_X, siginvTFTsiginv0, &
    3024             :                                                             ST0
    3025             : 
    3026          24 :       CALL timeset(routineN, handle)
    3027             : 
    3028          24 :       nspins = SIZE(m_T_in)
    3029             : 
    3030          24 :       IF (nspins == 1) THEN
    3031          24 :          spin_factor = 2.0_dp
    3032             :       ELSE
    3033           0 :          spin_factor = 1.0_dp
    3034             :       END IF
    3035             : 
    3036          24 :       energy_out = 0.0_dp
    3037          48 :       DO ispin = 1, nspins
    3038             : 
    3039             :          ! create temporary matrices
    3040             :          CALL dbcsr_create(Fvo0, &
    3041             :                            template=m_T_in(ispin), &
    3042          24 :                            matrix_type=dbcsr_type_no_symmetry)
    3043             :          CALL dbcsr_create(FTsiginv0, &
    3044             :                            template=m_T_in(ispin), &
    3045          24 :                            matrix_type=dbcsr_type_no_symmetry)
    3046             :          CALL dbcsr_create(ST0, &
    3047             :                            template=m_T_in(ispin), &
    3048          24 :                            matrix_type=dbcsr_type_no_symmetry)
    3049             :          CALL dbcsr_create(m_X, &
    3050             :                            template=m_T_in(ispin), &
    3051          24 :                            matrix_type=dbcsr_type_no_symmetry)
    3052             :          CALL dbcsr_create(siginvTFTsiginv0, &
    3053             :                            template=m_siginv0_in(ispin), &
    3054          24 :                            matrix_type=dbcsr_type_no_symmetry)
    3055             : 
    3056             :          ! compute F_{virt,occ} for the zero-delocalization state
    3057             :          CALL compute_frequently_used_matrices( &
    3058             :             filter_eps=eps_filter, &
    3059             :             m_T_in=m_T0_in(ispin), &
    3060             :             m_siginv_in=m_siginv0_in(ispin), &
    3061             :             m_S_in=m_S_in(1), &
    3062             :             m_F_in=m_KS0_in(ispin), &
    3063             :             m_FTsiginv_out=FTsiginv0, &
    3064             :             m_siginvTFTsiginv_out=siginvTFTsiginv0, &
    3065          24 :             m_ST_out=ST0)
    3066          24 :          CALL dbcsr_copy(Fvo0, m_quench_t_in(ispin))
    3067          24 :          CALL dbcsr_copy(Fvo0, FTsiginv0, keep_sparsity=.TRUE.)
    3068             :          CALL dbcsr_multiply("N", "N", -1.0_dp, &
    3069             :                              ST0, &
    3070             :                              siginvTFTsiginv0, &
    3071             :                              1.0_dp, Fvo0, &
    3072          24 :                              retain_sparsity=.TRUE.)
    3073             : 
    3074             :          ! get single excitation amplitudes
    3075          24 :          CALL dbcsr_copy(m_X, m_T0_in(ispin))
    3076          24 :          CALL dbcsr_add(m_X, m_T_in(ispin), -1.0_dp, 1.0_dp)
    3077             : 
    3078          24 :          CALL dbcsr_dot(m_X, Fvo0, energy_ispin)
    3079          24 :          energy_out = energy_out + energy_ispin*spin_factor
    3080             : 
    3081          24 :          IF (detailed_analysis) THEN
    3082             : 
    3083           2 :             CALL dbcsr_hadamard_product(m_X, Fvo0, m_eda_out(ispin))
    3084           2 :             CALL dbcsr_scale(m_eda_out(ispin), spin_factor)
    3085           2 :             CALL dbcsr_filter(m_eda_out(ispin), eps_filter)
    3086             : 
    3087             :             ! first, compute [QR'R]_mu^i = [(S-SRS).X.siginv']_mu^i
    3088             :             ! a. FTsiginv0 = S.T0*siginv0
    3089             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3090             :                                 ST0, &
    3091             :                                 m_siginv0_in(ispin), &
    3092             :                                 0.0_dp, FTsiginv0, &
    3093           2 :                                 filter_eps=eps_filter)
    3094             :             ! c. tmp1(use ST0) = S.X
    3095             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3096             :                                 m_S_in(1), &
    3097             :                                 m_X, &
    3098             :                                 0.0_dp, ST0, &
    3099           2 :                                 filter_eps=eps_filter)
    3100             :             ! d. tmp2 = tr(T0).tmp1 = tr(T0).S.X
    3101             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    3102             :                                 m_T0_in(ispin), &
    3103             :                                 ST0, &
    3104             :                                 0.0_dp, siginvTFTsiginv0, &
    3105           2 :                                 filter_eps=eps_filter)
    3106             :             ! e. tmp1 = tmp1 - tmp3.tmp2 = S.X - S.T0.siginv0*tr(T0).S.X
    3107             :             !         = (1-S.R0).S.X
    3108             :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    3109             :                                 FTsiginv0, &
    3110             :                                 siginvTFTsiginv0, &
    3111             :                                 1.0_dp, ST0, &
    3112           2 :                                 filter_eps=eps_filter)
    3113             :             ! f. tmp2(use FTsiginv0) = tmp1*siginv
    3114             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3115             :                                 ST0, &
    3116             :                                 m_siginv_in(ispin), &
    3117             :                                 0.0_dp, FTsiginv0, &
    3118           2 :                                 filter_eps=eps_filter)
    3119             :             ! second, compute traces of blocks [RR'Q]^x_y * [X]^y_x
    3120             :             CALL dbcsr_hadamard_product(m_X, &
    3121           2 :                                         FTsiginv0, m_cta_out(ispin))
    3122           2 :             CALL dbcsr_scale(m_cta_out(ispin), spin_factor)
    3123           2 :             CALL dbcsr_filter(m_cta_out(ispin), eps_filter)
    3124             : 
    3125             :          END IF ! do ALMO EDA/CTA
    3126             : 
    3127          24 :          CALL dbcsr_release(Fvo0)
    3128          24 :          CALL dbcsr_release(FTsiginv0)
    3129          24 :          CALL dbcsr_release(ST0)
    3130          24 :          CALL dbcsr_release(m_X)
    3131          48 :          CALL dbcsr_release(siginvTFTsiginv0)
    3132             : 
    3133             :       END DO ! ispin
    3134             : 
    3135          24 :       CALL timestop(handle)
    3136             : 
    3137          24 :    END SUBROUTINE xalmo_analysis
    3138             : 
    3139             : ! **************************************************************************************************
    3140             : !> \brief Compute matrices that are used often in various parts of the
    3141             : !>        optimization procedure
    3142             : !> \param filter_eps ...
    3143             : !> \param m_T_in ...
    3144             : !> \param m_siginv_in ...
    3145             : !> \param m_S_in ...
    3146             : !> \param m_F_in ...
    3147             : !> \param m_FTsiginv_out ...
    3148             : !> \param m_siginvTFTsiginv_out ...
    3149             : !> \param m_ST_out ...
    3150             : !> \par History
    3151             : !>       2016.12 created [Rustam Z Khaliullin]
    3152             : !> \author Rustam Z Khaliullin
    3153             : ! **************************************************************************************************
    3154        1498 :    SUBROUTINE compute_frequently_used_matrices(filter_eps, &
    3155             :                                                m_T_in, m_siginv_in, m_S_in, m_F_in, m_FTsiginv_out, &
    3156             :                                                m_siginvTFTsiginv_out, m_ST_out)
    3157             : 
    3158             :       REAL(KIND=dp), INTENT(IN)                          :: filter_eps
    3159             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_T_in, m_siginv_in, m_S_in, m_F_in
    3160             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_FTsiginv_out, m_siginvTFTsiginv_out, &
    3161             :                                                             m_ST_out
    3162             : 
    3163             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_frequently_used_matrices'
    3164             : 
    3165             :       INTEGER                                            :: handle
    3166             :       TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_oo_1
    3167             : 
    3168        1498 :       CALL timeset(routineN, handle)
    3169             : 
    3170             :       CALL dbcsr_create(m_tmp_no_1, &
    3171             :                         template=m_T_in, &
    3172        1498 :                         matrix_type=dbcsr_type_no_symmetry)
    3173             :       CALL dbcsr_create(m_tmp_oo_1, &
    3174             :                         template=m_siginv_in, &
    3175        1498 :                         matrix_type=dbcsr_type_no_symmetry)
    3176             : 
    3177             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3178             :                           m_F_in, &
    3179             :                           m_T_in, &
    3180             :                           0.0_dp, m_tmp_no_1, &
    3181        1498 :                           filter_eps=filter_eps)
    3182             : 
    3183             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3184             :                           m_tmp_no_1, &
    3185             :                           m_siginv_in, &
    3186             :                           0.0_dp, m_FTsiginv_out, &
    3187        1498 :                           filter_eps=filter_eps)
    3188             : 
    3189             :       CALL dbcsr_multiply("T", "N", 1.0_dp, &
    3190             :                           m_T_in, &
    3191             :                           m_FTsiginv_out, &
    3192             :                           0.0_dp, m_tmp_oo_1, &
    3193        1498 :                           filter_eps=filter_eps)
    3194             : 
    3195             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3196             :                           m_siginv_in, &
    3197             :                           m_tmp_oo_1, &
    3198             :                           0.0_dp, m_siginvTFTsiginv_out, &
    3199        1498 :                           filter_eps=filter_eps)
    3200             : 
    3201             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3202             :                           m_S_in, &
    3203             :                           m_T_in, &
    3204             :                           0.0_dp, m_ST_out, &
    3205        1498 :                           filter_eps=filter_eps)
    3206             : 
    3207        1498 :       CALL dbcsr_release(m_tmp_no_1)
    3208        1498 :       CALL dbcsr_release(m_tmp_oo_1)
    3209             : 
    3210        1498 :       CALL timestop(handle)
    3211             : 
    3212        1498 :    END SUBROUTINE compute_frequently_used_matrices
    3213             : 
    3214             : ! **************************************************************************************************
    3215             : !> \brief Split the matrix of virtual orbitals into two:
    3216             : !>        retained orbs and discarded
    3217             : !> \param almo_scf_env ...
    3218             : !> \par History
    3219             : !>       2011.09 created [Rustam Z Khaliullin]
    3220             : !> \author Rustam Z Khaliullin
    3221             : ! **************************************************************************************************
    3222           0 :    SUBROUTINE split_v_blk(almo_scf_env)
    3223             : 
    3224             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    3225             : 
    3226             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'split_v_blk'
    3227             : 
    3228             :       INTEGER                                            :: discarded_v, handle, iblock_col, &
    3229             :                                                             iblock_col_size, iblock_row, &
    3230             :                                                             iblock_row_size, ispin, retained_v
    3231           0 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: data_p, p_new_block
    3232             :       TYPE(dbcsr_iterator_type)                          :: iter
    3233             : 
    3234           0 :       CALL timeset(routineN, handle)
    3235             : 
    3236           0 :       DO ispin = 1, almo_scf_env%nspins
    3237             : 
    3238             :          CALL dbcsr_work_create(almo_scf_env%matrix_v_blk(ispin), &
    3239           0 :                                 work_mutable=.TRUE.)
    3240             :          CALL dbcsr_work_create(almo_scf_env%matrix_v_disc_blk(ispin), &
    3241           0 :                                 work_mutable=.TRUE.)
    3242             : 
    3243           0 :          CALL dbcsr_iterator_start(iter, almo_scf_env%matrix_v_full_blk(ispin))
    3244             : 
    3245           0 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
    3246             : 
    3247             :             CALL dbcsr_iterator_next_block(iter, iblock_row, iblock_col, data_p, &
    3248           0 :                                            row_size=iblock_row_size, col_size=iblock_col_size)
    3249             : 
    3250           0 :             IF (iblock_row .NE. iblock_col) THEN
    3251           0 :                CPABORT("off-diagonal block found")
    3252             :             END IF
    3253             : 
    3254           0 :             retained_v = almo_scf_env%nvirt_of_domain(iblock_col, ispin)
    3255           0 :             discarded_v = almo_scf_env%nvirt_disc_of_domain(iblock_col, ispin)
    3256           0 :             CPASSERT(retained_v .GT. 0)
    3257           0 :             CPASSERT(discarded_v .GT. 0)
    3258             : 
    3259           0 :             NULLIFY (p_new_block)
    3260             :             CALL dbcsr_reserve_block2d(almo_scf_env%matrix_v_disc_blk(ispin), &
    3261           0 :                                        iblock_row, iblock_col, p_new_block)
    3262           0 :             CPASSERT(ASSOCIATED(p_new_block))
    3263           0 :             CPASSERT(retained_v + discarded_v .EQ. iblock_col_size)
    3264           0 :             p_new_block(:, :) = data_p(:, (retained_v + 1):iblock_col_size)
    3265             : 
    3266           0 :             NULLIFY (p_new_block)
    3267             :             CALL dbcsr_reserve_block2d(almo_scf_env%matrix_v_blk(ispin), &
    3268           0 :                                        iblock_row, iblock_col, p_new_block)
    3269           0 :             CPASSERT(ASSOCIATED(p_new_block))
    3270           0 :             p_new_block(:, :) = data_p(:, 1:retained_v)
    3271             : 
    3272             :          END DO ! iterator
    3273           0 :          CALL dbcsr_iterator_stop(iter)
    3274             : 
    3275           0 :          CALL dbcsr_finalize(almo_scf_env%matrix_v_blk(ispin))
    3276           0 :          CALL dbcsr_finalize(almo_scf_env%matrix_v_disc_blk(ispin))
    3277             : 
    3278             :       END DO ! ispin
    3279             : 
    3280           0 :       CALL timestop(handle)
    3281             : 
    3282           0 :    END SUBROUTINE split_v_blk
    3283             : 
    3284             : ! **************************************************************************************************
    3285             : !> \brief various methods for calculating the Harris-Foulkes correction
    3286             : !> \param almo_scf_env ...
    3287             : !> \par History
    3288             : !>       2011.06 created [Rustam Z Khaliullin]
    3289             : !> \author Rustam Z Khaliullin
    3290             : ! **************************************************************************************************
    3291           0 :    SUBROUTINE harris_foulkes_correction(almo_scf_env)
    3292             : 
    3293             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    3294             : 
    3295             :       CHARACTER(len=*), PARAMETER :: routineN = 'harris_foulkes_correction'
    3296             :       INTEGER, PARAMETER                                 :: cayley_transform = 1, dm_ls_step = 2
    3297             : 
    3298             :       INTEGER :: algorithm_id, handle, handle1, handle2, handle3, handle4, handle5, handle6, &
    3299             :          handle7, handle8, ispin, iteration, n, nmins, nspin, opt_k_max_iter, &
    3300             :          outer_opt_k_iteration, outer_opt_k_max_iter, unit_nr
    3301             :       INTEGER, DIMENSION(1)                              :: fake, nelectron_spin_real
    3302             :       LOGICAL :: converged, line_search, md_in_k_space, outer_opt_k_prepare_to_exit, &
    3303             :          prepare_to_exit, reset_conjugator, reset_step_size, use_cubic_approximation, &
    3304             :          use_quadratic_approximation
    3305             :       REAL(KIND=dp) :: aa, bb, beta, conjugacy_error, conjugacy_error_threshold, &
    3306             :          delta_obj_function, denom, energy_correction_final, frob_matrix, frob_matrix_base, fun0, &
    3307             :          fun1, gfun0, gfun1, grad_norm, grad_norm_frob, kappa, kin_energy, line_search_error, &
    3308             :          line_search_error_threshold, num_threshold, numer, obj_function, quadratic_approx_error, &
    3309             :          quadratic_approx_error_threshold, safety_multiplier, spin_factor, step_size, &
    3310             :          step_size_quadratic_approx, step_size_quadratic_approx2, t1, t1a, t1cholesky, t2, t2a, &
    3311             :          t2cholesky, tau, time_step, x_opt_eps_adaptive, x_opt_eps_adaptive_factor
    3312             :       REAL(KIND=dp), DIMENSION(1)                        :: local_mu
    3313             :       REAL(KIND=dp), DIMENSION(2)                        :: energy_correction
    3314             :       REAL(KIND=dp), DIMENSION(3)                        :: minima
    3315             :       TYPE(cp_logger_type), POINTER                      :: logger
    3316             :       TYPE(ct_step_env_type)                             :: ct_step_env
    3317             :       TYPE(dbcsr_type) :: grad, k_vd_index_down, k_vr_index_down, matrix_k_central, matrix_tmp1, &
    3318             :          matrix_tmp2, prec, prev_grad, prev_minus_prec_grad, prev_step, sigma_oo_curr, &
    3319             :          sigma_oo_curr_inv, sigma_vv_sqrt, sigma_vv_sqrt_guess, sigma_vv_sqrt_inv, &
    3320             :          sigma_vv_sqrt_inv_guess, step, t_curr, tmp1_n_vr, tmp2_n_o, tmp3_vd_vr, tmp4_o_vr, &
    3321             :          tmp_k_blk, vd_fixed, vd_index_sqrt, vd_index_sqrt_inv, velocity, vr_fixed, vr_index_sqrt, &
    3322             :          vr_index_sqrt_inv
    3323           0 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: matrix_p_almo_scf_converged
    3324             : 
    3325           0 :       CALL timeset(routineN, handle)
    3326             : 
    3327             :       ! get a useful output_unit
    3328           0 :       logger => cp_get_default_logger()
    3329           0 :       IF (logger%para_env%is_source()) THEN
    3330           0 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    3331             :       ELSE
    3332           0 :          unit_nr = -1
    3333             :       END IF
    3334             : 
    3335           0 :       nspin = almo_scf_env%nspins
    3336           0 :       energy_correction_final = 0.0_dp
    3337           0 :       IF (nspin .EQ. 1) THEN
    3338           0 :          spin_factor = 2.0_dp
    3339             :       ELSE
    3340           0 :          spin_factor = 1.0_dp
    3341             :       END IF
    3342             : 
    3343           0 :       IF (almo_scf_env%deloc_use_occ_orbs) THEN
    3344             :          algorithm_id = cayley_transform
    3345             :       ELSE
    3346           0 :          algorithm_id = dm_ls_step
    3347             :       END IF
    3348             : 
    3349           0 :       t1 = m_walltime()
    3350             : 
    3351           0 :       SELECT CASE (algorithm_id)
    3352             :       CASE (cayley_transform)
    3353             : 
    3354             :          ! rescale density matrix by spin factor
    3355             :          ! so the orbitals and density are consistent with each other
    3356           0 :          IF (almo_scf_env%nspins == 1) THEN
    3357           0 :             CALL dbcsr_scale(almo_scf_env%matrix_p(1), 1.0_dp/spin_factor)
    3358             :          END IF
    3359             : 
    3360             :          ! transform matrix_t not matrix_t_blk (we might need ALMOs later)
    3361           0 :          DO ispin = 1, nspin
    3362             : 
    3363             :             CALL dbcsr_copy(almo_scf_env%matrix_t(ispin), &
    3364           0 :                             almo_scf_env%matrix_t_blk(ispin))
    3365             : 
    3366             :             ! obtain orthogonalization matrices for ALMOs
    3367             :             ! RZK-warning - remove this sqrt(sigma) and inv(sqrt(sigma))
    3368             :             ! ideally ALMO scf should use sigma and sigma_inv in
    3369             :             ! the tensor_up_down representation
    3370             : 
    3371           0 :             IF (unit_nr > 0) THEN
    3372           0 :                WRITE (unit_nr, *) "sqrt and inv(sqrt) of MO overlap matrix"
    3373             :             END IF
    3374             :             CALL dbcsr_create(almo_scf_env%matrix_sigma_sqrt(ispin), &
    3375             :                               template=almo_scf_env%matrix_sigma(ispin), &
    3376           0 :                               matrix_type=dbcsr_type_no_symmetry)
    3377             :             CALL dbcsr_create(almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3378             :                               template=almo_scf_env%matrix_sigma(ispin), &
    3379           0 :                               matrix_type=dbcsr_type_no_symmetry)
    3380             : 
    3381             :             CALL matrix_sqrt_Newton_Schulz(almo_scf_env%matrix_sigma_sqrt(ispin), &
    3382             :                                            almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3383             :                                            almo_scf_env%matrix_sigma(ispin), &
    3384             :                                            threshold=almo_scf_env%eps_filter, &
    3385             :                                            order=almo_scf_env%order_lanczos, &
    3386             :                                            eps_lanczos=almo_scf_env%eps_lanczos, &
    3387           0 :                                            max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    3388             : 
    3389           0 :             IF (safe_mode) THEN
    3390             :                CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma(ispin), &
    3391             :                                  matrix_type=dbcsr_type_no_symmetry)
    3392             :                CALL dbcsr_create(matrix_tmp2, template=almo_scf_env%matrix_sigma(ispin), &
    3393             :                                  matrix_type=dbcsr_type_no_symmetry)
    3394             : 
    3395             :                CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3396             :                                    almo_scf_env%matrix_sigma(ispin), &
    3397             :                                    0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    3398             :                CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
    3399             :                                    almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3400             :                                    0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    3401             : 
    3402             :                frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    3403             :                CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    3404             :                frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    3405             :                IF (unit_nr > 0) THEN
    3406             :                   WRITE (unit_nr, *) "Error for (inv(sqrt(SIG))*SIG*inv(sqrt(SIG))-I)", frob_matrix/frob_matrix_base
    3407             :                END IF
    3408             : 
    3409             :                CALL dbcsr_release(matrix_tmp1)
    3410             :                CALL dbcsr_release(matrix_tmp2)
    3411             :             END IF
    3412             :          END DO
    3413             : 
    3414           0 :          IF (almo_scf_env%almo_update_algorithm .EQ. almo_scf_diag) THEN
    3415             : 
    3416           0 :             DO ispin = 1, nspin
    3417             : 
    3418           0 :                t1a = m_walltime()
    3419             : 
    3420           0 :                line_search_error_threshold = almo_scf_env%real01
    3421           0 :                conjugacy_error_threshold = almo_scf_env%real02
    3422           0 :                quadratic_approx_error_threshold = almo_scf_env%real03
    3423           0 :                x_opt_eps_adaptive_factor = almo_scf_env%real04
    3424             : 
    3425             :                !! the outer loop for k optimization
    3426           0 :                outer_opt_k_max_iter = almo_scf_env%opt_k_outer_max_iter
    3427           0 :                outer_opt_k_prepare_to_exit = .FALSE.
    3428           0 :                outer_opt_k_iteration = 0
    3429           0 :                grad_norm = 0.0_dp
    3430           0 :                grad_norm_frob = 0.0_dp
    3431           0 :                CALL dbcsr_set(almo_scf_env%matrix_x(ispin), 0.0_dp)
    3432           0 :                IF (almo_scf_env%deloc_truncate_virt .EQ. virt_full) outer_opt_k_max_iter = 0
    3433             : 
    3434           0 :                DO
    3435             : 
    3436             :                   ! obtain proper retained virtuals (1-R)|ALMO_vr>
    3437             :                   CALL apply_projector(psi_in=almo_scf_env%matrix_v_blk(ispin), &
    3438             :                                        psi_out=almo_scf_env%matrix_v(ispin), &
    3439             :                                        psi_projector=almo_scf_env%matrix_t_blk(ispin), &
    3440             :                                        metric=almo_scf_env%matrix_s(1), &
    3441             :                                        project_out=.TRUE., &
    3442             :                                        psi_projector_orthogonal=.FALSE., &
    3443             :                                        proj_in_template=almo_scf_env%matrix_ov(ispin), &
    3444             :                                        eps_filter=almo_scf_env%eps_filter, &
    3445           0 :                                        sig_inv_projector=almo_scf_env%matrix_sigma_inv(ispin))
    3446             :                   !sig_inv_template=almo_scf_env%matrix_sigma_inv(ispin),&
    3447             : 
    3448             :                   ! save initial retained virtuals
    3449             :                   CALL dbcsr_create(vr_fixed, &
    3450           0 :                                     template=almo_scf_env%matrix_v(ispin))
    3451           0 :                   CALL dbcsr_copy(vr_fixed, almo_scf_env%matrix_v(ispin))
    3452             : 
    3453             :                   ! init matrices common for optimized and non-optimized virts
    3454             :                   CALL dbcsr_create(sigma_vv_sqrt, &
    3455             :                                     template=almo_scf_env%matrix_sigma_vv(ispin), &
    3456           0 :                                     matrix_type=dbcsr_type_no_symmetry)
    3457             :                   CALL dbcsr_create(sigma_vv_sqrt_inv, &
    3458             :                                     template=almo_scf_env%matrix_sigma_vv(ispin), &
    3459           0 :                                     matrix_type=dbcsr_type_no_symmetry)
    3460             :                   CALL dbcsr_create(sigma_vv_sqrt_inv_guess, &
    3461             :                                     template=almo_scf_env%matrix_sigma_vv(ispin), &
    3462           0 :                                     matrix_type=dbcsr_type_no_symmetry)
    3463             :                   CALL dbcsr_create(sigma_vv_sqrt_guess, &
    3464             :                                     template=almo_scf_env%matrix_sigma_vv(ispin), &
    3465           0 :                                     matrix_type=dbcsr_type_no_symmetry)
    3466           0 :                   CALL dbcsr_set(sigma_vv_sqrt_guess, 0.0_dp)
    3467           0 :                   CALL dbcsr_add_on_diag(sigma_vv_sqrt_guess, 1.0_dp)
    3468           0 :                   CALL dbcsr_filter(sigma_vv_sqrt_guess, almo_scf_env%eps_filter)
    3469           0 :                   CALL dbcsr_set(sigma_vv_sqrt_inv_guess, 0.0_dp)
    3470           0 :                   CALL dbcsr_add_on_diag(sigma_vv_sqrt_inv_guess, 1.0_dp)
    3471           0 :                   CALL dbcsr_filter(sigma_vv_sqrt_inv_guess, almo_scf_env%eps_filter)
    3472             : 
    3473             :                   ! do things required to optimize virtuals
    3474           0 :                   IF (almo_scf_env%deloc_truncate_virt .NE. virt_full) THEN
    3475             : 
    3476             :                      ! project retained virtuals out of discarded block-by-block
    3477             :                      ! (1-Q^VR_ALMO)|ALMO_vd>
    3478             :                      ! this is probably not necessary, do it just to be safe
    3479             :                      !CALL apply_projector(psi_in=almo_scf_env%matrix_v_disc_blk(ispin),&
    3480             :                      !        psi_out=almo_scf_env%matrix_v_disc(ispin),&
    3481             :                      !        psi_projector=almo_scf_env%matrix_v_blk(ispin),&
    3482             :                      !        metric=almo_scf_env%matrix_s_blk(1),&
    3483             :                      !        project_out=.TRUE.,&
    3484             :                      !        psi_projector_orthogonal=.FALSE.,&
    3485             :                      !        proj_in_template=almo_scf_env%matrix_k_tr(ispin),&
    3486             :                      !        eps_filter=almo_scf_env%eps_filter,&
    3487             :                      !        sig_inv_template=almo_scf_env%matrix_sigma_vv(ispin))
    3488             :                      !CALL dbcsr_copy(almo_scf_env%matrix_v_disc_blk(ispin),&
    3489             :                      !        almo_scf_env%matrix_v_disc(ispin))
    3490             : 
    3491             :                      ! construct discarded virtuals (1-R)|ALMO_vd>
    3492             :                      CALL apply_projector(psi_in=almo_scf_env%matrix_v_disc_blk(ispin), &
    3493             :                                           psi_out=almo_scf_env%matrix_v_disc(ispin), &
    3494             :                                           psi_projector=almo_scf_env%matrix_t_blk(ispin), &
    3495             :                                           metric=almo_scf_env%matrix_s(1), &
    3496             :                                           project_out=.TRUE., &
    3497             :                                           psi_projector_orthogonal=.FALSE., &
    3498             :                                           proj_in_template=almo_scf_env%matrix_ov_disc(ispin), &
    3499             :                                           eps_filter=almo_scf_env%eps_filter, &
    3500           0 :                                           sig_inv_projector=almo_scf_env%matrix_sigma_inv(ispin))
    3501             :                      !sig_inv_template=almo_scf_env%matrix_sigma_inv(ispin),&
    3502             : 
    3503             :                      ! save initial discarded
    3504             :                      CALL dbcsr_create(vd_fixed, &
    3505           0 :                                        template=almo_scf_env%matrix_v_disc(ispin))
    3506           0 :                      CALL dbcsr_copy(vd_fixed, almo_scf_env%matrix_v_disc(ispin))
    3507             : 
    3508             :                      !! create the down metric in the retained k-subspace
    3509             :                      CALL dbcsr_create(k_vr_index_down, &
    3510             :                                        template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
    3511           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    3512             :                      !CALL dbcsr_copy(k_vr_index_down,&
    3513             :                      !        almo_scf_env%matrix_sigma_vv_blk(ispin))
    3514             : 
    3515             :                      !CALL get_overlap(bra=almo_scf_env%matrix_v_blk(ispin),&
    3516             :                      !        ket=almo_scf_env%matrix_v_blk(ispin),&
    3517             :                      !        overlap=k_vr_index_down,&
    3518             :                      !        metric=almo_scf_env%matrix_s_blk(1),&
    3519             :                      !        retain_overlap_sparsity=.FALSE.,&
    3520             :                      !        eps_filter=almo_scf_env%eps_filter)
    3521             : 
    3522             :                      !! create the up metric in the discarded k-subspace
    3523             :                      CALL dbcsr_create(k_vd_index_down, &
    3524             :                                        template=almo_scf_env%matrix_vv_disc_blk(ispin), &
    3525           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    3526             :                      !CALL dbcsr_init(k_vd_index_up)
    3527             :                      !CALL dbcsr_create(k_vd_index_up,&
    3528             :                      !        template=almo_scf_env%matrix_vv_disc_blk(ispin),&
    3529             :                      !        matrix_type=dbcsr_type_no_symmetry)
    3530             :                      !CALL dbcsr_copy(k_vd_index_down,&
    3531             :                      !        almo_scf_env%matrix_vv_disc_blk(ispin))
    3532             : 
    3533             :                      !CALL get_overlap(bra=almo_scf_env%matrix_v_disc_blk(ispin),&
    3534             :                      !        ket=almo_scf_env%matrix_v_disc_blk(ispin),&
    3535             :                      !        overlap=k_vd_index_down,&
    3536             :                      !        metric=almo_scf_env%matrix_s_blk(1),&
    3537             :                      !        retain_overlap_sparsity=.FALSE.,&
    3538             :                      !        eps_filter=almo_scf_env%eps_filter)
    3539             : 
    3540             :                      !IF (unit_nr>0) THEN
    3541             :                      !   WRITE(unit_nr,*) "Inverting blocked overlap matrix of discarded virtuals"
    3542             :                      !ENDIF
    3543             :                      !CALL invert_Hotelling(k_vd_index_up,&
    3544             :                      !        k_vd_index_down,&
    3545             :                      !        almo_scf_env%eps_filter)
    3546             :                      !IF (safe_mode) THEN
    3547             :                      !   CALL dbcsr_init(matrix_tmp1)
    3548             :                      !   CALL dbcsr_create(matrix_tmp1,template=k_vd_index_down,&
    3549             :                      !                        matrix_type=dbcsr_type_no_symmetry)
    3550             :                      !   CALL dbcsr_multiply("N","N",1.0_dp,k_vd_index_up,&
    3551             :                      !                          k_vd_index_down,&
    3552             :                      !                          0.0_dp, matrix_tmp1,&
    3553             :                      !                          filter_eps=almo_scf_env%eps_filter)
    3554             :                      !   frob_matrix_base=dbcsr_frobenius_norm(matrix_tmp1)
    3555             :                      !   CALL dbcsr_add_on_diag(matrix_tmp1,-1.0_dp)
    3556             :                      !   frob_matrix=dbcsr_frobenius_norm(matrix_tmp1)
    3557             :                      !   IF (unit_nr>0) THEN
    3558             :                      !      WRITE(unit_nr,*) "Error for (inv(SIG)*SIG-I)",&
    3559             :                      !            frob_matrix/frob_matrix_base
    3560             :                      !   ENDIF
    3561             :                      !   CALL dbcsr_release(matrix_tmp1)
    3562             :                      !ENDIF
    3563             : 
    3564             :                      ! init matrices necessary for optimization of truncated virts
    3565             :                      ! init blocked gradient before setting K to zero
    3566             :                      ! otherwise the block structure might be lost
    3567             :                      CALL dbcsr_create(grad, &
    3568           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3569           0 :                      CALL dbcsr_copy(grad, almo_scf_env%matrix_k_blk(ispin))
    3570             : 
    3571             :                      ! init MD in the k-space
    3572           0 :                      md_in_k_space = almo_scf_env%logical01
    3573           0 :                      IF (md_in_k_space) THEN
    3574             :                         CALL dbcsr_create(velocity, &
    3575           0 :                                           template=almo_scf_env%matrix_k_blk(ispin))
    3576           0 :                         CALL dbcsr_copy(velocity, almo_scf_env%matrix_k_blk(ispin))
    3577           0 :                         CALL dbcsr_set(velocity, 0.0_dp)
    3578           0 :                         time_step = almo_scf_env%opt_k_trial_step_size
    3579             :                      END IF
    3580             : 
    3581             :                      CALL dbcsr_create(prev_step, &
    3582           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3583             : 
    3584             :                      CALL dbcsr_create(prev_minus_prec_grad, &
    3585           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3586             : 
    3587             :                      ! initialize diagonal blocks of the preconditioner to 1.0_dp
    3588             :                      CALL dbcsr_create(prec, &
    3589           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3590           0 :                      CALL dbcsr_copy(prec, almo_scf_env%matrix_k_blk(ispin))
    3591           0 :                      CALL dbcsr_set(prec, 1.0_dp)
    3592             : 
    3593             :                      ! generate initial K (extrapolate if previous values are available)
    3594           0 :                      CALL dbcsr_set(almo_scf_env%matrix_k_blk(ispin), 0.0_dp)
    3595             :                      ! matrix_k_central stores current k because matrix_k_blk is updated
    3596             :                      ! during linear search
    3597             :                      CALL dbcsr_create(matrix_k_central, &
    3598           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3599             :                      CALL dbcsr_copy(matrix_k_central, &
    3600           0 :                                      almo_scf_env%matrix_k_blk(ispin))
    3601             :                      CALL dbcsr_create(tmp_k_blk, &
    3602           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3603             :                      CALL dbcsr_create(step, &
    3604           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3605           0 :                      CALL dbcsr_set(step, 0.0_dp)
    3606             :                      CALL dbcsr_create(t_curr, &
    3607           0 :                                        template=almo_scf_env%matrix_t(ispin))
    3608             :                      CALL dbcsr_create(sigma_oo_curr, &
    3609             :                                        template=almo_scf_env%matrix_sigma(ispin), &
    3610           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    3611             :                      CALL dbcsr_create(sigma_oo_curr_inv, &
    3612             :                                        template=almo_scf_env%matrix_sigma(ispin), &
    3613           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    3614             :                      CALL dbcsr_create(tmp1_n_vr, &
    3615           0 :                                        template=almo_scf_env%matrix_v(ispin))
    3616             :                      CALL dbcsr_create(tmp3_vd_vr, &
    3617           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3618             :                      CALL dbcsr_create(tmp2_n_o, &
    3619           0 :                                        template=almo_scf_env%matrix_t(ispin))
    3620             :                      CALL dbcsr_create(tmp4_o_vr, &
    3621           0 :                                        template=almo_scf_env%matrix_ov(ispin))
    3622             :                      CALL dbcsr_create(prev_grad, &
    3623           0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3624           0 :                      CALL dbcsr_set(prev_grad, 0.0_dp)
    3625             : 
    3626             :                      !CALL dbcsr_init(sigma_oo_guess)
    3627             :                      !CALL dbcsr_create(sigma_oo_guess,&
    3628             :                      !        template=almo_scf_env%matrix_sigma(ispin),&
    3629             :                      !        matrix_type=dbcsr_type_no_symmetry)
    3630             :                      !CALL dbcsr_set(sigma_oo_guess,0.0_dp)
    3631             :                      !CALL dbcsr_add_on_diag(sigma_oo_guess,1.0_dp)
    3632             :                      !CALL dbcsr_filter(sigma_oo_guess,almo_scf_env%eps_filter)
    3633             :                      !CALL dbcsr_print(sigma_oo_guess)
    3634             : 
    3635             :                   END IF ! done constructing discarded virtuals
    3636             : 
    3637             :                   ! init variables
    3638           0 :                   opt_k_max_iter = almo_scf_env%opt_k_max_iter
    3639           0 :                   iteration = 0
    3640           0 :                   converged = .FALSE.
    3641           0 :                   prepare_to_exit = .FALSE.
    3642           0 :                   beta = 0.0_dp
    3643           0 :                   line_search = .FALSE.
    3644           0 :                   obj_function = 0.0_dp
    3645           0 :                   conjugacy_error = 0.0_dp
    3646           0 :                   line_search_error = 0.0_dp
    3647           0 :                   fun0 = 0.0_dp
    3648           0 :                   fun1 = 0.0_dp
    3649           0 :                   gfun0 = 0.0_dp
    3650           0 :                   gfun1 = 0.0_dp
    3651           0 :                   step_size_quadratic_approx = 0.0_dp
    3652           0 :                   reset_step_size = .TRUE.
    3653           0 :                   IF (almo_scf_env%deloc_truncate_virt .EQ. virt_full) opt_k_max_iter = 0
    3654             : 
    3655             :                   ! start cg iterations to optimize matrix_k_blk
    3656           0 :                   DO
    3657             : 
    3658           0 :                      CALL timeset('k_opt_vr', handle1)
    3659             : 
    3660           0 :                      IF (almo_scf_env%deloc_truncate_virt .NE. virt_full) THEN
    3661             : 
    3662             :                         ! construct k-excited virtuals
    3663             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, vd_fixed, &
    3664             :                                             almo_scf_env%matrix_k_blk(ispin), &
    3665             :                                             0.0_dp, almo_scf_env%matrix_v(ispin), &
    3666           0 :                                             filter_eps=almo_scf_env%eps_filter)
    3667             :                         CALL dbcsr_add(almo_scf_env%matrix_v(ispin), vr_fixed, &
    3668           0 :                                        +1.0_dp, +1.0_dp)
    3669             :                      END IF
    3670             : 
    3671             :                      ! decompose the overlap matrix of the current retained orbitals
    3672             :                      !IF (unit_nr>0) THEN
    3673             :                      !   WRITE(unit_nr,*) "decompose the active VV overlap matrix"
    3674             :                      !ENDIF
    3675             :                      CALL get_overlap(bra=almo_scf_env%matrix_v(ispin), &
    3676             :                                       ket=almo_scf_env%matrix_v(ispin), &
    3677             :                                       overlap=almo_scf_env%matrix_sigma_vv(ispin), &
    3678             :                                       metric=almo_scf_env%matrix_s(1), &
    3679             :                                       retain_overlap_sparsity=.FALSE., &
    3680           0 :                                       eps_filter=almo_scf_env%eps_filter)
    3681             :                      ! use either cholesky or sqrt
    3682             :                      !! RZK-warning: strangely, cholesky does not work with k-optimization
    3683           0 :                      IF (almo_scf_env%deloc_truncate_virt .EQ. virt_full) THEN
    3684           0 :                         CALL timeset('cholesky', handle2)
    3685           0 :                         t1cholesky = m_walltime()
    3686             : 
    3687             :                         ! re-create sigma_vv_sqrt because desymmetrize is buggy -
    3688             :                         ! it will create multiple copies of blocks
    3689             :                         CALL dbcsr_create(sigma_vv_sqrt, &
    3690             :                                           template=almo_scf_env%matrix_sigma_vv(ispin), &
    3691           0 :                                           matrix_type=dbcsr_type_no_symmetry)
    3692             :                         CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma_vv(ispin), &
    3693           0 :                                                 sigma_vv_sqrt)
    3694             :                         CALL cp_dbcsr_cholesky_decompose(sigma_vv_sqrt, &
    3695             :                                                          para_env=almo_scf_env%para_env, &
    3696           0 :                                                          blacs_env=almo_scf_env%blacs_env)
    3697           0 :                         CALL dbcsr_triu(sigma_vv_sqrt)
    3698           0 :                         CALL dbcsr_filter(sigma_vv_sqrt, almo_scf_env%eps_filter)
    3699             :                         ! apply SOLVE to compute U^(-1) : U*U^(-1)=I
    3700           0 :                         CALL dbcsr_get_info(sigma_vv_sqrt, nfullrows_total=n)
    3701             :                         CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma_vv(ispin), &
    3702           0 :                                           matrix_type=dbcsr_type_no_symmetry)
    3703           0 :                         CALL dbcsr_set(matrix_tmp1, 0.0_dp)
    3704           0 :                         CALL dbcsr_add_on_diag(matrix_tmp1, 1.0_dp)
    3705             :                         CALL cp_dbcsr_cholesky_restore(matrix_tmp1, n, sigma_vv_sqrt, &
    3706             :                                                        sigma_vv_sqrt_inv, op="SOLVE", pos="RIGHT", &
    3707             :                                                        para_env=almo_scf_env%para_env, &
    3708           0 :                                                        blacs_env=almo_scf_env%blacs_env)
    3709           0 :                         CALL dbcsr_filter(sigma_vv_sqrt_inv, almo_scf_env%eps_filter)
    3710           0 :                         CALL dbcsr_release(matrix_tmp1)
    3711             :                         IF (safe_mode) THEN
    3712             :                            CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma_vv(ispin), &
    3713             :                                              matrix_type=dbcsr_type_no_symmetry)
    3714             :                            CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma_vv(ispin), &
    3715             :                                                    matrix_tmp1)
    3716             :                            CALL dbcsr_multiply("T", "N", 1.0_dp, sigma_vv_sqrt, &
    3717             :                                                sigma_vv_sqrt, &
    3718             :                                                -1.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    3719             :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    3720             :                            CALL dbcsr_add_on_diag(matrix_tmp1, 1.0_dp)
    3721             :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    3722             :                            IF (unit_nr > 0) THEN
    3723             :                               WRITE (unit_nr, *) "Error for ( U^T * U - Sig )", &
    3724             :                                  frob_matrix/frob_matrix_base
    3725             :                            END IF
    3726             :                            CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_vv_sqrt_inv, &
    3727             :                                                sigma_vv_sqrt, &
    3728             :                                                0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    3729             :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    3730             :                            CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
    3731             :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    3732             :                            IF (unit_nr > 0) THEN
    3733             :                               WRITE (unit_nr, *) "Error for ( inv(U) * U - I )", &
    3734             :                                  frob_matrix/frob_matrix_base
    3735             :                            END IF
    3736             :                            CALL dbcsr_release(matrix_tmp1)
    3737             :                         END IF ! safe_mode
    3738           0 :                         t2cholesky = m_walltime()
    3739           0 :                         IF (unit_nr > 0) THEN
    3740           0 :                            WRITE (unit_nr, *) "Cholesky+inverse wall-time: ", t2cholesky - t1cholesky
    3741             :                         END IF
    3742           0 :                         CALL timestop(handle2)
    3743             :                      ELSE
    3744             :                         CALL matrix_sqrt_Newton_Schulz(sigma_vv_sqrt, &
    3745             :                                                        sigma_vv_sqrt_inv, &
    3746             :                                                        almo_scf_env%matrix_sigma_vv(ispin), &
    3747             :                                                        !matrix_sqrt_inv_guess=sigma_vv_sqrt_inv_guess,&
    3748             :                                                        !matrix_sqrt_guess=sigma_vv_sqrt_guess,&
    3749             :                                                        threshold=almo_scf_env%eps_filter, &
    3750             :                                                        order=almo_scf_env%order_lanczos, &
    3751             :                                                        eps_lanczos=almo_scf_env%eps_lanczos, &
    3752           0 :                                                        max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    3753           0 :                         CALL dbcsr_copy(sigma_vv_sqrt_inv_guess, sigma_vv_sqrt_inv)
    3754           0 :                         CALL dbcsr_copy(sigma_vv_sqrt_guess, sigma_vv_sqrt)
    3755             :                         IF (safe_mode) THEN
    3756             :                            CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma_vv(ispin), &
    3757             :                                              matrix_type=dbcsr_type_no_symmetry)
    3758             :                            CALL dbcsr_create(matrix_tmp2, template=almo_scf_env%matrix_sigma_vv(ispin), &
    3759             :                                              matrix_type=dbcsr_type_no_symmetry)
    3760             : 
    3761             :                            CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_vv_sqrt_inv, &
    3762             :                                                almo_scf_env%matrix_sigma_vv(ispin), &
    3763             :                                                0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    3764             :                            CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
    3765             :                                                sigma_vv_sqrt_inv, &
    3766             :                                                0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    3767             : 
    3768             :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    3769             :                            CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    3770             :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    3771             :                            IF (unit_nr > 0) THEN
    3772             :                               WRITE (unit_nr, *) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)", &
    3773             :                                  frob_matrix/frob_matrix_base
    3774             :                            END IF
    3775             : 
    3776             :                            CALL dbcsr_release(matrix_tmp1)
    3777             :                            CALL dbcsr_release(matrix_tmp2)
    3778             :                         END IF
    3779             :                      END IF
    3780           0 :                      CALL timestop(handle1)
    3781             : 
    3782             :                      ! compute excitation amplitudes (to the current set of retained virtuals)
    3783             :                      ! set convergence criterion for x-optimization
    3784           0 :                      IF ((iteration .EQ. 0) .AND. (.NOT. line_search) .AND. &
    3785             :                          (outer_opt_k_iteration .EQ. 0)) THEN
    3786             :                         x_opt_eps_adaptive = &
    3787           0 :                            almo_scf_env%deloc_cayley_eps_convergence
    3788             :                      ELSE
    3789             :                         x_opt_eps_adaptive = &
    3790             :                            MAX(ABS(almo_scf_env%deloc_cayley_eps_convergence), &
    3791           0 :                                ABS(x_opt_eps_adaptive_factor*grad_norm))
    3792             :                      END IF
    3793           0 :                      CALL ct_step_env_init(ct_step_env)
    3794             :                      CALL ct_step_env_set(ct_step_env, &
    3795             :                                           para_env=almo_scf_env%para_env, &
    3796             :                                           blacs_env=almo_scf_env%blacs_env, &
    3797             :                                           use_occ_orbs=.TRUE., &
    3798             :                                           use_virt_orbs=.TRUE., &
    3799             :                                           occ_orbs_orthogonal=.FALSE., &
    3800             :                                           virt_orbs_orthogonal=.FALSE., &
    3801             :                                           pp_preconditioner_full=almo_scf_env%deloc_cayley_occ_precond, &
    3802             :                                           qq_preconditioner_full=almo_scf_env%deloc_cayley_vir_precond, &
    3803             :                                           tensor_type=almo_scf_env%deloc_cayley_tensor_type, &
    3804             :                                           neglect_quadratic_term=almo_scf_env%deloc_cayley_linear, &
    3805             :                                           conjugator=almo_scf_env%deloc_cayley_conjugator, &
    3806             :                                           max_iter=almo_scf_env%deloc_cayley_max_iter, &
    3807             :                                           calculate_energy_corr=.TRUE., &
    3808             :                                           update_p=.FALSE., &
    3809             :                                           update_q=.FALSE., &
    3810             :                                           eps_convergence=x_opt_eps_adaptive, &
    3811             :                                           eps_filter=almo_scf_env%eps_filter, &
    3812             :                                           !nspins=1,&
    3813             :                                           q_index_up=sigma_vv_sqrt_inv, &
    3814             :                                           q_index_down=sigma_vv_sqrt, &
    3815             :                                           p_index_up=almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3816             :                                           p_index_down=almo_scf_env%matrix_sigma_sqrt(ispin), &
    3817             :                                           matrix_ks=almo_scf_env%matrix_ks_0deloc(ispin), &
    3818             :                                           matrix_t=almo_scf_env%matrix_t(ispin), &
    3819             :                                           matrix_qp_template=almo_scf_env%matrix_vo(ispin), &
    3820             :                                           matrix_pq_template=almo_scf_env%matrix_ov(ispin), &
    3821             :                                           matrix_v=almo_scf_env%matrix_v(ispin), &
    3822           0 :                                           matrix_x_guess=almo_scf_env%matrix_x(ispin))
    3823             :                      ! perform calculations
    3824           0 :                      CALL ct_step_execute(ct_step_env)
    3825             :                      ! get the energy correction
    3826             :                      CALL ct_step_env_get(ct_step_env, &
    3827             :                                           energy_correction=energy_correction(ispin), &
    3828           0 :                                           copy_matrix_x=almo_scf_env%matrix_x(ispin))
    3829           0 :                      CALL ct_step_env_clean(ct_step_env)
    3830             :                      ! RZK-warning matrix_x is being transformed
    3831             :                      ! back and forth between orth and up_down representations
    3832           0 :                      energy_correction(1) = energy_correction(1)*spin_factor
    3833             : 
    3834           0 :                      IF (opt_k_max_iter .NE. 0) THEN
    3835             : 
    3836           0 :                         CALL timeset('k_opt_t_curr', handle3)
    3837             : 
    3838             :                         ! construct current occupied orbitals T_blk + V_r*X
    3839             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3840             :                                             almo_scf_env%matrix_v(ispin), &
    3841             :                                             almo_scf_env%matrix_x(ispin), &
    3842             :                                             0.0_dp, t_curr, &
    3843           0 :                                             filter_eps=almo_scf_env%eps_filter)
    3844             :                         CALL dbcsr_add(t_curr, almo_scf_env%matrix_t_blk(ispin), &
    3845           0 :                                        +1.0_dp, +1.0_dp)
    3846             : 
    3847             :                         ! calculate current occupied overlap
    3848             :                         !IF (unit_nr>0) THEN
    3849             :                         !   WRITE(unit_nr,*) "Inverting current occ overlap matrix"
    3850             :                         !ENDIF
    3851             :                         CALL get_overlap(bra=t_curr, &
    3852             :                                          ket=t_curr, &
    3853             :                                          overlap=sigma_oo_curr, &
    3854             :                                          metric=almo_scf_env%matrix_s(1), &
    3855             :                                          retain_overlap_sparsity=.FALSE., &
    3856           0 :                                          eps_filter=almo_scf_env%eps_filter)
    3857           0 :                         IF (iteration .EQ. 0) THEN
    3858             :                            CALL invert_Hotelling(sigma_oo_curr_inv, &
    3859             :                                                  sigma_oo_curr, &
    3860             :                                                  threshold=almo_scf_env%eps_filter, &
    3861           0 :                                                  use_inv_as_guess=.FALSE.)
    3862             :                         ELSE
    3863             :                            CALL invert_Hotelling(sigma_oo_curr_inv, &
    3864             :                                                  sigma_oo_curr, &
    3865             :                                                  threshold=almo_scf_env%eps_filter, &
    3866           0 :                                                  use_inv_as_guess=.TRUE.)
    3867             :                            !CALL dbcsr_copy(sigma_oo_guess,sigma_oo_curr_inv)
    3868             :                         END IF
    3869             :                         IF (safe_mode) THEN
    3870             :                            CALL dbcsr_create(matrix_tmp1, template=sigma_oo_curr, &
    3871             :                                              matrix_type=dbcsr_type_no_symmetry)
    3872             :                            CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_oo_curr, &
    3873             :                                                sigma_oo_curr_inv, &
    3874             :                                                0.0_dp, matrix_tmp1, &
    3875             :                                                filter_eps=almo_scf_env%eps_filter)
    3876             :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    3877             :                            CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
    3878             :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    3879             :                            !CALL dbcsr_filter(matrix_tmp1,almo_scf_env%eps_filter)
    3880             :                            !CALL dbcsr_print(matrix_tmp1)
    3881             :                            IF (unit_nr > 0) THEN
    3882             :                               WRITE (unit_nr, *) "Error for (SIG*inv(SIG)-I)", &
    3883             :                                  frob_matrix/frob_matrix_base, frob_matrix_base
    3884             :                            END IF
    3885             :                            CALL dbcsr_release(matrix_tmp1)
    3886             :                         END IF
    3887             :                         IF (safe_mode) THEN
    3888             :                            CALL dbcsr_create(matrix_tmp1, template=sigma_oo_curr, &
    3889             :                                              matrix_type=dbcsr_type_no_symmetry)
    3890             :                            CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_oo_curr_inv, &
    3891             :                                                sigma_oo_curr, &
    3892             :                                                0.0_dp, matrix_tmp1, &
    3893             :                                                filter_eps=almo_scf_env%eps_filter)
    3894             :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    3895             :                            CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
    3896             :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    3897             :                            !CALL dbcsr_filter(matrix_tmp1,almo_scf_env%eps_filter)
    3898             :                            !CALL dbcsr_print(matrix_tmp1)
    3899             :                            IF (unit_nr > 0) THEN
    3900             :                               WRITE (unit_nr, *) "Error for (inv(SIG)*SIG-I)", &
    3901             :                                  frob_matrix/frob_matrix_base, frob_matrix_base
    3902             :                            END IF
    3903             :                            CALL dbcsr_release(matrix_tmp1)
    3904             :                         END IF
    3905             : 
    3906           0 :                         CALL timestop(handle3)
    3907           0 :                         CALL timeset('k_opt_vd', handle4)
    3908             : 
    3909             :                         ! construct current discarded virtuals:
    3910             :                         ! (1-R_curr)(1-Q^VR_curr)|ALMO_vd_basis> =
    3911             :                         ! = (1-Q^VR_curr)|ALMO_vd_basis>
    3912             :                         ! use sigma_vv_sqrt to store the inverse of the overlap
    3913             :                         ! sigma_vv_inv is computed from sqrt/cholesky
    3914             :                         CALL dbcsr_multiply("N", "T", 1.0_dp, &
    3915             :                                             sigma_vv_sqrt_inv, &
    3916             :                                             sigma_vv_sqrt_inv, &
    3917             :                                             0.0_dp, sigma_vv_sqrt, &
    3918           0 :                                             filter_eps=almo_scf_env%eps_filter)
    3919             :                         CALL apply_projector(psi_in=almo_scf_env%matrix_v_disc_blk(ispin), &
    3920             :                                              psi_out=almo_scf_env%matrix_v_disc(ispin), &
    3921             :                                              psi_projector=almo_scf_env%matrix_v(ispin), &
    3922             :                                              metric=almo_scf_env%matrix_s(1), &
    3923             :                                              project_out=.FALSE., &
    3924             :                                              psi_projector_orthogonal=.FALSE., &
    3925             :                                              proj_in_template=almo_scf_env%matrix_k_tr(ispin), &
    3926             :                                              eps_filter=almo_scf_env%eps_filter, &
    3927           0 :                                              sig_inv_projector=sigma_vv_sqrt)
    3928             :                         !sig_inv_template=almo_scf_env%matrix_sigma_vv(ispin),&
    3929             :                         CALL dbcsr_add(almo_scf_env%matrix_v_disc(ispin), &
    3930           0 :                                        vd_fixed, -1.0_dp, +1.0_dp)
    3931             : 
    3932           0 :                         CALL timestop(handle4)
    3933           0 :                         CALL timeset('k_opt_grad', handle5)
    3934             : 
    3935             :                         ! evaluate the gradient from the assembled components
    3936             :                         ! grad_xx = c0 [ (Vd_curr^tr)*F*T_curr*sigma_oo_curr_inv*(X^tr)]_xx
    3937             :                         ! save previous gradient to calculate conjugation coef
    3938           0 :                         IF (line_search) THEN
    3939           0 :                            CALL dbcsr_copy(prev_grad, grad)
    3940             :                         END IF
    3941             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3942             :                                             almo_scf_env%matrix_ks_0deloc(ispin), &
    3943             :                                             t_curr, &
    3944             :                                             0.0_dp, tmp2_n_o, &
    3945           0 :                                             filter_eps=almo_scf_env%eps_filter)
    3946             :                         CALL dbcsr_multiply("N", "T", 1.0_dp, &
    3947             :                                             sigma_oo_curr_inv, &
    3948             :                                             almo_scf_env%matrix_x(ispin), &
    3949             :                                             0.0_dp, tmp4_o_vr, &
    3950           0 :                                             filter_eps=almo_scf_env%eps_filter)
    3951             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3952             :                                             tmp2_n_o, &
    3953             :                                             tmp4_o_vr, &
    3954             :                                             0.0_dp, tmp1_n_vr, &
    3955           0 :                                             filter_eps=almo_scf_env%eps_filter)
    3956             :                         CALL dbcsr_multiply("T", "N", 2.0_dp*spin_factor, &
    3957             :                                             almo_scf_env%matrix_v_disc(ispin), &
    3958             :                                             tmp1_n_vr, &
    3959             :                                             0.0_dp, grad, &
    3960           0 :                                             retain_sparsity=.TRUE.)
    3961             :                         !filter_eps=almo_scf_env%eps_filter,&
    3962             :                         ! keep tmp2_n_o for the next step
    3963             :                         ! keep tmp4_o_vr for the preconditioner
    3964             : 
    3965             :                         ! check convergence and other exit criteria
    3966           0 :                         grad_norm_frob = dbcsr_frobenius_norm(grad)
    3967           0 :                         CALL dbcsr_norm(grad, dbcsr_norm_maxabsnorm, norm_scalar=grad_norm)
    3968           0 :                         converged = (grad_norm .LT. almo_scf_env%opt_k_eps_convergence)
    3969           0 :                         IF (converged .OR. (iteration .GE. opt_k_max_iter)) THEN
    3970           0 :                            prepare_to_exit = .TRUE.
    3971             :                         END IF
    3972           0 :                         CALL timestop(handle5)
    3973             : 
    3974           0 :                         IF (.NOT. prepare_to_exit) THEN
    3975             : 
    3976           0 :                            CALL timeset('k_opt_energy', handle6)
    3977             : 
    3978             :                            ! compute "energy" c0*Tr[sig_inv_oo*t*F*t]
    3979             :                            CALL dbcsr_multiply("T", "N", spin_factor, &
    3980             :                                                t_curr, &
    3981             :                                                tmp2_n_o, &
    3982             :                                                0.0_dp, sigma_oo_curr, &
    3983           0 :                                                filter_eps=almo_scf_env%eps_filter)
    3984             :                            delta_obj_function = fun0
    3985           0 :                            CALL dbcsr_dot(sigma_oo_curr_inv, sigma_oo_curr, obj_function)
    3986           0 :                            delta_obj_function = obj_function - delta_obj_function
    3987           0 :                            IF (line_search) THEN
    3988             :                               fun1 = obj_function
    3989             :                            ELSE
    3990           0 :                               fun0 = obj_function
    3991             :                            END IF
    3992             : 
    3993           0 :                            CALL timestop(handle6)
    3994             : 
    3995             :                            ! update the step direction
    3996           0 :                            IF (.NOT. line_search) THEN
    3997             : 
    3998           0 :                               CALL timeset('k_opt_step', handle7)
    3999             : 
    4000           0 :                               IF ((.NOT. md_in_k_space) .AND. &
    4001             :                                   (iteration .GE. MAX(0, almo_scf_env%opt_k_prec_iter_start) .AND. &
    4002             :                                    MOD(iteration - almo_scf_env%opt_k_prec_iter_start, &
    4003             :                                        almo_scf_env%opt_k_prec_iter_freq) .EQ. 0)) THEN
    4004             : 
    4005             :                                  !IF ((iteration.eq.0).AND.(.NOT.md_in_k_space)) THEN
    4006             : 
    4007             :                                  ! compute the preconditioner
    4008           0 :                                  IF (unit_nr > 0) THEN
    4009           0 :                                     WRITE (unit_nr, *) "Computing preconditioner"
    4010             :                                  END IF
    4011             :                                  !CALL opt_k_create_preconditioner(prec,&
    4012             :                                  !        almo_scf_env%matrix_v_disc(ispin),&
    4013             :                                  !        almo_scf_env%matrix_ks_0deloc(ispin),&
    4014             :                                  !        almo_scf_env%matrix_x(ispin),&
    4015             :                                  !        tmp4_o_vr,&
    4016             :                                  !        almo_scf_env%matrix_s(1),&
    4017             :                                  !        grad,&
    4018             :                                  !        !almo_scf_env%matrix_v_disc_blk(ispin),&
    4019             :                                  !        vd_fixed,&
    4020             :                                  !        t_curr,&
    4021             :                                  !        k_vd_index_up,&
    4022             :                                  !        k_vr_index_down,&
    4023             :                                  !        tmp1_n_vr,&
    4024             :                                  !        spin_factor,&
    4025             :                                  !        almo_scf_env%eps_filter)
    4026             :                                  CALL opt_k_create_preconditioner_blk(almo_scf_env, &
    4027             :                                                                       almo_scf_env%matrix_v_disc(ispin), &
    4028             :                                                                       tmp4_o_vr, &
    4029             :                                                                       t_curr, &
    4030             :                                                                       ispin, &
    4031           0 :                                                                       spin_factor)
    4032             : 
    4033             :                               END IF
    4034             : 
    4035             :                               ! save the previous step
    4036           0 :                               CALL dbcsr_copy(prev_step, step)
    4037             : 
    4038             :                               ! compute the new step
    4039             :                               CALL opt_k_apply_preconditioner_blk(almo_scf_env, &
    4040           0 :                                                                   step, grad, ispin)
    4041             :                               !CALL dbcsr_hadamard_product(prec,grad,step)
    4042           0 :                               CALL dbcsr_scale(step, -1.0_dp)
    4043             : 
    4044             :                               ! check whether we need to reset conjugate directions
    4045           0 :                               reset_conjugator = .FALSE.
    4046             :                               ! first check if manual reset is active
    4047           0 :                               IF (iteration .LT. MAX(almo_scf_env%opt_k_conj_iter_start, 1) .OR. &
    4048             :                                   MOD(iteration - almo_scf_env%opt_k_conj_iter_start, &
    4049             :                                       almo_scf_env%opt_k_conj_iter_freq) .EQ. 0) THEN
    4050             : 
    4051             :                                  reset_conjugator = .TRUE.
    4052             : 
    4053             :                               ELSE
    4054             : 
    4055             :                                  ! check for the errors in the cg algorithm
    4056             :                                  !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4057             :                                  !CALL dbcsr_dot(grad,tmp_k_blk,numer)
    4058             :                                  !CALL dbcsr_dot(prev_grad,tmp_k_blk,denom)
    4059           0 :                                  CALL dbcsr_dot(grad, prev_minus_prec_grad, numer)
    4060           0 :                                  CALL dbcsr_dot(prev_grad, prev_minus_prec_grad, denom)
    4061           0 :                                  conjugacy_error = numer/denom
    4062             : 
    4063           0 :                                  IF (conjugacy_error .GT. MIN(0.5_dp, conjugacy_error_threshold)) THEN
    4064           0 :                                     reset_conjugator = .TRUE.
    4065           0 :                                     IF (unit_nr > 0) THEN
    4066           0 :                                        WRITE (unit_nr, *) "Lack of progress, conjugacy error is ", conjugacy_error
    4067             :                                     END IF
    4068             :                                  END IF
    4069             : 
    4070             :                                  ! check the gradient along the previous direction
    4071           0 :                                  IF ((iteration .NE. 0) .AND. (.NOT. reset_conjugator)) THEN
    4072           0 :                                     CALL dbcsr_dot(grad, prev_step, numer)
    4073           0 :                                     CALL dbcsr_dot(prev_grad, prev_step, denom)
    4074           0 :                                     line_search_error = numer/denom
    4075           0 :                                     IF (line_search_error .GT. line_search_error_threshold) THEN
    4076           0 :                                        reset_conjugator = .TRUE.
    4077           0 :                                        IF (unit_nr > 0) THEN
    4078           0 :                                           WRITE (unit_nr, *) "Bad line search, line search error is ", line_search_error
    4079             :                                        END IF
    4080             :                                     END IF
    4081             :                                  END IF
    4082             : 
    4083             :                               END IF
    4084             : 
    4085             :                               ! compute the conjugation coefficient - beta
    4086           0 :                               IF (.NOT. reset_conjugator) THEN
    4087             : 
    4088           0 :                                  SELECT CASE (almo_scf_env%opt_k_conjugator)
    4089             :                                  CASE (cg_hestenes_stiefel)
    4090           0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4091           0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4092           0 :                                     CALL dbcsr_dot(tmp_k_blk, step, numer)
    4093           0 :                                     CALL dbcsr_dot(tmp_k_blk, prev_step, denom)
    4094           0 :                                     beta = -1.0_dp*numer/denom
    4095             :                                  CASE (cg_fletcher_reeves)
    4096             :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4097             :                                     !CALL dbcsr_dot(prev_grad,tmp_k_blk,denom)
    4098             :                                     !CALL dbcsr_hadamard_product(prec,grad,tmp_k_blk)
    4099             :                                     !CALL dbcsr_dot(grad,tmp_k_blk,numer)
    4100             :                                     !beta=numer/denom
    4101           0 :                                     CALL dbcsr_dot(grad, step, numer)
    4102           0 :                                     CALL dbcsr_dot(prev_grad, prev_minus_prec_grad, denom)
    4103           0 :                                     beta = numer/denom
    4104             :                                  CASE (cg_polak_ribiere)
    4105             :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4106             :                                     !CALL dbcsr_dot(prev_grad,tmp_k_blk,denom)
    4107             :                                     !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
    4108             :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4109             :                                     !CALL dbcsr_dot(tmp_k_blk,grad,numer)
    4110           0 :                                     CALL dbcsr_dot(prev_grad, prev_minus_prec_grad, denom)
    4111           0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4112           0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4113           0 :                                     CALL dbcsr_dot(tmp_k_blk, step, numer)
    4114           0 :                                     beta = numer/denom
    4115             :                                  CASE (cg_fletcher)
    4116             :                                     !CALL dbcsr_hadamard_product(prec,grad,tmp_k_blk)
    4117             :                                     !CALL dbcsr_dot(grad,tmp_k_blk,numer)
    4118             :                                     !CALL dbcsr_dot(prev_grad,prev_step,denom)
    4119             :                                     !beta=-1.0_dp*numer/denom
    4120           0 :                                     CALL dbcsr_dot(grad, step, numer)
    4121           0 :                                     CALL dbcsr_dot(prev_grad, prev_step, denom)
    4122           0 :                                     beta = numer/denom
    4123             :                                  CASE (cg_liu_storey)
    4124           0 :                                     CALL dbcsr_dot(prev_grad, prev_step, denom)
    4125             :                                     !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
    4126             :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4127             :                                     !CALL dbcsr_dot(tmp_k_blk,grad,numer)
    4128           0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4129           0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4130           0 :                                     CALL dbcsr_dot(tmp_k_blk, step, numer)
    4131           0 :                                     beta = numer/denom
    4132             :                                  CASE (cg_dai_yuan)
    4133             :                                     !CALL dbcsr_hadamard_product(prec,grad,tmp_k_blk)
    4134             :                                     !CALL dbcsr_dot(grad,tmp_k_blk,numer)
    4135             :                                     !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
    4136             :                                     !CALL dbcsr_dot(prev_grad,prev_step,denom)
    4137             :                                     !beta=numer/denom
    4138           0 :                                     CALL dbcsr_dot(grad, step, numer)
    4139           0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4140           0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4141           0 :                                     CALL dbcsr_dot(tmp_k_blk, prev_step, denom)
    4142           0 :                                     beta = -1.0_dp*numer/denom
    4143             :                                  CASE (cg_hager_zhang)
    4144             :                                     !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
    4145             :                                     !CALL dbcsr_dot(prev_grad,prev_step,denom)
    4146             :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4147             :                                     !CALL dbcsr_dot(tmp_k_blk,prev_grad,numer)
    4148             :                                     !kappa=2.0_dp*numer/denom
    4149             :                                     !CALL dbcsr_dot(tmp_k_blk,grad,numer)
    4150             :                                     !tau=numer/denom
    4151             :                                     !CALL dbcsr_dot(prev_step,grad,numer)
    4152             :                                     !beta=tau-kappa*numer/denom
    4153           0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4154           0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4155           0 :                                     CALL dbcsr_dot(tmp_k_blk, prev_step, denom)
    4156           0 :                                     CALL dbcsr_dot(tmp_k_blk, prev_minus_prec_grad, numer)
    4157           0 :                                     kappa = -2.0_dp*numer/denom
    4158           0 :                                     CALL dbcsr_dot(tmp_k_blk, step, numer)
    4159           0 :                                     tau = -1.0_dp*numer/denom
    4160           0 :                                     CALL dbcsr_dot(prev_step, grad, numer)
    4161           0 :                                     beta = tau - kappa*numer/denom
    4162             :                                  CASE (cg_zero)
    4163           0 :                                     beta = 0.0_dp
    4164             :                                  CASE DEFAULT
    4165           0 :                                     CPABORT("illegal conjugator")
    4166             :                                  END SELECT
    4167             : 
    4168           0 :                                  IF (beta .LT. 0.0_dp) THEN
    4169           0 :                                     IF (unit_nr > 0) THEN
    4170           0 :                                        WRITE (unit_nr, *) "Beta is negative, ", beta
    4171             :                                     END IF
    4172             :                                     reset_conjugator = .TRUE.
    4173             :                                  END IF
    4174             : 
    4175             :                               END IF
    4176             : 
    4177           0 :                               IF (md_in_k_space) THEN
    4178             :                                  reset_conjugator = .TRUE.
    4179             :                               END IF
    4180             : 
    4181           0 :                               IF (reset_conjugator) THEN
    4182             : 
    4183           0 :                                  beta = 0.0_dp
    4184             :                                  !reset_step_size=.TRUE.
    4185             : 
    4186           0 :                                  IF (unit_nr > 0) THEN
    4187           0 :                                     WRITE (unit_nr, *) "(Re)-setting conjugator to zero"
    4188             :                                  END IF
    4189             : 
    4190             :                               END IF
    4191             : 
    4192             :                               ! save the preconditioned gradient
    4193           0 :                               CALL dbcsr_copy(prev_minus_prec_grad, step)
    4194             : 
    4195             :                               ! conjugate the step direction
    4196           0 :                               CALL dbcsr_add(step, prev_step, 1.0_dp, beta)
    4197             : 
    4198           0 :                               CALL timestop(handle7)
    4199             : 
    4200             :                               ! update the step direction
    4201             :                            ELSE ! step update
    4202           0 :                               conjugacy_error = 0.0_dp
    4203             :                            END IF
    4204             : 
    4205             :                            ! compute the gradient with respect to the step size in the curr direction
    4206           0 :                            IF (line_search) THEN
    4207           0 :                               CALL dbcsr_dot(grad, step, gfun1)
    4208           0 :                               line_search_error = gfun1/gfun0
    4209             :                            ELSE
    4210           0 :                               CALL dbcsr_dot(grad, step, gfun0)
    4211             :                            END IF
    4212             : 
    4213             :                            ! make a step - update k
    4214           0 :                            IF (line_search) THEN
    4215             : 
    4216             :                               ! check if the trial step provides enough numerical accuracy
    4217           0 :                               safety_multiplier = 1.0E+1_dp ! must be more than one
    4218             :                               num_threshold = MAX(EPSILON(1.0_dp), &
    4219           0 :                                                   safety_multiplier*(almo_scf_env%eps_filter**2)*almo_scf_env%ndomains)
    4220           0 :                               IF (ABS(fun1 - fun0 - gfun0*step_size) .LT. num_threshold) THEN
    4221           0 :                                  IF (unit_nr > 0) THEN
    4222             :                                     WRITE (unit_nr, '(T3,A,1X,E17.7)') &
    4223           0 :                                        "Numerical accuracy is too low to observe non-linear behavior", &
    4224           0 :                                        ABS(fun1 - fun0 - gfun0*step_size)
    4225           0 :                                     WRITE (unit_nr, '(T3,A,1X,E17.7,A,1X,E12.3)') "Error computing ", &
    4226           0 :                                        ABS(gfun0), &
    4227           0 :                                        " is smaller than the threshold", num_threshold
    4228             :                                  END IF
    4229           0 :                                  CPABORT("")
    4230             :                               END IF
    4231           0 :                               IF (ABS(gfun0) .LT. num_threshold) THEN
    4232           0 :                                  IF (unit_nr > 0) THEN
    4233           0 :                                     WRITE (unit_nr, '(T3,A,1X,E17.7,A,1X,E12.3)') "Linear gradient", &
    4234           0 :                                        ABS(gfun0), &
    4235           0 :                                        " is smaller than the threshold", num_threshold
    4236             :                                  END IF
    4237           0 :                                  CPABORT("")
    4238             :                               END IF
    4239             : 
    4240           0 :                               use_quadratic_approximation = .TRUE.
    4241           0 :                               use_cubic_approximation = .FALSE.
    4242             : 
    4243             :                               ! find the minimum assuming quadratic form
    4244             :                               ! use f0, f1, g0
    4245           0 :                               step_size_quadratic_approx = -(gfun0*step_size*step_size)/(2.0_dp*(fun1 - fun0 - gfun0*step_size))
    4246             :                               ! use f0, f1, g1
    4247           0 :                              step_size_quadratic_approx2 = -(fun1 - fun0 - step_size*gfun1/2.0_dp)/(gfun1 - (fun1 - fun0)/step_size)
    4248             : 
    4249           0 :                               IF ((step_size_quadratic_approx .LT. 0.0_dp) .AND. &
    4250             :                                   (step_size_quadratic_approx2 .LT. 0.0_dp)) THEN
    4251           0 :                                  IF (unit_nr > 0) THEN
    4252             :                                     WRITE (unit_nr, '(T3,A,1X,E17.7,1X,E17.7,1X,A)') &
    4253           0 :                                        "Quadratic approximation gives negative steps", &
    4254           0 :                                        step_size_quadratic_approx, step_size_quadratic_approx2, &
    4255           0 :                                        "trying cubic..."
    4256             :                                  END IF
    4257             :                                  use_cubic_approximation = .TRUE.
    4258             :                                  use_quadratic_approximation = .FALSE.
    4259             :                               ELSE
    4260           0 :                                  IF (step_size_quadratic_approx .LT. 0.0_dp) THEN
    4261           0 :                                     step_size_quadratic_approx = step_size_quadratic_approx2
    4262             :                                  END IF
    4263           0 :                                  IF (step_size_quadratic_approx2 .LT. 0.0_dp) THEN
    4264           0 :                                     step_size_quadratic_approx2 = step_size_quadratic_approx
    4265             :                                  END IF
    4266             :                               END IF
    4267             : 
    4268             :                               ! check accuracy of the quadratic approximation
    4269             :                               IF (use_quadratic_approximation) THEN
    4270             :                                  quadratic_approx_error = ABS(step_size_quadratic_approx - &
    4271           0 :                                                               step_size_quadratic_approx2)/step_size_quadratic_approx
    4272           0 :                                  IF (quadratic_approx_error .GT. quadratic_approx_error_threshold) THEN
    4273           0 :                                     IF (unit_nr > 0) THEN
    4274           0 :                                        WRITE (unit_nr, '(T3,A,1X,E17.7,1X,E17.7,1X,A)') "Quadratic approximation is poor", &
    4275           0 :                                           step_size_quadratic_approx, step_size_quadratic_approx2, &
    4276           0 :                                           "Try cubic approximation"
    4277             :                                     END IF
    4278             :                                     use_cubic_approximation = .TRUE.
    4279             :                                     use_quadratic_approximation = .FALSE.
    4280             :                                  END IF
    4281             :                               END IF
    4282             : 
    4283             :                               ! check if numerics is fine enough to capture the cubic form
    4284           0 :                               IF (use_cubic_approximation) THEN
    4285             : 
    4286             :                                  ! if quadratic approximation is not accurate enough
    4287             :                                  ! try to find the minimum assuming cubic form
    4288             :                                  ! aa*x**3 + bb*x**2 + cc*x + dd = f(x)
    4289           0 :                                  bb = (-step_size*gfun1 + 3.0_dp*(fun1 - fun0) - 2.0_dp*step_size*gfun0)/(step_size*step_size)
    4290           0 :                                  aa = (gfun1 - 2.0_dp*step_size*bb - gfun0)/(3.0_dp*step_size*step_size)
    4291             : 
    4292           0 :                                  IF (ABS(gfun1 - 2.0_dp*step_size*bb - gfun0) .LT. num_threshold) THEN
    4293           0 :                                     IF (unit_nr > 0) THEN
    4294             :                                        WRITE (unit_nr, '(T3,A,1X,E17.7)') &
    4295           0 :                                           "Numerical accuracy is too low to observe cubic behavior", &
    4296           0 :                                           ABS(gfun1 - 2.0_dp*step_size*bb - gfun0)
    4297             :                                     END IF
    4298             :                                     use_cubic_approximation = .FALSE.
    4299             :                                     use_quadratic_approximation = .TRUE.
    4300             :                                  END IF
    4301           0 :                                  IF (ABS(gfun1) .LT. num_threshold) THEN
    4302           0 :                                     IF (unit_nr > 0) THEN
    4303           0 :                                        WRITE (unit_nr, '(T3,A,1X,E17.7,A,1X,E12.3)') "Linear gradient", &
    4304           0 :                                           ABS(gfun1), &
    4305           0 :                                           " is smaller than the threshold", num_threshold
    4306             :                                     END IF
    4307             :                                     use_cubic_approximation = .FALSE.
    4308             :                                     use_quadratic_approximation = .TRUE.
    4309             :                                  END IF
    4310             :                               END IF
    4311             : 
    4312             :                               ! find the step assuming cubic approximation
    4313           0 :                               IF (use_cubic_approximation) THEN
    4314             :                                  ! to obtain the minimum of the cubic function solve the quadratic equation
    4315             :                                  ! 0.0*x**3 + 3.0*aa*x**2 + 2.0*bb*x + cc = 0
    4316           0 :                                  CALL analytic_line_search(0.0_dp, 3.0_dp*aa, 2.0_dp*bb, gfun0, minima, nmins)
    4317           0 :                                  IF (nmins .LT. 1) THEN
    4318           0 :                                     IF (unit_nr > 0) THEN
    4319             :                                        WRITE (unit_nr, '(T3,A)') &
    4320           0 :                                           "Cubic approximation gives zero soultions! Use quadratic approximation"
    4321             :                                     END IF
    4322             :                                     use_quadratic_approximation = .TRUE.
    4323             :                                     use_cubic_approximation = .TRUE.
    4324             :                                  ELSE
    4325           0 :                                     step_size = minima(1)
    4326           0 :                                     IF (nmins .GT. 1) THEN
    4327           0 :                                        IF (unit_nr > 0) THEN
    4328             :                                           WRITE (unit_nr, '(T3,A)') &
    4329           0 :                                              "More than one solution found! Use quadratic approximation"
    4330             :                                        END IF
    4331             :                                        use_quadratic_approximation = .TRUE.
    4332           0 :                                        use_cubic_approximation = .TRUE.
    4333             :                                     END IF
    4334             :                                  END IF
    4335             :                               END IF
    4336             : 
    4337           0 :                               IF (use_quadratic_approximation) THEN ! use quadratic approximation
    4338           0 :                                  IF (unit_nr > 0) THEN
    4339           0 :                                     WRITE (unit_nr, '(T3,A)') "Use quadratic approximation"
    4340             :                                  END IF
    4341           0 :                                  step_size = (step_size_quadratic_approx + step_size_quadratic_approx2)*0.5_dp
    4342             :                               END IF
    4343             : 
    4344             :                               ! one more check on the step size
    4345           0 :                               IF (step_size .LT. 0.0_dp) THEN
    4346           0 :                                  CPABORT("Negative step proposed")
    4347             :                               END IF
    4348             : 
    4349             :                               CALL dbcsr_copy(almo_scf_env%matrix_k_blk(ispin), &
    4350           0 :                                               matrix_k_central)
    4351             :                               CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
    4352           0 :                                              step, 1.0_dp, step_size)
    4353             :                               CALL dbcsr_copy(matrix_k_central, &
    4354           0 :                                               almo_scf_env%matrix_k_blk(ispin))
    4355           0 :                               line_search = .FALSE.
    4356             : 
    4357             :                            ELSE
    4358             : 
    4359           0 :                               IF (md_in_k_space) THEN
    4360             : 
    4361             :                                  ! update velocities v(i) = v(i-1) + 0.5*dT*(a(i-1) + a(i))
    4362           0 :                                  IF (iteration .NE. 0) THEN
    4363             :                                     CALL dbcsr_add(velocity, &
    4364           0 :                                                    step, 1.0_dp, 0.5_dp*time_step)
    4365             :                                     CALL dbcsr_add(velocity, &
    4366           0 :                                                    prev_step, 1.0_dp, 0.5_dp*time_step)
    4367             :                                  END IF
    4368           0 :                                  kin_energy = dbcsr_frobenius_norm(velocity)
    4369           0 :                                  kin_energy = 0.5_dp*kin_energy*kin_energy
    4370             : 
    4371             :                                  ! update positions k(i) = k(i-1) + dT*v(i-1) + 0.5*dT*dT*a(i-1)
    4372             :                                  CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
    4373           0 :                                                 velocity, 1.0_dp, time_step)
    4374             :                                  CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
    4375           0 :                                                 step, 1.0_dp, 0.5_dp*time_step*time_step)
    4376             : 
    4377             :                               ELSE
    4378             : 
    4379           0 :                                  IF (reset_step_size) THEN
    4380           0 :                                     step_size = almo_scf_env%opt_k_trial_step_size
    4381           0 :                                     reset_step_size = .FALSE.
    4382             :                                  ELSE
    4383           0 :                                     step_size = step_size*almo_scf_env%opt_k_trial_step_size_multiplier
    4384             :                                  END IF
    4385             :                                  CALL dbcsr_copy(almo_scf_env%matrix_k_blk(ispin), &
    4386           0 :                                                  matrix_k_central)
    4387             :                                  CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
    4388           0 :                                                 step, 1.0_dp, step_size)
    4389           0 :                                  line_search = .TRUE.
    4390             :                               END IF
    4391             : 
    4392             :                            END IF
    4393             : 
    4394             :                         END IF ! .NOT.prepare_to_exit
    4395             : 
    4396             :                         ! print the status of the optimization
    4397           0 :                         t2a = m_walltime()
    4398           0 :                         IF (unit_nr > 0) THEN
    4399           0 :                            IF (md_in_k_space) THEN
    4400             :                               WRITE (unit_nr, '(T6,A,1X,I5,1X,E12.3,E16.7,F15.9,F15.9,F15.9,E12.3,F15.9,F15.9,F8.3)') &
    4401           0 :                                  "K iter CG", iteration, time_step, time_step*iteration, &
    4402           0 :                                  energy_correction(ispin), obj_function, delta_obj_function, grad_norm, &
    4403           0 :                                  kin_energy, kin_energy + obj_function, beta
    4404             :                            ELSE
    4405           0 :                               IF (line_search .OR. prepare_to_exit) THEN
    4406             :                                  WRITE (unit_nr, '(T6,A,1X,I3,1X,E12.3,F16.10,F16.10,E12.3,E12.3,E12.3,F8.3,F8.3,F10.3)') &
    4407           0 :                                     "K iter CG", iteration, step_size, &
    4408           0 :                                     energy_correction(ispin), delta_obj_function, grad_norm, &
    4409           0 :                                     gfun0, line_search_error, beta, conjugacy_error, t2a - t1a
    4410             :                                  !(flop1+flop2)/(1.0E6_dp*(t2-t1))
    4411             :                               ELSE
    4412             :                                  WRITE (unit_nr, '(T6,A,1X,I3,1X,E12.3,F16.10,F16.10,E12.3,E12.3,E12.3,F8.3,F8.3,F10.3)') &
    4413           0 :                                     "K iter LS", iteration, step_size, &
    4414           0 :                                     energy_correction(ispin), delta_obj_function, grad_norm, &
    4415           0 :                                     gfun1, line_search_error, beta, conjugacy_error, t2a - t1a
    4416             :                                  !(flop1+flop2)/(1.0E6_dp*(t2-t1))
    4417             :                               END IF
    4418             :                            END IF
    4419           0 :                            CALL m_flush(unit_nr)
    4420             :                         END IF
    4421           0 :                         t1a = m_walltime()
    4422             : 
    4423             :                      ELSE ! opt_k_max_iter .eq. 0
    4424             :                         prepare_to_exit = .TRUE.
    4425             :                      END IF ! opt_k_max_iter .ne. 0
    4426             : 
    4427           0 :                      IF (.NOT. line_search) iteration = iteration + 1
    4428             : 
    4429           0 :                      IF (prepare_to_exit) EXIT
    4430             : 
    4431             :                   END DO ! end iterations on K
    4432             : 
    4433           0 :                   IF (converged .OR. (outer_opt_k_iteration .GE. outer_opt_k_max_iter)) THEN
    4434           0 :                      outer_opt_k_prepare_to_exit = .TRUE.
    4435             :                   END IF
    4436             : 
    4437           0 :                   IF (almo_scf_env%deloc_truncate_virt .NE. virt_full) THEN
    4438             : 
    4439           0 :                      IF (unit_nr > 0) THEN
    4440           0 :                         WRITE (unit_nr, *) "Updating ALMO virtuals"
    4441             :                      END IF
    4442             : 
    4443           0 :                      CALL timeset('k_opt_v0_update', handle8)
    4444             : 
    4445             :                      ! update retained ALMO virtuals to restart the cg iterations
    4446             :                      CALL dbcsr_multiply("N", "N", 1.0_dp, &
    4447             :                                          almo_scf_env%matrix_v_disc_blk(ispin), &
    4448             :                                          almo_scf_env%matrix_k_blk(ispin), &
    4449             :                                          0.0_dp, vr_fixed, &
    4450           0 :                                          filter_eps=almo_scf_env%eps_filter)
    4451             :                      CALL dbcsr_add(vr_fixed, almo_scf_env%matrix_v_blk(ispin), &
    4452           0 :                                     +1.0_dp, +1.0_dp)
    4453             : 
    4454             :                      ! update discarded ALMO virtuals to restart the cg iterations
    4455             :                      CALL dbcsr_multiply("N", "T", 1.0_dp, &
    4456             :                                          almo_scf_env%matrix_v_blk(ispin), &
    4457             :                                          almo_scf_env%matrix_k_blk(ispin), &
    4458             :                                          0.0_dp, vd_fixed, &
    4459           0 :                                          filter_eps=almo_scf_env%eps_filter)
    4460             :                      CALL dbcsr_add(vd_fixed, almo_scf_env%matrix_v_disc_blk(ispin), &
    4461           0 :                                     -1.0_dp, +1.0_dp)
    4462             : 
    4463             :                      ! orthogonalize new orbitals on fragments
    4464             :                      CALL get_overlap(bra=vr_fixed, &
    4465             :                                       ket=vr_fixed, &
    4466             :                                       overlap=k_vr_index_down, &
    4467             :                                       metric=almo_scf_env%matrix_s_blk(1), &
    4468             :                                       retain_overlap_sparsity=.FALSE., &
    4469           0 :                                       eps_filter=almo_scf_env%eps_filter)
    4470             :                      CALL dbcsr_create(vr_index_sqrt_inv, template=k_vr_index_down, &
    4471           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    4472             :                      CALL dbcsr_create(vr_index_sqrt, template=k_vr_index_down, &
    4473           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    4474             :                      CALL matrix_sqrt_Newton_Schulz(vr_index_sqrt, &
    4475             :                                                     vr_index_sqrt_inv, &
    4476             :                                                     k_vr_index_down, &
    4477             :                                                     threshold=almo_scf_env%eps_filter, &
    4478             :                                                     order=almo_scf_env%order_lanczos, &
    4479             :                                                     eps_lanczos=almo_scf_env%eps_lanczos, &
    4480           0 :                                                     max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    4481             :                      IF (safe_mode) THEN
    4482             :                         CALL dbcsr_create(matrix_tmp1, template=k_vr_index_down, &
    4483             :                                           matrix_type=dbcsr_type_no_symmetry)
    4484             :                         CALL dbcsr_create(matrix_tmp2, template=k_vr_index_down, &
    4485             :                                           matrix_type=dbcsr_type_no_symmetry)
    4486             : 
    4487             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, vr_index_sqrt_inv, &
    4488             :                                             k_vr_index_down, &
    4489             :                                             0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    4490             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
    4491             :                                             vr_index_sqrt_inv, &
    4492             :                                             0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    4493             : 
    4494             :                         frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    4495             :                         CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    4496             :                         frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    4497             :                         IF (unit_nr > 0) THEN
    4498             :                            WRITE (unit_nr, *) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)", &
    4499             :                               frob_matrix/frob_matrix_base
    4500             :                         END IF
    4501             : 
    4502             :                         CALL dbcsr_release(matrix_tmp1)
    4503             :                         CALL dbcsr_release(matrix_tmp2)
    4504             :                      END IF
    4505             :                      CALL dbcsr_multiply("N", "N", 1.0_dp, &
    4506             :                                          vr_fixed, &
    4507             :                                          vr_index_sqrt_inv, &
    4508             :                                          0.0_dp, almo_scf_env%matrix_v_blk(ispin), &
    4509           0 :                                          filter_eps=almo_scf_env%eps_filter)
    4510             : 
    4511             :                      CALL get_overlap(bra=vd_fixed, &
    4512             :                                       ket=vd_fixed, &
    4513             :                                       overlap=k_vd_index_down, &
    4514             :                                       metric=almo_scf_env%matrix_s_blk(1), &
    4515             :                                       retain_overlap_sparsity=.FALSE., &
    4516           0 :                                       eps_filter=almo_scf_env%eps_filter)
    4517             :                      CALL dbcsr_create(vd_index_sqrt_inv, template=k_vd_index_down, &
    4518           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    4519             :                      CALL dbcsr_create(vd_index_sqrt, template=k_vd_index_down, &
    4520           0 :                                        matrix_type=dbcsr_type_no_symmetry)
    4521             :                      CALL matrix_sqrt_Newton_Schulz(vd_index_sqrt, &
    4522             :                                                     vd_index_sqrt_inv, &
    4523             :                                                     k_vd_index_down, &
    4524             :                                                     threshold=almo_scf_env%eps_filter, &
    4525             :                                                     order=almo_scf_env%order_lanczos, &
    4526             :                                                     eps_lanczos=almo_scf_env%eps_lanczos, &
    4527           0 :                                                     max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    4528             :                      IF (safe_mode) THEN
    4529             :                         CALL dbcsr_create(matrix_tmp1, template=k_vd_index_down, &
    4530             :                                           matrix_type=dbcsr_type_no_symmetry)
    4531             :                         CALL dbcsr_create(matrix_tmp2, template=k_vd_index_down, &
    4532             :                                           matrix_type=dbcsr_type_no_symmetry)
    4533             : 
    4534             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, vd_index_sqrt_inv, &
    4535             :                                             k_vd_index_down, &
    4536             :                                             0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    4537             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
    4538             :                                             vd_index_sqrt_inv, &
    4539             :                                             0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    4540             : 
    4541             :                         frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    4542             :                         CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    4543             :                         frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    4544             :                         IF (unit_nr > 0) THEN
    4545             :                            WRITE (unit_nr, *) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)", &
    4546             :                               frob_matrix/frob_matrix_base
    4547             :                         END IF
    4548             : 
    4549             :                         CALL dbcsr_release(matrix_tmp1)
    4550             :                         CALL dbcsr_release(matrix_tmp2)
    4551             :                      END IF
    4552             :                      CALL dbcsr_multiply("N", "N", 1.0_dp, &
    4553             :                                          vd_fixed, &
    4554             :                                          vd_index_sqrt_inv, &
    4555             :                                          0.0_dp, almo_scf_env%matrix_v_disc_blk(ispin), &
    4556           0 :                                          filter_eps=almo_scf_env%eps_filter)
    4557             : 
    4558           0 :                      CALL dbcsr_release(vr_index_sqrt_inv)
    4559           0 :                      CALL dbcsr_release(vr_index_sqrt)
    4560           0 :                      CALL dbcsr_release(vd_index_sqrt_inv)
    4561           0 :                      CALL dbcsr_release(vd_index_sqrt)
    4562             : 
    4563           0 :                      CALL timestop(handle8)
    4564             : 
    4565             :                   END IF ! ne.virt_full
    4566             : 
    4567             :                   ! RZK-warning released outside the outer loop
    4568           0 :                   CALL dbcsr_release(sigma_vv_sqrt)
    4569           0 :                   CALL dbcsr_release(sigma_vv_sqrt_inv)
    4570           0 :                   IF (almo_scf_env%deloc_truncate_virt .NE. virt_full) THEN
    4571           0 :                      CALL dbcsr_release(k_vr_index_down)
    4572           0 :                      CALL dbcsr_release(k_vd_index_down)
    4573             :                      !CALL dbcsr_release(k_vd_index_up)
    4574           0 :                      CALL dbcsr_release(matrix_k_central)
    4575           0 :                      CALL dbcsr_release(vr_fixed)
    4576           0 :                      CALL dbcsr_release(vd_fixed)
    4577           0 :                      CALL dbcsr_release(grad)
    4578           0 :                      CALL dbcsr_release(prec)
    4579           0 :                      CALL dbcsr_release(prev_grad)
    4580           0 :                      CALL dbcsr_release(tmp3_vd_vr)
    4581           0 :                      CALL dbcsr_release(tmp1_n_vr)
    4582           0 :                      CALL dbcsr_release(tmp_k_blk)
    4583           0 :                      CALL dbcsr_release(t_curr)
    4584           0 :                      CALL dbcsr_release(sigma_oo_curr)
    4585           0 :                      CALL dbcsr_release(sigma_oo_curr_inv)
    4586           0 :                      CALL dbcsr_release(step)
    4587           0 :                      CALL dbcsr_release(tmp2_n_o)
    4588           0 :                      CALL dbcsr_release(tmp4_o_vr)
    4589           0 :                      CALL dbcsr_release(prev_step)
    4590           0 :                      CALL dbcsr_release(prev_minus_prec_grad)
    4591           0 :                      IF (md_in_k_space) THEN
    4592           0 :                         CALL dbcsr_release(velocity)
    4593             :                      END IF
    4594             : 
    4595             :                   END IF
    4596             : 
    4597           0 :                   outer_opt_k_iteration = outer_opt_k_iteration + 1
    4598           0 :                   IF (outer_opt_k_prepare_to_exit) EXIT
    4599             : 
    4600             :                END DO ! outer loop for k
    4601             : 
    4602             :             END DO ! ispin
    4603             : 
    4604             :             ! RZK-warning update mo orbitals
    4605             : 
    4606             :          ELSE ! virtual orbitals might not be available use projected AOs
    4607             : 
    4608             :             ! compute sqrt(S) and inv(sqrt(S))
    4609             :             ! RZK-warning - remove this sqrt(S) and inv(sqrt(S))
    4610             :             ! ideally ALMO scf should use sigma and sigma_inv in
    4611             :             ! the tensor_up_down representation
    4612           0 :             IF (.NOT. almo_scf_env%s_sqrt_done) THEN
    4613             : 
    4614           0 :                IF (unit_nr > 0) THEN
    4615           0 :                   WRITE (unit_nr, *) "sqrt and inv(sqrt) of AO overlap matrix"
    4616             :                END IF
    4617             :                CALL dbcsr_create(almo_scf_env%matrix_s_sqrt(1), &
    4618             :                                  template=almo_scf_env%matrix_s(1), &
    4619           0 :                                  matrix_type=dbcsr_type_no_symmetry)
    4620             :                CALL dbcsr_create(almo_scf_env%matrix_s_sqrt_inv(1), &
    4621             :                                  template=almo_scf_env%matrix_s(1), &
    4622           0 :                                  matrix_type=dbcsr_type_no_symmetry)
    4623             : 
    4624             :                CALL matrix_sqrt_Newton_Schulz(almo_scf_env%matrix_s_sqrt(1), &
    4625             :                                               almo_scf_env%matrix_s_sqrt_inv(1), &
    4626             :                                               almo_scf_env%matrix_s(1), &
    4627             :                                               threshold=almo_scf_env%eps_filter, &
    4628             :                                               order=almo_scf_env%order_lanczos, &
    4629             :                                               eps_lanczos=almo_scf_env%eps_lanczos, &
    4630           0 :                                               max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    4631             : 
    4632             :                IF (safe_mode) THEN
    4633             :                   CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_s(1), &
    4634             :                                     matrix_type=dbcsr_type_no_symmetry)
    4635             :                   CALL dbcsr_create(matrix_tmp2, template=almo_scf_env%matrix_s(1), &
    4636             :                                     matrix_type=dbcsr_type_no_symmetry)
    4637             : 
    4638             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_s_sqrt_inv(1), &
    4639             :                                       almo_scf_env%matrix_s(1), &
    4640             :                                       0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    4641             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, almo_scf_env%matrix_s_sqrt_inv(1), &
    4642             :                                       0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    4643             : 
    4644             :                   frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    4645             :                   CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    4646             :                   frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    4647             :                   IF (unit_nr > 0) THEN
    4648             :                      WRITE (unit_nr, *) "Error for (inv(sqrt(S))*S*inv(sqrt(S))-I)", frob_matrix/frob_matrix_base
    4649             :                   END IF
    4650             : 
    4651             :                   CALL dbcsr_release(matrix_tmp1)
    4652             :                   CALL dbcsr_release(matrix_tmp2)
    4653             :                END IF
    4654             : 
    4655           0 :                almo_scf_env%s_sqrt_done = .TRUE.
    4656             : 
    4657             :             END IF
    4658             : 
    4659           0 :             DO ispin = 1, nspin
    4660             : 
    4661           0 :                CALL ct_step_env_init(ct_step_env)
    4662             :                CALL ct_step_env_set(ct_step_env, &
    4663             :                                     para_env=almo_scf_env%para_env, &
    4664             :                                     blacs_env=almo_scf_env%blacs_env, &
    4665             :                                     use_occ_orbs=.TRUE., &
    4666             :                                     use_virt_orbs=almo_scf_env%deloc_cayley_use_virt_orbs, &
    4667             :                                     occ_orbs_orthogonal=.FALSE., &
    4668             :                                     virt_orbs_orthogonal=almo_scf_env%orthogonal_basis, &
    4669             :                                     tensor_type=almo_scf_env%deloc_cayley_tensor_type, &
    4670             :                                     neglect_quadratic_term=almo_scf_env%deloc_cayley_linear, &
    4671             :                                     calculate_energy_corr=.TRUE., &
    4672             :                                     update_p=.TRUE., &
    4673             :                                     update_q=.FALSE., &
    4674             :                                     pp_preconditioner_full=almo_scf_env%deloc_cayley_occ_precond, &
    4675             :                                     qq_preconditioner_full=almo_scf_env%deloc_cayley_vir_precond, &
    4676             :                                     eps_convergence=almo_scf_env%deloc_cayley_eps_convergence, &
    4677             :                                     eps_filter=almo_scf_env%eps_filter, &
    4678             :                                     !nspins=almo_scf_env%nspins,&
    4679             :                                     q_index_up=almo_scf_env%matrix_s_sqrt_inv(1), &
    4680             :                                     q_index_down=almo_scf_env%matrix_s_sqrt(1), &
    4681             :                                     p_index_up=almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    4682             :                                     p_index_down=almo_scf_env%matrix_sigma_sqrt(ispin), &
    4683             :                                     matrix_ks=almo_scf_env%matrix_ks_0deloc(ispin), &
    4684             :                                     matrix_p=almo_scf_env%matrix_p(ispin), &
    4685             :                                     matrix_qp_template=almo_scf_env%matrix_t(ispin), &
    4686             :                                     matrix_pq_template=almo_scf_env%matrix_t_tr(ispin), &
    4687             :                                     matrix_t=almo_scf_env%matrix_t(ispin), &
    4688             :                                     conjugator=almo_scf_env%deloc_cayley_conjugator, &
    4689           0 :                                     max_iter=almo_scf_env%deloc_cayley_max_iter)
    4690             : 
    4691             :                ! perform calculations
    4692           0 :                CALL ct_step_execute(ct_step_env)
    4693             : 
    4694             :                ! for now we do not need the new set of orbitals
    4695             :                ! just get the energy correction
    4696             :                CALL ct_step_env_get(ct_step_env, &
    4697           0 :                                     energy_correction=energy_correction(ispin))
    4698             :                !copy_da_energy_matrix=matrix_eda(ispin),&
    4699             :                !copy_da_charge_matrix=matrix_cta(ispin),&
    4700             : 
    4701           0 :                CALL ct_step_env_clean(ct_step_env)
    4702             : 
    4703             :             END DO
    4704             : 
    4705           0 :             energy_correction(1) = energy_correction(1)*spin_factor
    4706             : 
    4707             :          END IF
    4708             : 
    4709             :          ! print the energy correction and exit
    4710           0 :          DO ispin = 1, nspin
    4711             : 
    4712           0 :             IF (unit_nr > 0) THEN
    4713           0 :                WRITE (unit_nr, *)
    4714           0 :                WRITE (unit_nr, '(T2,A,I6,F20.9)') "ECORR", ispin, &
    4715           0 :                   energy_correction(ispin)
    4716           0 :                WRITE (unit_nr, *)
    4717             :             END IF
    4718           0 :             energy_correction_final = energy_correction_final + energy_correction(ispin)
    4719             : 
    4720             :             !!! print out the results of decomposition analysis
    4721             :             !!IF (unit_nr>0) THEN
    4722             :             !!   WRITE(unit_nr,*)
    4723             :             !!   WRITE(unit_nr,'(T2,A)') "ENERGY DECOMPOSITION"
    4724             :             !!ENDIF
    4725             :             !!CALL dbcsr_print_block_sum(eda_matrix(ispin))
    4726             :             !!IF (unit_nr>0) THEN
    4727             :             !!   WRITE(unit_nr,*)
    4728             :             !!   WRITE(unit_nr,'(T2,A)') "CHARGE DECOMPOSITION"
    4729             :             !!ENDIF
    4730             :             !!CALL dbcsr_print_block_sum(cta_matrix(ispin))
    4731             : 
    4732             :             ! obtain density matrix from updated MOs
    4733             :             ! RZK-later sigma and sigma_inv are lost here
    4734             :             CALL almo_scf_t_to_proj(t=almo_scf_env%matrix_t(ispin), &
    4735             :                                     p=almo_scf_env%matrix_p(ispin), &
    4736             :                                     eps_filter=almo_scf_env%eps_filter, &
    4737             :                                     orthog_orbs=.FALSE., &
    4738             :                                     nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    4739             :                                     s=almo_scf_env%matrix_s(1), &
    4740             :                                     sigma=almo_scf_env%matrix_sigma(ispin), &
    4741             :                                     sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
    4742             :                                     !use_guess=use_guess, &
    4743             :                                     algorithm=almo_scf_env%sigma_inv_algorithm, &
    4744             :                                     inverse_accelerator=almo_scf_env%order_lanczos, &
    4745             :                                     inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
    4746             :                                     eps_lanczos=almo_scf_env%eps_lanczos, &
    4747             :                                     max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
    4748             :                                     para_env=almo_scf_env%para_env, &
    4749           0 :                                     blacs_env=almo_scf_env%blacs_env)
    4750             : 
    4751           0 :             IF (almo_scf_env%nspins == 1) &
    4752             :                CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), &
    4753           0 :                                 spin_factor)
    4754             : 
    4755             :          END DO
    4756             : 
    4757             :       CASE (dm_ls_step)
    4758             : 
    4759             :          ! compute the inverse of S
    4760           0 :          IF (.NOT. almo_scf_env%s_inv_done) THEN
    4761           0 :             IF (unit_nr > 0) THEN
    4762           0 :                WRITE (unit_nr, *) "Inverting AO overlap matrix"
    4763             :             END IF
    4764             :             CALL dbcsr_create(almo_scf_env%matrix_s_inv(1), &
    4765             :                               template=almo_scf_env%matrix_s(1), &
    4766           0 :                               matrix_type=dbcsr_type_no_symmetry)
    4767           0 :             IF (.NOT. almo_scf_env%s_sqrt_done) THEN
    4768             :                CALL invert_Hotelling(almo_scf_env%matrix_s_inv(1), &
    4769             :                                      almo_scf_env%matrix_s(1), &
    4770           0 :                                      threshold=almo_scf_env%eps_filter)
    4771             :             ELSE
    4772             :                CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_s_sqrt_inv(1), &
    4773             :                                    almo_scf_env%matrix_s_sqrt_inv(1), &
    4774             :                                    0.0_dp, almo_scf_env%matrix_s_inv(1), &
    4775           0 :                                    filter_eps=almo_scf_env%eps_filter)
    4776             :             END IF
    4777             : 
    4778             :             IF (safe_mode) THEN
    4779             :                CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_s(1), &
    4780             :                                  matrix_type=dbcsr_type_no_symmetry)
    4781             :                CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_s_inv(1), &
    4782             :                                    almo_scf_env%matrix_s(1), &
    4783             :                                    0.0_dp, matrix_tmp1, &
    4784             :                                    filter_eps=almo_scf_env%eps_filter)
    4785             :                frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    4786             :                CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
    4787             :                frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    4788             :                IF (unit_nr > 0) THEN
    4789             :                   WRITE (unit_nr, *) "Error for (inv(S)*S-I)", &
    4790             :                      frob_matrix/frob_matrix_base
    4791             :                END IF
    4792             :                CALL dbcsr_release(matrix_tmp1)
    4793             :             END IF
    4794             : 
    4795           0 :             almo_scf_env%s_inv_done = .TRUE.
    4796             : 
    4797             :          END IF
    4798             : 
    4799           0 :          DO ispin = 1, nspin
    4800             :             ! RZK-warning the preconditioner is very important
    4801             :             !       IF (.FALSE.) THEN
    4802             :             !           CALL apply_matrix_preconditioner(almo_scf_env%matrix_ks(ispin),&
    4803             :             !                   "forward",almo_scf_env%matrix_s_blk_sqrt(1),&
    4804             :             !                   almo_scf_env%matrix_s_blk_sqrt_inv(1))
    4805             :             !       ENDIF
    4806             :             !CALL dbcsr_filter(almo_scf_env%matrix_ks(ispin),&
    4807             :             !         almo_scf_env%eps_filter)
    4808             :          END DO
    4809             : 
    4810           0 :          ALLOCATE (matrix_p_almo_scf_converged(nspin))
    4811           0 :          DO ispin = 1, nspin
    4812             :             CALL dbcsr_create(matrix_p_almo_scf_converged(ispin), &
    4813           0 :                               template=almo_scf_env%matrix_p(ispin))
    4814             :             CALL dbcsr_copy(matrix_p_almo_scf_converged(ispin), &
    4815           0 :                             almo_scf_env%matrix_p(ispin))
    4816             :          END DO
    4817             : 
    4818             :          ! update the density matrix
    4819           0 :          DO ispin = 1, nspin
    4820             : 
    4821           0 :             nelectron_spin_real(1) = almo_scf_env%nelectrons_spin(ispin)
    4822           0 :             IF (almo_scf_env%nspins == 1) &
    4823           0 :                nelectron_spin_real(1) = nelectron_spin_real(1)/2
    4824             : 
    4825           0 :             local_mu(1) = SUM(almo_scf_env%mu_of_domain(:, ispin))/almo_scf_env%ndomains
    4826           0 :             fake(1) = 123523
    4827             : 
    4828             :             ! RZK UPDATE! the update algorithm is removed because
    4829             :             ! RZK UPDATE! it requires updating core LS_SCF routines
    4830             :             ! RZK UPDATE! (the code exists in the CVS version)
    4831           0 :             CPABORT("CVS only: density_matrix_sign has not been updated in SVN")
    4832             :             ! RZK UPDATE!CALL density_matrix_sign(almo_scf_env%matrix_p(ispin),&
    4833             :             ! RZK UPDATE!                     local_mu,&
    4834             :             ! RZK UPDATE!                     almo_scf_env%fixed_mu,&
    4835             :             ! RZK UPDATE!                     almo_scf_env%matrix_ks_0deloc(ispin),&
    4836             :             ! RZK UPDATE!                     almo_scf_env%matrix_s(1), &
    4837             :             ! RZK UPDATE!                     almo_scf_env%matrix_s_inv(1), &
    4838             :             ! RZK UPDATE!                     nelectron_spin_real,&
    4839             :             ! RZK UPDATE!                     almo_scf_env%eps_filter,&
    4840             :             ! RZK UPDATE!                     fake)
    4841             :             ! RZK UPDATE!
    4842           0 :             almo_scf_env%mu = local_mu(1)
    4843             : 
    4844             :             !IF (almo_scf_env%has_s_preconditioner) THEN
    4845             :             !    CALL apply_matrix_preconditioner(&
    4846             :             !             almo_scf_env%matrix_p_blk(ispin),&
    4847             :             !             "forward",almo_scf_env%matrix_s_blk_sqrt(1),&
    4848             :             !             almo_scf_env%matrix_s_blk_sqrt_inv(1))
    4849             :             !ENDIF
    4850             :             !CALL dbcsr_filter(almo_scf_env%matrix_p(ispin),&
    4851             :             !        almo_scf_env%eps_filter)
    4852             : 
    4853           0 :             IF (almo_scf_env%nspins == 1) &
    4854             :                CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), &
    4855           0 :                                 spin_factor)
    4856             : 
    4857             :             !CALL dbcsr_dot(almo_scf_env%matrix_ks_0deloc(ispin),&
    4858             :             !  almo_scf_env%matrix_p(ispin),&
    4859             :             !  energy_correction(ispin))
    4860             :             !IF (unit_nr>0) THEN
    4861             :             !   WRITE(unit_nr,*)
    4862             :             !   WRITE(unit_nr,'(T2,A,I6,F20.9)') "EFAKE",ispin,&
    4863             :             !           energy_correction(ispin)
    4864             :             !   WRITE(unit_nr,*)
    4865             :             !ENDIF
    4866             :             CALL dbcsr_add(matrix_p_almo_scf_converged(ispin), &
    4867           0 :                            almo_scf_env%matrix_p(ispin), -1.0_dp, 1.0_dp)
    4868             :             CALL dbcsr_dot(almo_scf_env%matrix_ks_0deloc(ispin), &
    4869             :                            matrix_p_almo_scf_converged(ispin), &
    4870           0 :                            energy_correction(ispin))
    4871             : 
    4872           0 :             energy_correction_final = energy_correction_final + energy_correction(ispin)
    4873             : 
    4874           0 :             IF (unit_nr > 0) THEN
    4875           0 :                WRITE (unit_nr, *)
    4876           0 :                WRITE (unit_nr, '(T2,A,I6,F20.9)') "ECORR", ispin, &
    4877           0 :                   energy_correction(ispin)
    4878           0 :                WRITE (unit_nr, *)
    4879             :             END IF
    4880             : 
    4881             :          END DO
    4882             : 
    4883           0 :          DO ispin = 1, nspin
    4884           0 :             CALL dbcsr_release(matrix_p_almo_scf_converged(ispin))
    4885             :          END DO
    4886           0 :          DEALLOCATE (matrix_p_almo_scf_converged)
    4887             : 
    4888             :       END SELECT ! algorithm selection
    4889             : 
    4890           0 :       t2 = m_walltime()
    4891             : 
    4892           0 :       IF (unit_nr > 0) THEN
    4893           0 :          WRITE (unit_nr, *)
    4894           0 :          WRITE (unit_nr, '(T2,A,F18.9,F18.9,F18.9,F12.6)') "ETOT", &
    4895           0 :             almo_scf_env%almo_scf_energy, &
    4896           0 :             energy_correction_final, &
    4897           0 :             almo_scf_env%almo_scf_energy + energy_correction_final, &
    4898           0 :             t2 - t1
    4899           0 :          WRITE (unit_nr, *)
    4900             :       END IF
    4901             : 
    4902           0 :       CALL timestop(handle)
    4903             : 
    4904           0 :    END SUBROUTINE harris_foulkes_correction
    4905             : 
    4906             : ! **************************************************************************************************
    4907             : !> \brief Computes a diagonal preconditioner for the cg optimization of k matrix
    4908             : !> \param prec ...
    4909             : !> \param vd_prop ...
    4910             : !> \param f ...
    4911             : !> \param x ...
    4912             : !> \param oo_inv_x_tr ...
    4913             : !> \param s ...
    4914             : !> \param grad ...
    4915             : !> \param vd_blk ...
    4916             : !> \param t ...
    4917             : !> \param template_vd_vd_blk ...
    4918             : !> \param template_vr_vr_blk ...
    4919             : !> \param template_n_vr ...
    4920             : !> \param spin_factor ...
    4921             : !> \param eps_filter ...
    4922             : !> \par History
    4923             : !>       2011.09 created [Rustam Z Khaliullin]
    4924             : !> \author Rustam Z Khaliullin
    4925             : ! **************************************************************************************************
    4926           0 :    SUBROUTINE opt_k_create_preconditioner(prec, vd_prop, f, x, oo_inv_x_tr, s, grad, &
    4927             :                                           vd_blk, t, template_vd_vd_blk, template_vr_vr_blk, template_n_vr, &
    4928             :                                           spin_factor, eps_filter)
    4929             : 
    4930             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: prec
    4931             :       TYPE(dbcsr_type), INTENT(IN)                       :: vd_prop, f, x, oo_inv_x_tr, s, grad, &
    4932             :                                                             vd_blk, t, template_vd_vd_blk, &
    4933             :                                                             template_vr_vr_blk, template_n_vr
    4934             :       REAL(KIND=dp), INTENT(IN)                          :: spin_factor, eps_filter
    4935             : 
    4936             :       CHARACTER(len=*), PARAMETER :: routineN = 'opt_k_create_preconditioner'
    4937             : 
    4938             :       INTEGER                                            :: handle, p_nrows, q_nrows
    4939           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: p_diagonal, q_diagonal
    4940             :       TYPE(dbcsr_type)                                   :: pp_diag, qq_diag, t1, t2, tmp, &
    4941             :                                                             tmp1_n_vr, tmp2_n_vr, tmp_n_vd, &
    4942             :                                                             tmp_vd_vd_blk, tmp_vr_vr_blk
    4943             : 
    4944             : ! init diag blocks outside
    4945             : ! init diag blocks otside
    4946             : !INTEGER                                  :: iblock_row, iblock_col,&
    4947             : !                                            nblkrows_tot, nblkcols_tot
    4948             : !REAL(KIND=dp), DIMENSION(:, :), POINTER  :: p_new_block
    4949             : !INTEGER                                  :: mynode, hold, row, col
    4950             : 
    4951           0 :       CALL timeset(routineN, handle)
    4952             : 
    4953             :       ! initialize a matrix to 1.0
    4954           0 :       CALL dbcsr_create(tmp, template=prec)
    4955             :       ! in order to use dbcsr_set matrix blocks must exist
    4956           0 :       CALL dbcsr_copy(tmp, prec)
    4957           0 :       CALL dbcsr_set(tmp, 1.0_dp)
    4958             : 
    4959             :       ! compute qq = (Vd^tr)*F*Vd
    4960           0 :       CALL dbcsr_create(tmp_n_vd, template=vd_prop)
    4961             :       CALL dbcsr_multiply("N", "N", 1.0_dp, f, vd_prop, &
    4962           0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    4963             :       CALL dbcsr_create(tmp_vd_vd_blk, &
    4964           0 :                         template=template_vd_vd_blk)
    4965           0 :       CALL dbcsr_copy(tmp_vd_vd_blk, template_vd_vd_blk)
    4966             :       CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
    4967             :                           0.0_dp, tmp_vd_vd_blk, &
    4968             :                           retain_sparsity=.TRUE., &
    4969           0 :                           filter_eps=eps_filter)
    4970             :       ! copy diagonal elements of the result into rows of a matrix
    4971           0 :       CALL dbcsr_get_info(tmp_vd_vd_blk, nfullrows_total=q_nrows)
    4972           0 :       ALLOCATE (q_diagonal(q_nrows))
    4973           0 :       CALL dbcsr_get_diag(tmp_vd_vd_blk, q_diagonal)
    4974             :       CALL dbcsr_create(qq_diag, &
    4975           0 :                         template=template_vd_vd_blk)
    4976           0 :       CALL dbcsr_add_on_diag(qq_diag, 1.0_dp)
    4977           0 :       CALL dbcsr_set_diag(qq_diag, q_diagonal)
    4978           0 :       CALL dbcsr_create(t1, template=prec)
    4979             :       CALL dbcsr_multiply("N", "N", 1.0_dp, qq_diag, tmp, &
    4980           0 :                           0.0_dp, t1, filter_eps=eps_filter)
    4981             : 
    4982             :       ! compute pp = X*sigma_oo_inv*X^tr
    4983           0 :       CALL dbcsr_create(tmp_vr_vr_blk, template=template_vr_vr_blk)
    4984           0 :       CALL dbcsr_copy(tmp_vr_vr_blk, template_vr_vr_blk)
    4985             :       CALL dbcsr_multiply("N", "N", 1.0_dp, x, oo_inv_x_tr, &
    4986             :                           0.0_dp, tmp_vr_vr_blk, &
    4987             :                           retain_sparsity=.TRUE., &
    4988           0 :                           filter_eps=eps_filter)
    4989             :       ! copy diagonal elements of the result into cols of a matrix
    4990           0 :       CALL dbcsr_get_info(tmp_vr_vr_blk, nfullrows_total=p_nrows)
    4991           0 :       ALLOCATE (p_diagonal(p_nrows))
    4992           0 :       CALL dbcsr_get_diag(tmp_vr_vr_blk, p_diagonal)
    4993           0 :       CALL dbcsr_create(pp_diag, template=template_vr_vr_blk)
    4994           0 :       CALL dbcsr_add_on_diag(pp_diag, 1.0_dp)
    4995           0 :       CALL dbcsr_set_diag(pp_diag, p_diagonal)
    4996           0 :       CALL dbcsr_set(tmp, 1.0_dp)
    4997           0 :       CALL dbcsr_create(t2, template=prec)
    4998             :       CALL dbcsr_multiply("N", "N", 1.0_dp, tmp, pp_diag, &
    4999           0 :                           0.0_dp, t2, filter_eps=eps_filter)
    5000             : 
    5001           0 :       CALL dbcsr_hadamard_product(t1, t2, prec)
    5002             : 
    5003             :       ! compute qq = (Vd^tr)*S*Vd
    5004             :       CALL dbcsr_multiply("N", "N", 1.0_dp, s, vd_prop, &
    5005           0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    5006             :       CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
    5007             :                           0.0_dp, tmp_vd_vd_blk, &
    5008             :                           retain_sparsity=.TRUE., &
    5009           0 :                           filter_eps=eps_filter)
    5010             :       ! copy diagonal elements of the result into rows of a matrix
    5011           0 :       CALL dbcsr_get_diag(tmp_vd_vd_blk, q_diagonal)
    5012           0 :       CALL dbcsr_add_on_diag(qq_diag, 1.0_dp)
    5013           0 :       CALL dbcsr_set_diag(qq_diag, q_diagonal)
    5014           0 :       CALL dbcsr_set(tmp, 1.0_dp)
    5015             :       CALL dbcsr_multiply("N", "N", 1.0_dp, qq_diag, tmp, &
    5016           0 :                           0.0_dp, t1, filter_eps=eps_filter)
    5017             : 
    5018             :       ! compute pp = X*sig_oo_inv*(T^tr)*F*T*sig_oo_inv*(X^tr)
    5019           0 :       CALL dbcsr_create(tmp1_n_vr, template=template_n_vr)
    5020           0 :       CALL dbcsr_create(tmp2_n_vr, template=template_n_vr)
    5021             :       CALL dbcsr_multiply("N", "N", 1.0_dp, t, oo_inv_x_tr, &
    5022           0 :                           0.0_dp, tmp1_n_vr, filter_eps=eps_filter)
    5023             :       CALL dbcsr_multiply("N", "N", 1.0_dp, f, tmp1_n_vr, &
    5024           0 :                           0.0_dp, tmp2_n_vr, filter_eps=eps_filter)
    5025             :       CALL dbcsr_multiply("T", "N", 1.0_dp, tmp1_n_vr, tmp2_n_vr, &
    5026             :                           0.0_dp, tmp_vr_vr_blk, &
    5027             :                           retain_sparsity=.TRUE., &
    5028           0 :                           filter_eps=eps_filter)
    5029             :       ! copy diagonal elements of the result into cols of a matrix
    5030           0 :       CALL dbcsr_get_diag(tmp_vr_vr_blk, p_diagonal)
    5031           0 :       CALL dbcsr_add_on_diag(pp_diag, 1.0_dp)
    5032           0 :       CALL dbcsr_set_diag(pp_diag, p_diagonal)
    5033           0 :       CALL dbcsr_set(tmp, 1.0_dp)
    5034             :       CALL dbcsr_multiply("N", "N", 1.0_dp, tmp, pp_diag, &
    5035           0 :                           0.0_dp, t2, filter_eps=eps_filter)
    5036             : 
    5037           0 :       CALL dbcsr_hadamard_product(t1, t2, tmp)
    5038           0 :       CALL dbcsr_add(prec, tmp, 1.0_dp, -1.0_dp)
    5039           0 :       CALL dbcsr_scale(prec, 2.0_dp*spin_factor)
    5040             : 
    5041             :       ! compute qp = X*sig_oo_inv*(T^tr)*S*Vd
    5042             :       CALL dbcsr_multiply("N", "N", 1.0_dp, s, vd_blk, &
    5043           0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    5044             :       CALL dbcsr_multiply("T", "N", 1.0_dp, tmp_n_vd, tmp1_n_vr, &
    5045             :                           0.0_dp, tmp, retain_sparsity=.TRUE., &
    5046           0 :                           filter_eps=eps_filter)
    5047           0 :       CALL dbcsr_hadamard_product(grad, tmp, t1)
    5048             :       ! gradient already contains 2.0*spin_factor
    5049           0 :       CALL dbcsr_scale(t1, -2.0_dp)
    5050             : 
    5051           0 :       CALL dbcsr_add(prec, t1, 1.0_dp, 1.0_dp)
    5052             : 
    5053           0 :       CALL dbcsr_function_of_elements(prec, dbcsr_func_inverse)
    5054           0 :       CALL dbcsr_filter(prec, eps_filter)
    5055             : 
    5056           0 :       DEALLOCATE (q_diagonal)
    5057           0 :       DEALLOCATE (p_diagonal)
    5058           0 :       CALL dbcsr_release(tmp)
    5059           0 :       CALL dbcsr_release(qq_diag)
    5060           0 :       CALL dbcsr_release(t1)
    5061           0 :       CALL dbcsr_release(pp_diag)
    5062           0 :       CALL dbcsr_release(t2)
    5063           0 :       CALL dbcsr_release(tmp_n_vd)
    5064           0 :       CALL dbcsr_release(tmp_vd_vd_blk)
    5065           0 :       CALL dbcsr_release(tmp_vr_vr_blk)
    5066           0 :       CALL dbcsr_release(tmp1_n_vr)
    5067           0 :       CALL dbcsr_release(tmp2_n_vr)
    5068             : 
    5069           0 :       CALL timestop(handle)
    5070             : 
    5071           0 :    END SUBROUTINE opt_k_create_preconditioner
    5072             : 
    5073             : ! **************************************************************************************************
    5074             : !> \brief Computes a block-diagonal preconditioner for the optimization of
    5075             : !>        k matrix
    5076             : !> \param almo_scf_env ...
    5077             : !> \param vd_prop ...
    5078             : !> \param oo_inv_x_tr ...
    5079             : !> \param t_curr ...
    5080             : !> \param ispin ...
    5081             : !> \param spin_factor ...
    5082             : !> \par History
    5083             : !>       2011.10 created [Rustam Z Khaliullin]
    5084             : !> \author Rustam Z Khaliullin
    5085             : ! **************************************************************************************************
    5086           0 :    SUBROUTINE opt_k_create_preconditioner_blk(almo_scf_env, vd_prop, oo_inv_x_tr, &
    5087             :                                               t_curr, ispin, spin_factor)
    5088             : 
    5089             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    5090             :       TYPE(dbcsr_type), INTENT(IN)                       :: vd_prop, oo_inv_x_tr, t_curr
    5091             :       INTEGER, INTENT(IN)                                :: ispin
    5092             :       REAL(KIND=dp), INTENT(IN)                          :: spin_factor
    5093             : 
    5094             :       CHARACTER(len=*), PARAMETER :: routineN = 'opt_k_create_preconditioner_blk'
    5095             : 
    5096             :       INTEGER                                            :: handle
    5097             :       REAL(KIND=dp)                                      :: eps_filter
    5098             :       TYPE(dbcsr_type)                                   :: opt_k_e_dd, opt_k_e_rr, s_dd_sqrt, &
    5099             :                                                             s_rr_sqrt, t1, tmp, tmp1_n_vr, &
    5100             :                                                             tmp2_n_vr, tmp_n_vd, tmp_vd_vd_blk, &
    5101             :                                                             tmp_vr_vr_blk
    5102             : 
    5103             : ! matrices that has been computed outside the routine already
    5104             : 
    5105           0 :       CALL timeset(routineN, handle)
    5106             : 
    5107           0 :       eps_filter = almo_scf_env%eps_filter
    5108             : 
    5109             :       ! compute S_qq = (Vd^tr)*S*Vd
    5110           0 :       CALL dbcsr_create(tmp_n_vd, template=almo_scf_env%matrix_v_disc(ispin))
    5111             :       CALL dbcsr_create(tmp_vd_vd_blk, &
    5112             :                         template=almo_scf_env%matrix_vv_disc_blk(ispin), &
    5113           0 :                         matrix_type=dbcsr_type_no_symmetry)
    5114             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5115             :                           almo_scf_env%matrix_s(1), &
    5116             :                           vd_prop, &
    5117           0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    5118             :       CALL dbcsr_copy(tmp_vd_vd_blk, &
    5119           0 :                       almo_scf_env%matrix_vv_disc_blk(ispin))
    5120             :       CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
    5121             :                           0.0_dp, tmp_vd_vd_blk, &
    5122           0 :                           retain_sparsity=.TRUE.)
    5123             : 
    5124             :       CALL dbcsr_create(s_dd_sqrt, &
    5125             :                         template=almo_scf_env%matrix_vv_disc_blk(ispin), &
    5126           0 :                         matrix_type=dbcsr_type_no_symmetry)
    5127             :       CALL matrix_sqrt_Newton_Schulz(s_dd_sqrt, &
    5128             :                                      almo_scf_env%opt_k_t_dd(ispin), &
    5129             :                                      tmp_vd_vd_blk, &
    5130             :                                      threshold=eps_filter, &
    5131             :                                      order=almo_scf_env%order_lanczos, &
    5132             :                                      eps_lanczos=almo_scf_env%eps_lanczos, &
    5133           0 :                                      max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    5134             : 
    5135             :       ! compute F_qq = (Vd^tr)*F*Vd
    5136             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5137             :                           almo_scf_env%matrix_ks_0deloc(ispin), &
    5138             :                           vd_prop, &
    5139           0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    5140             :       CALL dbcsr_copy(tmp_vd_vd_blk, &
    5141           0 :                       almo_scf_env%matrix_vv_disc_blk(ispin))
    5142             :       CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
    5143             :                           0.0_dp, tmp_vd_vd_blk, &
    5144           0 :                           retain_sparsity=.TRUE.)
    5145           0 :       CALL dbcsr_release(tmp_n_vd)
    5146             : 
    5147             :       ! bring to the blocked-orthogonalized basis
    5148             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5149             :                           tmp_vd_vd_blk, &
    5150             :                           almo_scf_env%opt_k_t_dd(ispin), &
    5151           0 :                           0.0_dp, s_dd_sqrt, filter_eps=eps_filter)
    5152             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5153             :                           almo_scf_env%opt_k_t_dd(ispin), &
    5154             :                           s_dd_sqrt, &
    5155           0 :                           0.0_dp, tmp_vd_vd_blk, filter_eps=eps_filter)
    5156             : 
    5157             :       ! diagonalize the matrix
    5158             :       CALL dbcsr_create(opt_k_e_dd, &
    5159           0 :                         template=almo_scf_env%matrix_vv_disc_blk(ispin))
    5160           0 :       CALL dbcsr_release(s_dd_sqrt)
    5161             :       CALL dbcsr_create(s_dd_sqrt, &
    5162             :                         template=almo_scf_env%matrix_vv_disc_blk(ispin), &
    5163           0 :                         matrix_type=dbcsr_type_no_symmetry)
    5164             :       CALL diagonalize_diagonal_blocks(tmp_vd_vd_blk, &
    5165             :                                        s_dd_sqrt, &
    5166           0 :                                        opt_k_e_dd)
    5167             : 
    5168             :       ! obtain the transformation matrix in the discarded subspace
    5169             :       ! T = S^{-1/2}.U
    5170             :       CALL dbcsr_copy(tmp_vd_vd_blk, &
    5171           0 :                       almo_scf_env%opt_k_t_dd(ispin))
    5172             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5173             :                           tmp_vd_vd_blk, &
    5174             :                           s_dd_sqrt, &
    5175             :                           0.0_dp, almo_scf_env%opt_k_t_dd(ispin), &
    5176           0 :                           filter_eps=eps_filter)
    5177           0 :       CALL dbcsr_release(s_dd_sqrt)
    5178           0 :       CALL dbcsr_release(tmp_vd_vd_blk)
    5179             : 
    5180             :       ! copy diagonal elements of the result into rows of a matrix
    5181             :       CALL dbcsr_create(tmp, &
    5182           0 :                         template=almo_scf_env%matrix_k_blk_ones(ispin))
    5183             :       CALL dbcsr_copy(tmp, &
    5184           0 :                       almo_scf_env%matrix_k_blk_ones(ispin))
    5185             :       CALL dbcsr_create(t1, &
    5186           0 :                         template=almo_scf_env%matrix_k_blk_ones(ispin))
    5187             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5188             :                           opt_k_e_dd, tmp, &
    5189           0 :                           0.0_dp, t1, filter_eps=eps_filter)
    5190           0 :       CALL dbcsr_release(opt_k_e_dd)
    5191             : 
    5192             :       ! compute S_pp = X*sigma_oo_inv*X^tr
    5193             :       CALL dbcsr_create(tmp_vr_vr_blk, &
    5194             :                         template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
    5195           0 :                         matrix_type=dbcsr_type_no_symmetry)
    5196             :       CALL dbcsr_copy(tmp_vr_vr_blk, &
    5197           0 :                       almo_scf_env%matrix_sigma_vv_blk(ispin))
    5198             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5199             :                           almo_scf_env%matrix_x(ispin), &
    5200             :                           oo_inv_x_tr, &
    5201             :                           0.0_dp, tmp_vr_vr_blk, &
    5202           0 :                           retain_sparsity=.TRUE.)
    5203             : 
    5204             :       ! obtain the orthogonalization matrix
    5205             :       CALL dbcsr_create(s_rr_sqrt, &
    5206             :                         template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
    5207           0 :                         matrix_type=dbcsr_type_no_symmetry)
    5208             :       CALL matrix_sqrt_Newton_Schulz(s_rr_sqrt, &
    5209             :                                      almo_scf_env%opt_k_t_rr(ispin), &
    5210             :                                      tmp_vr_vr_blk, &
    5211             :                                      threshold=eps_filter, &
    5212             :                                      order=almo_scf_env%order_lanczos, &
    5213             :                                      eps_lanczos=almo_scf_env%eps_lanczos, &
    5214           0 :                                      max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    5215             : 
    5216             :       ! compute F_pp = X*sig_oo_inv*(T^tr)*F*T*sig_oo_inv*(X^tr)
    5217             :       CALL dbcsr_create(tmp1_n_vr, &
    5218           0 :                         template=almo_scf_env%matrix_v(ispin))
    5219             :       CALL dbcsr_create(tmp2_n_vr, &
    5220           0 :                         template=almo_scf_env%matrix_v(ispin))
    5221             :       CALL dbcsr_multiply("N", "N", 1.0_dp, t_curr, oo_inv_x_tr, &
    5222           0 :                           0.0_dp, tmp1_n_vr, filter_eps=eps_filter)
    5223             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5224             :                           almo_scf_env%matrix_ks_0deloc(ispin), &
    5225             :                           tmp1_n_vr, &
    5226           0 :                           0.0_dp, tmp2_n_vr, filter_eps=eps_filter)
    5227             :       CALL dbcsr_multiply("T", "N", 1.0_dp, tmp1_n_vr, tmp2_n_vr, &
    5228             :                           0.0_dp, tmp_vr_vr_blk, &
    5229           0 :                           retain_sparsity=.TRUE.)
    5230           0 :       CALL dbcsr_release(tmp1_n_vr)
    5231           0 :       CALL dbcsr_release(tmp2_n_vr)
    5232             : 
    5233             :       ! bring to the blocked-orthogonalized basis
    5234             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5235             :                           tmp_vr_vr_blk, &
    5236             :                           almo_scf_env%opt_k_t_rr(ispin), &
    5237           0 :                           0.0_dp, s_rr_sqrt, filter_eps=eps_filter)
    5238             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5239             :                           almo_scf_env%opt_k_t_rr(ispin), &
    5240             :                           s_rr_sqrt, &
    5241           0 :                           0.0_dp, tmp_vr_vr_blk, filter_eps=eps_filter)
    5242             : 
    5243             :       ! diagonalize the matrix
    5244             :       CALL dbcsr_create(opt_k_e_rr, &
    5245           0 :                         template=almo_scf_env%matrix_sigma_vv_blk(ispin))
    5246           0 :       CALL dbcsr_release(s_rr_sqrt)
    5247             :       CALL dbcsr_create(s_rr_sqrt, &
    5248             :                         template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
    5249           0 :                         matrix_type=dbcsr_type_no_symmetry)
    5250             :       CALL diagonalize_diagonal_blocks(tmp_vr_vr_blk, &
    5251             :                                        s_rr_sqrt, &
    5252           0 :                                        opt_k_e_rr)
    5253             : 
    5254             :       ! obtain the transformation matrix in the retained subspace
    5255             :       ! T = S^{-1/2}.U
    5256             :       CALL dbcsr_copy(tmp_vr_vr_blk, &
    5257           0 :                       almo_scf_env%opt_k_t_rr(ispin))
    5258             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5259             :                           tmp_vr_vr_blk, &
    5260             :                           s_rr_sqrt, &
    5261             :                           0.0_dp, almo_scf_env%opt_k_t_rr(ispin), &
    5262           0 :                           filter_eps=eps_filter)
    5263           0 :       CALL dbcsr_release(s_rr_sqrt)
    5264           0 :       CALL dbcsr_release(tmp_vr_vr_blk)
    5265             : 
    5266             :       ! copy diagonal elements of the result into cols of a matrix
    5267             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5268             :                           tmp, opt_k_e_rr, &
    5269             :                           0.0_dp, almo_scf_env%opt_k_denom(ispin), &
    5270           0 :                           filter_eps=eps_filter)
    5271           0 :       CALL dbcsr_release(opt_k_e_rr)
    5272           0 :       CALL dbcsr_release(tmp)
    5273             : 
    5274             :       ! form the denominator matrix
    5275             :       CALL dbcsr_add(almo_scf_env%opt_k_denom(ispin), t1, &
    5276           0 :                      -1.0_dp, 1.0_dp)
    5277           0 :       CALL dbcsr_release(t1)
    5278             :       CALL dbcsr_scale(almo_scf_env%opt_k_denom(ispin), &
    5279           0 :                        2.0_dp*spin_factor)
    5280             : 
    5281             :       CALL dbcsr_function_of_elements(almo_scf_env%opt_k_denom(ispin), &
    5282           0 :                                       dbcsr_func_inverse)
    5283             :       CALL dbcsr_filter(almo_scf_env%opt_k_denom(ispin), &
    5284           0 :                         eps_filter)
    5285             : 
    5286           0 :       CALL timestop(handle)
    5287             : 
    5288           0 :    END SUBROUTINE opt_k_create_preconditioner_blk
    5289             : 
    5290             : ! **************************************************************************************************
    5291             : !> \brief Applies a block-diagonal preconditioner for the optimization of
    5292             : !>        k matrix (preconditioner matrices must be calculated and stored
    5293             : !>        beforehand)
    5294             : !> \param almo_scf_env ...
    5295             : !> \param step ...
    5296             : !> \param grad ...
    5297             : !> \param ispin ...
    5298             : !> \par History
    5299             : !>       2011.10 created [Rustam Z Khaliullin]
    5300             : !> \author Rustam Z Khaliullin
    5301             : ! **************************************************************************************************
    5302           0 :    SUBROUTINE opt_k_apply_preconditioner_blk(almo_scf_env, step, grad, ispin)
    5303             : 
    5304             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    5305             :       TYPE(dbcsr_type), INTENT(OUT)                      :: step
    5306             :       TYPE(dbcsr_type), INTENT(IN)                       :: grad
    5307             :       INTEGER, INTENT(IN)                                :: ispin
    5308             : 
    5309             :       CHARACTER(len=*), PARAMETER :: routineN = 'opt_k_apply_preconditioner_blk'
    5310             : 
    5311             :       INTEGER                                            :: handle
    5312             :       REAL(KIND=dp)                                      :: eps_filter
    5313             :       TYPE(dbcsr_type)                                   :: tmp_k
    5314             : 
    5315           0 :       CALL timeset(routineN, handle)
    5316             : 
    5317           0 :       eps_filter = almo_scf_env%eps_filter
    5318             : 
    5319           0 :       CALL dbcsr_create(tmp_k, template=almo_scf_env%matrix_k_blk(ispin))
    5320             : 
    5321             :       ! transform gradient to the correct "diagonal" basis
    5322             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5323             :                           grad, almo_scf_env%opt_k_t_rr(ispin), &
    5324           0 :                           0.0_dp, tmp_k, filter_eps=eps_filter)
    5325             :       CALL dbcsr_multiply("T", "N", 1.0_dp, &
    5326             :                           almo_scf_env%opt_k_t_dd(ispin), tmp_k, &
    5327           0 :                           0.0_dp, step, filter_eps=eps_filter)
    5328             : 
    5329             :       ! apply diagonal preconditioner
    5330             :       CALL dbcsr_hadamard_product(step, &
    5331           0 :                                   almo_scf_env%opt_k_denom(ispin), tmp_k)
    5332             : 
    5333             :       ! back-transform the result to the initial basis
    5334             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5335             :                           almo_scf_env%opt_k_t_dd(ispin), tmp_k, &
    5336           0 :                           0.0_dp, step, filter_eps=eps_filter)
    5337             :       CALL dbcsr_multiply("N", "T", 1.0_dp, &
    5338             :                           step, almo_scf_env%opt_k_t_rr(ispin), &
    5339           0 :                           0.0_dp, tmp_k, filter_eps=eps_filter)
    5340             : 
    5341           0 :       CALL dbcsr_copy(step, tmp_k)
    5342             : 
    5343           0 :       CALL dbcsr_release(tmp_k)
    5344             : 
    5345           0 :       CALL timestop(handle)
    5346             : 
    5347           0 :    END SUBROUTINE opt_k_apply_preconditioner_blk
    5348             : 
    5349             : !! **************************************************************************************************
    5350             : !!> \brief Reduce the number of virtual orbitals by rotating them within
    5351             : !!>        a domain. The rotation is such that minimizes the frobenius norm of
    5352             : !!>        the Fov domain-blocks of the discarded virtuals
    5353             : !!> \par History
    5354             : !!>       2011.08 created [Rustam Z Khaliullin]
    5355             : !!> \author Rustam Z Khaliullin
    5356             : !! **************************************************************************************************
    5357             : !  SUBROUTINE truncate_subspace_v_blk(qs_env,almo_scf_env)
    5358             : !
    5359             : !    TYPE(qs_environment_type), POINTER       :: qs_env
    5360             : !    TYPE(almo_scf_env_type)                  :: almo_scf_env
    5361             : !
    5362             : !    CHARACTER(len=*), PARAMETER :: routineN = 'truncate_subspace_v_blk', &
    5363             : !      routineP = moduleN//':'//routineN
    5364             : !
    5365             : !    INTEGER                                  :: handle, ispin, iblock_row, &
    5366             : !                                                iblock_col, iblock_row_size, &
    5367             : !                                                iblock_col_size, retained_v, &
    5368             : !                                                iteration, line_search_step, &
    5369             : !                                                unit_nr, line_search_step_last
    5370             : !    REAL(KIND=dp)                            :: t1, obj_function, grad_norm,&
    5371             : !                                                c0, b0, a0, obj_function_new,&
    5372             : !                                                t2, alpha, ff1, ff2, step1,&
    5373             : !                                                step2,&
    5374             : !                                                frob_matrix_base,&
    5375             : !                                                frob_matrix
    5376             : !    LOGICAL                                  :: safe_mode, converged, &
    5377             : !                                                prepare_to_exit, failure
    5378             : !    TYPE(cp_logger_type), POINTER            :: logger
    5379             : !    TYPE(dbcsr_type)                      :: Fon, Fov, Fov_filtered, &
    5380             : !                                                temp1_oo, temp2_oo, Fov_original, &
    5381             : !                                                temp0_ov, U_blk_tot, U_blk, &
    5382             : !                                                grad_blk, step_blk, matrix_filter, &
    5383             : !                                                v_full_new,v_full_tmp,&
    5384             : !                                                matrix_sigma_vv_full,&
    5385             : !                                                matrix_sigma_vv_full_sqrt,&
    5386             : !                                                matrix_sigma_vv_full_sqrt_inv,&
    5387             : !                                                matrix_tmp1,&
    5388             : !                                                matrix_tmp2
    5389             : !
    5390             : !    REAL(kind=dp), DIMENSION(:, :), POINTER  :: data_p, p_new_block
    5391             : !    TYPE(dbcsr_iterator_type)                  :: iter
    5392             : !
    5393             : !
    5394             : !REAL(kind=dp), DIMENSION(:), ALLOCATABLE     :: eigenvalues, WORK
    5395             : !REAL(kind=dp), DIMENSION(:,:), ALLOCATABLE   :: data_copy, left_vectors, right_vectors
    5396             : !INTEGER                                      :: LWORK, INFO
    5397             : !TYPE(dbcsr_type)                          :: temp_u_v_full_blk
    5398             : !
    5399             : !    CALL timeset(routineN,handle)
    5400             : !
    5401             : !    safe_mode=.TRUE.
    5402             : !
    5403             : !    ! get a useful output_unit
    5404             : !    logger => cp_get_default_logger()
    5405             : !    IF (logger%para_env%is_source()) THEN
    5406             : !       unit_nr=cp_logger_get_default_unit_nr(logger,local=.TRUE.)
    5407             : !    ELSE
    5408             : !       unit_nr=-1
    5409             : !    ENDIF
    5410             : !
    5411             : !    DO ispin=1,almo_scf_env%nspins
    5412             : !
    5413             : !       t1 = m_walltime()
    5414             : !
    5415             : !       !!!!!!!!!!!!!!!!!
    5416             : !       ! 0. Orthogonalize virtuals
    5417             : !       !    Unfortunately, we have to do it in the FULL V subspace :(
    5418             : !
    5419             : !       CALL dbcsr_init(v_full_new)
    5420             : !       CALL dbcsr_create(v_full_new,&
    5421             : !               template=almo_scf_env%matrix_v_full_blk(ispin),&
    5422             : !               matrix_type=dbcsr_type_no_symmetry)
    5423             : !
    5424             : !       ! project the occupied subspace out
    5425             : !       CALL almo_scf_p_out_from_v(almo_scf_env%matrix_v_full_blk(ispin),&
    5426             : !              v_full_new,almo_scf_env%matrix_ov_full(ispin),&
    5427             : !              ispin,almo_scf_env)
    5428             : !
    5429             : !       ! init overlap and its functions
    5430             : !       CALL dbcsr_init(matrix_sigma_vv_full)
    5431             : !       CALL dbcsr_init(matrix_sigma_vv_full_sqrt)
    5432             : !       CALL dbcsr_init(matrix_sigma_vv_full_sqrt_inv)
    5433             : !       CALL dbcsr_create(matrix_sigma_vv_full,&
    5434             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5435             : !               matrix_type=dbcsr_type_no_symmetry)
    5436             : !       CALL dbcsr_create(matrix_sigma_vv_full_sqrt,&
    5437             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5438             : !               matrix_type=dbcsr_type_no_symmetry)
    5439             : !       CALL dbcsr_create(matrix_sigma_vv_full_sqrt_inv,&
    5440             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5441             : !               matrix_type=dbcsr_type_no_symmetry)
    5442             : !
    5443             : !       ! construct VV overlap
    5444             : !       CALL almo_scf_mo_to_sigma(v_full_new,&
    5445             : !               matrix_sigma_vv_full,&
    5446             : !               almo_scf_env%matrix_s(1),&
    5447             : !               almo_scf_env%eps_filter)
    5448             : !
    5449             : !       IF (unit_nr>0) THEN
    5450             : !          WRITE(unit_nr,*) "sqrt and inv(sqrt) of the FULL virtual MO overlap"
    5451             : !       ENDIF
    5452             : !
    5453             : !       ! construct orthogonalization matrices
    5454             : !       CALL matrix_sqrt_Newton_Schulz(matrix_sigma_vv_full_sqrt,&
    5455             : !                                      matrix_sigma_vv_full_sqrt_inv,&
    5456             : !                                      matrix_sigma_vv_full,&
    5457             : !                                      threshold=almo_scf_env%eps_filter,&
    5458             : !                                      order=almo_scf_env%order_lanczos,&
    5459             : !                                      eps_lanczos=almo_scf_env%eps_lanczos,&
    5460             : !                                      max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    5461             : !       IF (safe_mode) THEN
    5462             : !          CALL dbcsr_init(matrix_tmp1)
    5463             : !          CALL dbcsr_create(matrix_tmp1,template=matrix_sigma_vv_full,&
    5464             : !                               matrix_type=dbcsr_type_no_symmetry)
    5465             : !          CALL dbcsr_init(matrix_tmp2)
    5466             : !          CALL dbcsr_create(matrix_tmp2,template=matrix_sigma_vv_full,&
    5467             : !                               matrix_type=dbcsr_type_no_symmetry)
    5468             : !
    5469             : !          CALL dbcsr_multiply("N","N",1.0_dp,matrix_sigma_vv_full_sqrt_inv,&
    5470             : !                                 matrix_sigma_vv_full,&
    5471             : !                                 0.0_dp,matrix_tmp1,filter_eps=almo_scf_env%eps_filter)
    5472             : !          CALL dbcsr_multiply("N","N",1.0_dp,matrix_tmp1,&
    5473             : !                                 matrix_sigma_vv_full_sqrt_inv,&
    5474             : !                                 0.0_dp,matrix_tmp2,filter_eps=almo_scf_env%eps_filter)
    5475             : !
    5476             : !          frob_matrix_base=dbcsr_frobenius_norm(matrix_tmp2)
    5477             : !          CALL dbcsr_add_on_diag(matrix_tmp2,-1.0_dp)
    5478             : !          frob_matrix=dbcsr_frobenius_norm(matrix_tmp2)
    5479             : !          IF (unit_nr>0) THEN
    5480             : !             WRITE(unit_nr,*) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)",frob_matrix/frob_matrix_base
    5481             : !          ENDIF
    5482             : !
    5483             : !          CALL dbcsr_release(matrix_tmp1)
    5484             : !          CALL dbcsr_release(matrix_tmp2)
    5485             : !       ENDIF
    5486             : !
    5487             : !       ! discard unnecessary overlap functions
    5488             : !       CALL dbcsr_release(matrix_sigma_vv_full)
    5489             : !       CALL dbcsr_release(matrix_sigma_vv_full_sqrt)
    5490             : !
    5491             : !! this can be re-written because we have (1-P)|v>
    5492             : !
    5493             : !       !!!!!!!!!!!!!!!!!!!
    5494             : !       ! 1. Compute F_ov
    5495             : !       CALL dbcsr_init(Fon)
    5496             : !       CALL dbcsr_create(Fon,&
    5497             : !               template=almo_scf_env%matrix_v_full_blk(ispin))
    5498             : !       CALL dbcsr_init(Fov)
    5499             : !       CALL dbcsr_create(Fov,&
    5500             : !               template=almo_scf_env%matrix_ov_full(ispin))
    5501             : !       CALL dbcsr_init(Fov_filtered)
    5502             : !       CALL dbcsr_create(Fov_filtered,&
    5503             : !               template=almo_scf_env%matrix_ov_full(ispin))
    5504             : !       CALL dbcsr_init(temp1_oo)
    5505             : !       CALL dbcsr_create(temp1_oo,&
    5506             : !               template=almo_scf_env%matrix_sigma(ispin),&
    5507             : !               !matrix_type=dbcsr_type_no_symmetry)
    5508             : !       CALL dbcsr_init(temp2_oo)
    5509             : !       CALL dbcsr_create(temp2_oo,&
    5510             : !               template=almo_scf_env%matrix_sigma(ispin),&
    5511             : !               matrix_type=dbcsr_type_no_symmetry)
    5512             : !
    5513             : !       CALL dbcsr_multiply("T","N",1.0_dp,almo_scf_env%matrix_t_blk(ispin),&
    5514             : !               almo_scf_env%matrix_ks_0deloc(ispin),&
    5515             : !               0.0_dp,Fon,filter_eps=almo_scf_env%eps_filter)
    5516             : !
    5517             : !       CALL dbcsr_multiply("N","N",1.0_dp,Fon,&
    5518             : !               almo_scf_env%matrix_v_full_blk(ispin),&
    5519             : !               0.0_dp,Fov,filter_eps=almo_scf_env%eps_filter)
    5520             : !
    5521             : !       CALL dbcsr_multiply("N","N",1.0_dp,Fon,&
    5522             : !               almo_scf_env%matrix_t_blk(ispin),&
    5523             : !               0.0_dp,temp1_oo,filter_eps=almo_scf_env%eps_filter)
    5524             : !
    5525             : !       CALL dbcsr_multiply("N","N",1.0_dp,temp1_oo,&
    5526             : !               almo_scf_env%matrix_sigma_inv(ispin),&
    5527             : !               0.0_dp,temp2_oo,filter_eps=almo_scf_env%eps_filter)
    5528             : !       CALL dbcsr_release(temp1_oo)
    5529             : !
    5530             : !       CALL dbcsr_multiply("T","N",1.0_dp,almo_scf_env%matrix_t_blk(ispin),&
    5531             : !               almo_scf_env%matrix_s(1),&
    5532             : !               0.0_dp,Fon,filter_eps=almo_scf_env%eps_filter)
    5533             : !
    5534             : !       CALL dbcsr_multiply("N","N",1.0_dp,Fon,&
    5535             : !               almo_scf_env%matrix_v_full_blk(ispin),&
    5536             : !               0.0_dp,Fov_filtered,filter_eps=almo_scf_env%eps_filter)
    5537             : !       CALL dbcsr_release(Fon)
    5538             : !
    5539             : !       CALL dbcsr_multiply("N","N",-1.0_dp,temp2_oo,&
    5540             : !               Fov_filtered,&
    5541             : !               1.0_dp,Fov,filter_eps=almo_scf_env%eps_filter)
    5542             : !       CALL dbcsr_release(temp2_oo)
    5543             : !
    5544             : !       CALL dbcsr_multiply("N","N",1.0_dp,almo_scf_env%matrix_sigma_inv(ispin),&
    5545             : !               Fov,0.0_dp,Fov_filtered,filter_eps=almo_scf_env%eps_filter)
    5546             : !
    5547             : !       CALL dbcsr_multiply("N","N",1.0_dp,Fov_filtered,&
    5548             : !               matrix_sigma_vv_full_sqrt_inv,&
    5549             : !               0.0_dp,Fov,filter_eps=almo_scf_env%eps_filter)
    5550             : !       !CALL dbcsr_copy(Fov,Fov_filtered)
    5551             : !CALL dbcsr_print(Fov)
    5552             : !
    5553             : !       IF (safe_mode) THEN
    5554             : !          CALL dbcsr_init(Fov_original)
    5555             : !          CALL dbcsr_create(Fov_original,template=Fov)
    5556             : !          CALL dbcsr_copy(Fov_original,Fov)
    5557             : !       ENDIF
    5558             : !
    5559             : !!! remove diagonal blocks
    5560             : !!CALL dbcsr_iterator_start(iter,Fov)
    5561             : !!DO WHILE (dbcsr_iterator_blocks_left(iter))
    5562             : !!
    5563             : !!   CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
    5564             : !!           row_size=iblock_row_size,col_size=iblock_col_size)
    5565             : !!
    5566             : !!   IF (iblock_row.eq.iblock_col) data_p(:,:)=0.0_dp
    5567             : !!
    5568             : !!ENDDO
    5569             : !!CALL dbcsr_iterator_stop(iter)
    5570             : !!CALL dbcsr_finalize(Fov)
    5571             : !
    5572             : !!! perform svd of blocks
    5573             : !!!!! THIS ROUTINE WORKS ONLY ON ONE CPU AND ONLY FOR 2 MOLECULES !!!
    5574             : !!CALL dbcsr_init(temp_u_v_full_blk)
    5575             : !!CALL dbcsr_create(temp_u_v_full_blk,&
    5576             : !!        template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5577             : !!        matrix_type=dbcsr_type_no_symmetry)
    5578             : !!
    5579             : !!CALL dbcsr_work_create(temp_u_v_full_blk,&
    5580             : !!        work_mutable=.TRUE.)
    5581             : !!CALL dbcsr_iterator_start(iter,Fov)
    5582             : !!DO WHILE (dbcsr_iterator_blocks_left(iter))
    5583             : !!
    5584             : !!   CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
    5585             : !!           row_size=iblock_row_size,col_size=iblock_col_size)
    5586             : !!
    5587             : !!   IF (iblock_row.ne.iblock_col) THEN
    5588             : !!
    5589             : !!      ! Prepare data
    5590             : !!      allocate(eigenvalues(min(iblock_row_size,iblock_col_size)))
    5591             : !!      allocate(data_copy(iblock_row_size,iblock_col_size))
    5592             : !!      allocate(left_vectors(iblock_row_size,iblock_row_size))
    5593             : !!      allocate(right_vectors(iblock_col_size,iblock_col_size))
    5594             : !!      data_copy(:,:)=data_p(:,:)
    5595             : !!
    5596             : !!      ! Query the optimal workspace for dgesvd
    5597             : !!      LWORK = -1
    5598             : !!      allocate(WORK(MAX(1,LWORK)))
    5599             : !!      CALL DGESVD('N','A',iblock_row_size,iblock_col_size,data_copy,&
    5600             : !!              iblock_row_size,eigenvalues,left_vectors,iblock_row_size,&
    5601             : !!              right_vectors,iblock_col_size,WORK,LWORK,INFO)
    5602             : !!      LWORK = INT(WORK( 1 ))
    5603             : !!      deallocate(WORK)
    5604             : !!
    5605             : !!      ! Allocate the workspace and perform svd
    5606             : !!      allocate(WORK(MAX(1,LWORK)))
    5607             : !!      CALL DGESVD('N','A',iblock_row_size,iblock_col_size,data_copy,&
    5608             : !!              iblock_row_size,eigenvalues,left_vectors,iblock_row_size,&
    5609             : !!              right_vectors,iblock_col_size,WORK,LWORK,INFO)
    5610             : !!      deallocate(WORK)
    5611             : !!      IF( INFO.NE.0 ) THEN
    5612             : !!         CPABORT("DGESVD failed")
    5613             : !!      END IF
    5614             : !!
    5615             : !!      ! copy right singular vectors into a unitary matrix
    5616             : !!      NULLIFY (p_new_block)
    5617             : !!      CALL dbcsr_reserve_block2d(temp_u_v_full_blk,iblock_col,iblock_col,p_new_block)
    5618             : !!      CPASSERT(ASSOCIATED(p_new_block))
    5619             : !!      p_new_block(:,:) = right_vectors(:,:)
    5620             : !!
    5621             : !!      deallocate(eigenvalues)
    5622             : !!      deallocate(data_copy)
    5623             : !!      deallocate(left_vectors)
    5624             : !!      deallocate(right_vectors)
    5625             : !!
    5626             : !!   ENDIF
    5627             : !!ENDDO
    5628             : !!CALL dbcsr_iterator_stop(iter)
    5629             : !!CALL dbcsr_finalize(temp_u_v_full_blk)
    5630             : !!!CALL dbcsr_print(temp_u_v_full_blk)
    5631             : !!CALL dbcsr_multiply("N","T",1.0_dp,Fov,temp_u_v_full_blk,&
    5632             : !!        0.0_dp,Fov_filtered,filter_eps=almo_scf_env%eps_filter)
    5633             : !!
    5634             : !!CALL dbcsr_copy(Fov,Fov_filtered)
    5635             : !!CALL dbcsr_print(Fov)
    5636             : !
    5637             : !       !!!!!!!!!!!!!!!!!!!
    5638             : !       ! 2. Initialize variables
    5639             : !
    5640             : !       ! temp space
    5641             : !       CALL dbcsr_init(temp0_ov)
    5642             : !       CALL dbcsr_create(temp0_ov,&
    5643             : !               template=almo_scf_env%matrix_ov_full(ispin))
    5644             : !
    5645             : !       ! current unitary matrix
    5646             : !       CALL dbcsr_init(U_blk)
    5647             : !       CALL dbcsr_create(U_blk,&
    5648             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5649             : !               matrix_type=dbcsr_type_no_symmetry)
    5650             : !
    5651             : !       ! unitary matrix accumulator
    5652             : !       CALL dbcsr_init(U_blk_tot)
    5653             : !       CALL dbcsr_create(U_blk_tot,&
    5654             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5655             : !               matrix_type=dbcsr_type_no_symmetry)
    5656             : !       CALL dbcsr_add_on_diag(U_blk_tot,1.0_dp)
    5657             : !
    5658             : !!CALL dbcsr_add_on_diag(U_blk,1.0_dp)
    5659             : !!CALL dbcsr_multiply("N","T",1.0_dp,U_blk,temp_u_v_full_blk,&
    5660             : !!        0.0_dp,U_blk_tot,filter_eps=almo_scf_env%eps_filter)
    5661             : !!
    5662             : !!CALL dbcsr_release(temp_u_v_full_blk)
    5663             : !
    5664             : !       ! init gradient
    5665             : !       CALL dbcsr_init(grad_blk)
    5666             : !       CALL dbcsr_create(grad_blk,&
    5667             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5668             : !               matrix_type=dbcsr_type_no_symmetry)
    5669             : !
    5670             : !       ! init step matrix
    5671             : !       CALL dbcsr_init(step_blk)
    5672             : !       CALL dbcsr_create(step_blk,&
    5673             : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5674             : !               matrix_type=dbcsr_type_no_symmetry)
    5675             : !
    5676             : !       ! "retain discarded" filter (0.0 - retain, 1.0 - discard)
    5677             : !       CALL dbcsr_init(matrix_filter)
    5678             : !       CALL dbcsr_create(matrix_filter,&
    5679             : !               template=almo_scf_env%matrix_ov_full(ispin))
    5680             : !       ! copy Fov into the filter matrix temporarily
    5681             : !       ! so we know which blocks contain significant elements
    5682             : !       CALL dbcsr_copy(matrix_filter,Fov)
    5683             : !
    5684             : !       ! fill out filter elements block-by-block
    5685             : !       CALL dbcsr_iterator_start(iter,matrix_filter)
    5686             : !       DO WHILE (dbcsr_iterator_blocks_left(iter))
    5687             : !
    5688             : !          CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
    5689             : !                  row_size=iblock_row_size,col_size=iblock_col_size)
    5690             : !
    5691             : !          retained_v=almo_scf_env%nvirt_of_domain(iblock_col,ispin)
    5692             : !
    5693             : !          data_p(:,1:retained_v)=0.0_dp
    5694             : !          data_p(:,(retained_v+1):iblock_col_size)=1.0_dp
    5695             : !
    5696             : !       ENDDO
    5697             : !       CALL dbcsr_iterator_stop(iter)
    5698             : !       CALL dbcsr_finalize(matrix_filter)
    5699             : !
    5700             : !       ! apply the filter
    5701             : !       CALL dbcsr_hadamard_product(Fov,matrix_filter,Fov_filtered)
    5702             : !
    5703             : !       !!!!!!!!!!!!!!!!!!!!!
    5704             : !       ! 3. start iterative minimization of the elements to be discarded
    5705             : !       iteration=0
    5706             : !       converged=.FALSE.
    5707             : !       prepare_to_exit=.FALSE.
    5708             : !       DO
    5709             : !
    5710             : !          iteration=iteration+1
    5711             : !
    5712             : !          !!!!!!!!!!!!!!!!!!!!!!!!!
    5713             : !          ! 4. compute the gradient
    5714             : !          CALL dbcsr_set(grad_blk,0.0_dp)
    5715             : !          ! create the diagonal blocks only
    5716             : !          CALL dbcsr_add_on_diag(grad_blk,1.0_dp)
    5717             : !
    5718             : !          CALL dbcsr_multiply("T","N",2.0_dp,Fov_filtered,Fov,&
    5719             : !                  0.0_dp,grad_blk,retain_sparsity=.TRUE.,&
    5720             : !                  filter_eps=almo_scf_env%eps_filter)
    5721             : !          CALL dbcsr_multiply("T","N",-2.0_dp,Fov,Fov_filtered,&
    5722             : !                  1.0_dp,grad_blk,retain_sparsity=.TRUE.,&
    5723             : !                  filter_eps=almo_scf_env%eps_filter)
    5724             : !
    5725             : !          !!!!!!!!!!!!!!!!!!!!!!!
    5726             : !          ! 5. check convergence
    5727             : !          obj_function = 0.5_dp*(dbcsr_frobenius_norm(Fov_filtered))**2
    5728             : !          grad_norm = dbcsr_frobenius_norm(grad_blk)
    5729             : !          converged=(grad_norm.lt.almo_scf_env%truncate_v_eps_convergence)
    5730             : !          IF (converged.OR.(iteration.ge.almo_scf_env%truncate_v_max_iter)) THEN
    5731             : !             prepare_to_exit=.TRUE.
    5732             : !          ENDIF
    5733             : !
    5734             : !          IF (.NOT.prepare_to_exit) THEN
    5735             : !
    5736             : !             !!!!!!!!!!!!!!!!!!!!!!!
    5737             : !             ! 6. perform steps in the direction of the gradient
    5738             : !             !    a. first, perform a trial step to "see" the parameters
    5739             : !             !       of the parabola along the gradient:
    5740             : !             !       a0 * x^2 + b0 * x + c0
    5741             : !             !    b. then perform the step to the bottom of the parabola
    5742             : !
    5743             : !             ! get c0
    5744             : !             c0 = obj_function
    5745             : !             ! get b0 <= d_f/d_alpha along grad
    5746             : !             !!!CALL dbcsr_multiply("N","N",4.0_dp,Fov,grad_blk,&
    5747             : !             !!!        0.0_dp,temp0_ov,&
    5748             : !             !!!        filter_eps=almo_scf_env%eps_filter)
    5749             : !             !!!CALL dbcsr_dot(Fov_filtered,temp0_ov,b0)
    5750             : !
    5751             : !             alpha=almo_scf_env%truncate_v_trial_step_size
    5752             : !
    5753             : !             line_search_step_last=3
    5754             : !             DO line_search_step=1,line_search_step_last
    5755             : !                CALL dbcsr_copy(step_blk,grad_blk)
    5756             : !                CALL dbcsr_scale(step_blk,-1.0_dp*alpha)
    5757             : !                CALL generator_to_unitary(step_blk,U_blk,&
    5758             : !                        almo_scf_env%eps_filter)
    5759             : !                CALL dbcsr_multiply("N","N",1.0_dp,Fov,U_blk,0.0_dp,temp0_ov,&
    5760             : !                        filter_eps=almo_scf_env%eps_filter)
    5761             : !                CALL dbcsr_hadamard_product(temp0_ov,matrix_filter,&
    5762             : !                        Fov_filtered)
    5763             : !
    5764             : !                obj_function_new = 0.5_dp*(dbcsr_frobenius_norm(Fov_filtered))**2
    5765             : !                IF (line_search_step.eq.1) THEN
    5766             : !                   ff1 = obj_function_new
    5767             : !                   step1 = alpha
    5768             : !                ELSE IF (line_search_step.eq.2) THEN
    5769             : !                   ff2 = obj_function_new
    5770             : !                   step2 = alpha
    5771             : !                ENDIF
    5772             : !
    5773             : !                IF (unit_nr>0.AND.(line_search_step.ne.line_search_step_last)) THEN
    5774             : !                   WRITE(unit_nr,'(T6,A,1X,I3,1X,F10.3,E12.3,E12.3,E12.3)') &
    5775             : !                         "JOINT_SVD_lin",&
    5776             : !                         iteration,&
    5777             : !                         alpha,&
    5778             : !                         obj_function,&
    5779             : !                         obj_function_new,&
    5780             : !                         obj_function_new-obj_function
    5781             : !                ENDIF
    5782             : !
    5783             : !                IF (line_search_step.eq.1) THEN
    5784             : !                   alpha=2.0_dp*alpha
    5785             : !                ENDIF
    5786             : !                IF (line_search_step.eq.2) THEN
    5787             : !                   a0 = ((ff1-c0)/step1 - (ff2-c0)/step2) / (step1 - step2)
    5788             : !                   b0 = (ff1-c0)/step1 - a0*step1
    5789             : !                   ! step size in to the bottom of "the parabola"
    5790             : !                   alpha=-b0/(2.0_dp*a0)
    5791             : !                   ! update the default step size
    5792             : !                   almo_scf_env%truncate_v_trial_step_size=alpha
    5793             : !                ENDIF
    5794             : !                !!!IF (line_search_step.eq.1) THEN
    5795             : !                !!!   a0 = (obj_function_new - b0 * alpha - c0) / (alpha*alpha)
    5796             : !                !!!   ! step size in to the bottom of "the parabola"
    5797             : !                !!!   alpha=-b0/(2.0_dp*a0)
    5798             : !                !!!   !IF (alpha.gt.10.0_dp) alpha=10.0_dp
    5799             : !                !!!ENDIF
    5800             : !
    5801             : !             ENDDO
    5802             : !
    5803             : !             ! update Fov and U_blk_tot (use grad_blk as tmp storage)
    5804             : !             CALL dbcsr_copy(Fov,temp0_ov)
    5805             : !             CALL dbcsr_multiply("N","N",1.0_dp,U_blk_tot,U_blk,&
    5806             : !                     0.0_dp,grad_blk,&
    5807             : !                     filter_eps=almo_scf_env%eps_filter)
    5808             : !             CALL dbcsr_copy(U_blk_tot,grad_blk)
    5809             : !
    5810             : !          ENDIF
    5811             : !
    5812             : !          t2 = m_walltime()
    5813             : !
    5814             : !          IF (unit_nr>0) THEN
    5815             : !             WRITE(unit_nr,'(T6,A,1X,I3,1X,F10.3,E12.3,E12.3,E12.3,E12.3,F10.3)') &
    5816             : !                   "JOINT_SVD_itr",&
    5817             : !                   iteration,&
    5818             : !                   alpha,&
    5819             : !                   obj_function,&
    5820             : !                   obj_function_new,&
    5821             : !                   obj_function_new-obj_function,&
    5822             : !                   grad_norm,&
    5823             : !                   t2-t1
    5824             : !                   !(flop1+flop2)/(1.0E6_dp*(t2-t1))
    5825             : !             CALL m_flush(unit_nr)
    5826             : !          ENDIF
    5827             : !
    5828             : !          t1 = m_walltime()
    5829             : !
    5830             : !          IF (prepare_to_exit) EXIT
    5831             : !
    5832             : !       ENDDO ! stop iterations
    5833             : !
    5834             : !       IF (safe_mode) THEN
    5835             : !          CALL dbcsr_multiply("N","N",1.0_dp,Fov_original,&
    5836             : !                  U_blk_tot,0.0_dp,temp0_ov,&
    5837             : !                  filter_eps=almo_scf_env%eps_filter)
    5838             : !CALL dbcsr_print(temp0_ov)
    5839             : !          CALL dbcsr_hadamard_product(temp0_ov,matrix_filter,&
    5840             : !                  Fov_filtered)
    5841             : !          obj_function_new = 0.5_dp*(dbcsr_frobenius_norm(Fov_filtered))**2
    5842             : !
    5843             : !          IF (unit_nr>0) THEN
    5844             : !             WRITE(unit_nr,'(T6,A,1X,E12.3)') &
    5845             : !                   "SANITY CHECK:",&
    5846             : !                   obj_function_new
    5847             : !             CALL m_flush(unit_nr)
    5848             : !          ENDIF
    5849             : !
    5850             : !          CALL dbcsr_release(Fov_original)
    5851             : !       ENDIF
    5852             : !
    5853             : !       CALL dbcsr_release(temp0_ov)
    5854             : !       CALL dbcsr_release(U_blk)
    5855             : !       CALL dbcsr_release(grad_blk)
    5856             : !       CALL dbcsr_release(step_blk)
    5857             : !       CALL dbcsr_release(matrix_filter)
    5858             : !       CALL dbcsr_release(Fov)
    5859             : !       CALL dbcsr_release(Fov_filtered)
    5860             : !
    5861             : !       ! compute rotated virtual orbitals
    5862             : !       CALL dbcsr_init(v_full_tmp)
    5863             : !       CALL dbcsr_create(v_full_tmp,&
    5864             : !               template=almo_scf_env%matrix_v_full_blk(ispin),&
    5865             : !               matrix_type=dbcsr_type_no_symmetry)
    5866             : !       CALL dbcsr_multiply("N","N",1.0_dp,&
    5867             : !               v_full_new,&
    5868             : !               matrix_sigma_vv_full_sqrt_inv,0.0_dp,v_full_tmp,&
    5869             : !               filter_eps=almo_scf_env%eps_filter)
    5870             : !       CALL dbcsr_multiply("N","N",1.0_dp,&
    5871             : !               v_full_tmp,&
    5872             : !               U_blk_tot,0.0_dp,v_full_new,&
    5873             : !               filter_eps=almo_scf_env%eps_filter)
    5874             : !
    5875             : !       CALL dbcsr_release(matrix_sigma_vv_full_sqrt_inv)
    5876             : !       CALL dbcsr_release(v_full_tmp)
    5877             : !       CALL dbcsr_release(U_blk_tot)
    5878             : !
    5879             : !!!!! orthogonalized virtuals are not blocked
    5880             : !       ! copy new virtuals into the truncated matrix
    5881             : !       !CALL dbcsr_work_create(almo_scf_env%matrix_v_blk(ispin),&
    5882             : !       CALL dbcsr_work_create(almo_scf_env%matrix_v(ispin),&
    5883             : !               work_mutable=.TRUE.)
    5884             : !       CALL dbcsr_iterator_start(iter,v_full_new)
    5885             : !       DO WHILE (dbcsr_iterator_blocks_left(iter))
    5886             : !
    5887             : !          CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
    5888             : !                  row_size=iblock_row_size,col_size=iblock_col_size)
    5889             : !
    5890             : !          retained_v=almo_scf_env%nvirt_of_domain(iblock_col,ispin)
    5891             : !
    5892             : !          NULLIFY (p_new_block)
    5893             : !          !CALL dbcsr_reserve_block2d(almo_scf_env%matrix_v_blk(ispin),&
    5894             : !          CALL dbcsr_reserve_block2d(almo_scf_env%matrix_v(ispin),&
    5895             : !                  iblock_row,iblock_col,p_new_block)
    5896             : !          CPASSERT(ASSOCIATED(p_new_block))
    5897             : !          CPASSERT(retained_v.gt.0)
    5898             : !          p_new_block(:,:) = data_p(:,1:retained_v)
    5899             : !
    5900             : !       ENDDO ! iterator
    5901             : !       CALL dbcsr_iterator_stop(iter)
    5902             : !       !!CALL dbcsr_finalize(almo_scf_env%matrix_v_blk(ispin))
    5903             : !       CALL dbcsr_finalize(almo_scf_env%matrix_v(ispin))
    5904             : !
    5905             : !       CALL dbcsr_release(v_full_new)
    5906             : !
    5907             : !    ENDDO ! ispin
    5908             : !
    5909             : !    CALL timestop(handle)
    5910             : !
    5911             : !  END SUBROUTINE truncate_subspace_v_blk
    5912             : 
    5913             : ! *****************************************************************************
    5914             : !> \brief Compute the gradient wrt the main variable (e.g. Theta, X)
    5915             : !> \param m_grad_out ...
    5916             : !> \param m_ks ...
    5917             : !> \param m_s ...
    5918             : !> \param m_t ...
    5919             : !> \param m_t0 ...
    5920             : !> \param m_siginv ...
    5921             : !> \param m_quench_t ...
    5922             : !> \param m_FTsiginv ...
    5923             : !> \param m_siginvTFTsiginv ...
    5924             : !> \param m_ST ...
    5925             : !> \param m_STsiginv0 ...
    5926             : !> \param m_theta ...
    5927             : !> \param domain_s_inv ...
    5928             : !> \param domain_r_down ...
    5929             : !> \param cpu_of_domain ...
    5930             : !> \param domain_map ...
    5931             : !> \param assume_t0_q0x ...
    5932             : !> \param optimize_theta ...
    5933             : !> \param normalize_orbitals ...
    5934             : !> \param penalty_occ_vol ...
    5935             : !> \param penalty_occ_local ...
    5936             : !> \param penalty_occ_vol_prefactor ...
    5937             : !> \param envelope_amplitude ...
    5938             : !> \param eps_filter ...
    5939             : !> \param spin_factor ...
    5940             : !> \param special_case ...
    5941             : !> \param m_sig_sqrti_ii ...
    5942             : !> \param op_sm_set ...
    5943             : !> \param weights ...
    5944             : !> \param energy_coeff ...
    5945             : !> \param localiz_coeff ...
    5946             : !> \par History
    5947             : !>       2015.03 created [Rustam Z Khaliullin]
    5948             : !> \author Rustam Z Khaliullin
    5949             : ! **************************************************************************************************
    5950        1474 :    SUBROUTINE compute_gradient(m_grad_out, m_ks, m_s, m_t, m_t0, &
    5951             :                                m_siginv, m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, m_STsiginv0, &
    5952        1474 :                                m_theta, domain_s_inv, domain_r_down, &
    5953        1474 :                                cpu_of_domain, domain_map, assume_t0_q0x, optimize_theta, &
    5954             :                                normalize_orbitals, penalty_occ_vol, penalty_occ_local, &
    5955             :                                penalty_occ_vol_prefactor, envelope_amplitude, eps_filter, spin_factor, &
    5956        1474 :                                special_case, m_sig_sqrti_ii, op_sm_set, weights, energy_coeff, &
    5957             :                                localiz_coeff)
    5958             : 
    5959             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_grad_out
    5960             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_ks, m_s, m_t, m_t0, m_siginv, &
    5961             :                                                             m_quench_t, m_FTsiginv, &
    5962             :                                                             m_siginvTFTsiginv, m_ST, m_STsiginv0, &
    5963             :                                                             m_theta
    5964             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    5965             :          INTENT(IN)                                      :: domain_s_inv, domain_r_down
    5966             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
    5967             :       TYPE(domain_map_type), INTENT(IN)                  :: domain_map
    5968             :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, optimize_theta, &
    5969             :                                                             normalize_orbitals, penalty_occ_vol
    5970             :       LOGICAL, INTENT(IN), OPTIONAL                      :: penalty_occ_local
    5971             :       REAL(KIND=dp), INTENT(IN)                          :: penalty_occ_vol_prefactor, &
    5972             :                                                             envelope_amplitude, eps_filter, &
    5973             :                                                             spin_factor
    5974             :       INTEGER, INTENT(IN)                                :: special_case
    5975             :       TYPE(dbcsr_type), INTENT(IN), OPTIONAL             :: m_sig_sqrti_ii
    5976             :       TYPE(dbcsr_p_type), DIMENSION(:, :), OPTIONAL, &
    5977             :          POINTER                                         :: op_sm_set
    5978             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN), OPTIONAL  :: weights
    5979             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: energy_coeff, localiz_coeff
    5980             : 
    5981             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'compute_gradient'
    5982             : 
    5983             :       INTEGER                                            :: dim0, handle, idim0, nao, reim
    5984             :       LOGICAL                                            :: my_penalty_local
    5985             :       REAL(KIND=dp)                                      :: coeff, energy_g_norm, my_energy_coeff, &
    5986             :                                                             my_localiz_coeff, &
    5987             :                                                             penalty_occ_vol_g_norm
    5988        1474 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: tg_diagonal
    5989             :       TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_no_2, m_tmp_no_3, &
    5990             :                                                             m_tmp_oo_1, m_tmp_oo_2, temp1, temp2, &
    5991             :                                                             tempNOcc1, tempOccOcc1
    5992             : 
    5993        1474 :       CALL timeset(routineN, handle)
    5994             : 
    5995        1474 :       IF (normalize_orbitals .AND. (.NOT. PRESENT(m_sig_sqrti_ii))) THEN
    5996           0 :          CPABORT("Normalization matrix is required")
    5997             :       END IF
    5998             : 
    5999        1474 :       my_penalty_local = .FALSE.
    6000        1474 :       my_localiz_coeff = 1.0_dp
    6001        1474 :       my_energy_coeff = 0.0_dp
    6002        1474 :       IF (PRESENT(localiz_coeff)) THEN
    6003        1048 :          my_localiz_coeff = localiz_coeff
    6004             :       END IF
    6005        1474 :       IF (PRESENT(energy_coeff)) THEN
    6006        1048 :          my_energy_coeff = energy_coeff
    6007             :       END IF
    6008        1474 :       IF (PRESENT(penalty_occ_local)) THEN
    6009        1048 :          my_penalty_local = penalty_occ_local
    6010             :       END IF
    6011             : 
    6012             :       ! use this otherways unused variables
    6013        1474 :       CALL dbcsr_get_info(matrix=m_ks, nfullrows_total=nao)
    6014        1474 :       CALL dbcsr_get_info(matrix=m_s, nfullrows_total=nao)
    6015        1474 :       CALL dbcsr_get_info(matrix=m_t, nfullrows_total=nao)
    6016             : 
    6017             :       CALL dbcsr_create(m_tmp_no_1, &
    6018             :                         template=m_quench_t, &
    6019        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6020             :       CALL dbcsr_create(m_tmp_no_2, &
    6021             :                         template=m_quench_t, &
    6022        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6023             :       CALL dbcsr_create(m_tmp_no_3, &
    6024             :                         template=m_quench_t, &
    6025        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6026             :       CALL dbcsr_create(m_tmp_oo_1, &
    6027             :                         template=m_siginv, &
    6028        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6029             :       CALL dbcsr_create(m_tmp_oo_2, &
    6030             :                         template=m_siginv, &
    6031        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6032             :       CALL dbcsr_create(tempNOcc1, &
    6033             :                         template=m_t, &
    6034        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6035             :       CALL dbcsr_create(tempOccOcc1, &
    6036             :                         template=m_siginv, &
    6037        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6038             :       CALL dbcsr_create(temp1, &
    6039             :                         template=m_t, &
    6040        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6041             :       CALL dbcsr_create(temp2, &
    6042             :                         template=m_t, &
    6043        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6044             : 
    6045             :       ! do d_E/d_T first
    6046             :       !IF (.NOT.PRESENT(m_FTsiginv)) THEN
    6047             :       !   CALL dbcsr_multiply("N","N",1.0_dp,&
    6048             :       !           m_ks,&
    6049             :       !           m_t,&
    6050             :       !           0.0_dp,m_tmp_no_1,&
    6051             :       !           filter_eps=eps_filter)
    6052             :       !   CALL dbcsr_multiply("N","N",1.0_dp,&
    6053             :       !           m_tmp_no_1,&
    6054             :       !           m_siginv,&
    6055             :       !           0.0_dp,m_FTsiginv,&
    6056             :       !           filter_eps=eps_filter)
    6057             :       !ENDIF
    6058             : 
    6059        1474 :       CALL dbcsr_copy(m_tmp_no_2, m_quench_t)
    6060        1474 :       CALL dbcsr_copy(m_tmp_no_2, m_FTsiginv, keep_sparsity=.TRUE.)
    6061             : 
    6062             :       !IF (.NOT.PRESENT(m_siginvTFTsiginv)) THEN
    6063             :       !   CALL dbcsr_multiply("T","N",1.0_dp,&
    6064             :       !           m_t,&
    6065             :       !           m_FTsiginv,&
    6066             :       !           0.0_dp,m_tmp_oo_1,&
    6067             :       !           filter_eps=eps_filter)
    6068             :       !   CALL dbcsr_multiply("N","N",1.0_dp,&
    6069             :       !           m_siginv,&
    6070             :       !           m_tmp_oo_1,&
    6071             :       !           0.0_dp,m_siginvTFTsiginv,&
    6072             :       !           filter_eps=eps_filter)
    6073             :       !ENDIF
    6074             : 
    6075             :       !IF (.NOT.PRESENT(m_ST)) THEN
    6076             :       !   CALL dbcsr_multiply("N","N",1.0_dp,&
    6077             :       !           m_s,&
    6078             :       !           m_t,&
    6079             :       !           0.0_dp,m_ST,&
    6080             :       !           filter_eps=eps_filter)
    6081             :       !ENDIF
    6082             : 
    6083             :       CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6084             :                           m_ST, &
    6085             :                           m_siginvTFTsiginv, &
    6086             :                           1.0_dp, m_tmp_no_2, &
    6087        1474 :                           retain_sparsity=.TRUE.)
    6088        1474 :       CALL dbcsr_scale(m_tmp_no_2, 2.0_dp*spin_factor)
    6089             : 
    6090             :       ! LzL Add gradient for Localization
    6091        1474 :       IF (my_penalty_local) THEN
    6092             : 
    6093           0 :          CALL dbcsr_set(temp2, 0.0_dp) ! accumulate the localization gradient here
    6094             : 
    6095           0 :          DO idim0 = 1, SIZE(op_sm_set, 2) ! this loop is over miller ind
    6096             : 
    6097           0 :             DO reim = 1, SIZE(op_sm_set, 1) ! this loop is over Re/Im
    6098             : 
    6099             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6100             :                                    op_sm_set(reim, idim0)%matrix, &
    6101             :                                    m_t, &
    6102             :                                    0.0_dp, tempNOcc1, &
    6103           0 :                                    filter_eps=eps_filter)
    6104             : 
    6105             :                ! warning - save time by computing only the diagonal elements
    6106             :                CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6107             :                                    m_t, &
    6108             :                                    tempNOcc1, &
    6109             :                                    0.0_dp, tempOccOcc1, &
    6110           0 :                                    filter_eps=eps_filter)
    6111             : 
    6112           0 :                CALL dbcsr_get_info(tempOccOcc1, nfullrows_total=dim0)
    6113           0 :                ALLOCATE (tg_diagonal(dim0))
    6114           0 :                CALL dbcsr_get_diag(tempOccOcc1, tg_diagonal)
    6115           0 :                CALL dbcsr_set(tempOccOcc1, 0.0_dp)
    6116           0 :                CALL dbcsr_set_diag(tempOccOcc1, tg_diagonal)
    6117           0 :                DEALLOCATE (tg_diagonal)
    6118             : 
    6119             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6120             :                                    tempNOcc1, &
    6121             :                                    tempOccOcc1, &
    6122             :                                    0.0_dp, temp1, &
    6123           0 :                                    filter_eps=eps_filter)
    6124             : 
    6125             :             END DO
    6126             : 
    6127             :             SELECT CASE (2) ! allows for selection of different spread functionals
    6128             :             CASE (1) ! functional =  -W_I * log( |z_I|^2 )
    6129           0 :                CPABORT("Localization function is not implemented")
    6130             :                !coeff = -(weights(idim0)/z2(ielem))
    6131             :             CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
    6132           0 :                coeff = -weights(idim0)
    6133             :             CASE (3) ! functional =  W_I * ( 1 - |z_I| )
    6134             :                CPABORT("Localization function is not implemented")
    6135             :                !coeff = -(weights(idim0)/(2.0_dp*z2(ielem)))
    6136             :             END SELECT
    6137           0 :             CALL dbcsr_add(temp2, temp1, 1.0_dp, coeff)
    6138             :             !CALL dbcsr_add(grad_loc, temp1, 1.0_dp, 1.0_dp)
    6139             : 
    6140             :          END DO ! end loop over idim0
    6141           0 :          CALL dbcsr_add(m_tmp_no_2, temp2, my_energy_coeff, my_localiz_coeff*4.0_dp)
    6142             :       END IF
    6143             : 
    6144             :       ! add penalty on the occupied volume: det(sigma)
    6145        1474 :       IF (penalty_occ_vol) THEN
    6146             :          !RZK-warning CALL dbcsr_multiply("N","N",&
    6147             :          !RZK-warning         penalty_occ_vol_prefactor,&
    6148             :          !RZK-warning         m_ST,&
    6149             :          !RZK-warning         m_siginv,&
    6150             :          !RZK-warning         1.0_dp,m_tmp_no_2,&
    6151             :          !RZK-warning         retain_sparsity=.TRUE.,&
    6152             :          !RZK-warning         )
    6153           0 :          CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
    6154             :          CALL dbcsr_multiply("N", "N", &
    6155             :                              penalty_occ_vol_prefactor, &
    6156             :                              m_ST, &
    6157             :                              m_siginv, &
    6158             :                              0.0_dp, m_tmp_no_1, &
    6159           0 :                              retain_sparsity=.TRUE.)
    6160             :          ! this norm does not contain the normalization factors
    6161             :          CALL dbcsr_norm(m_tmp_no_1, dbcsr_norm_maxabsnorm, &
    6162           0 :                          norm_scalar=penalty_occ_vol_g_norm)
    6163             :          CALL dbcsr_norm(m_tmp_no_2, dbcsr_norm_maxabsnorm, &
    6164           0 :                          norm_scalar=energy_g_norm)
    6165             :          !WRITE (*, "(A30,2F20.10)") "Energy/penalty g norms (no norm): ", energy_g_norm, penalty_occ_vol_g_norm
    6166           0 :          CALL dbcsr_add(m_tmp_no_2, m_tmp_no_1, 1.0_dp, 1.0_dp)
    6167             :       END IF
    6168             : 
    6169             :       ! take into account the factor from the normalization constraint
    6170        1474 :       IF (normalize_orbitals) THEN
    6171             : 
    6172             :          ! G = ( G - ST.[tr(T).G]_ii ) . [sig_sqrti]_ii
    6173             :          ! this expression can be simplified to
    6174             :          ! G = ( G - c0*ST ) . [sig_sqrti]_ii
    6175             :          ! where c0 = penalty_occ_vol_prefactor
    6176             :          ! This is because tr(T).G_Energy = 0 and
    6177             :          !                 tr(T).G_Penalty = c0*I
    6178             : 
    6179             :          !! faster way to take the norm into account (tested for vol penalty olny)
    6180             :          !!CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
    6181             :          !!CALL dbcsr_copy(m_tmp_no_1, m_ST, keep_sparsity=.TRUE.)
    6182             :          !!CALL dbcsr_add(m_tmp_no_2, m_tmp_no_1, 1.0_dp, -penalty_occ_vol_prefactor)
    6183             :          !!CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
    6184             :          !!CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6185             :          !!                    m_tmp_no_2, &
    6186             :          !!                    m_sig_sqrti_ii, &
    6187             :          !!                    0.0_dp, m_tmp_no_1, &
    6188             :          !!                    retain_sparsity=.TRUE.)
    6189             : 
    6190             :          ! slower way of taking the norm into account
    6191           0 :          CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
    6192             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6193             :                              m_tmp_no_2, &
    6194             :                              m_sig_sqrti_ii, &
    6195             :                              0.0_dp, m_tmp_no_1, &
    6196           0 :                              retain_sparsity=.TRUE.)
    6197             : 
    6198             :          ! get [tr(T).G]_ii
    6199           0 :          CALL dbcsr_copy(m_tmp_oo_1, m_sig_sqrti_ii)
    6200             :          CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6201             :                              m_t, &
    6202             :                              m_tmp_no_2, &
    6203             :                              0.0_dp, m_tmp_oo_1, &
    6204           0 :                              retain_sparsity=.TRUE.)
    6205             : 
    6206           0 :          CALL dbcsr_get_info(m_sig_sqrti_ii, nfullrows_total=dim0)
    6207           0 :          ALLOCATE (tg_diagonal(dim0))
    6208           0 :          CALL dbcsr_get_diag(m_tmp_oo_1, tg_diagonal)
    6209           0 :          CALL dbcsr_set(m_tmp_oo_1, 0.0_dp)
    6210           0 :          CALL dbcsr_set_diag(m_tmp_oo_1, tg_diagonal)
    6211           0 :          DEALLOCATE (tg_diagonal)
    6212             : 
    6213             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6214             :                              m_sig_sqrti_ii, &
    6215             :                              m_tmp_oo_1, &
    6216             :                              0.0_dp, m_tmp_oo_2, &
    6217           0 :                              filter_eps=eps_filter)
    6218             :          CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6219             :                              m_ST, &
    6220             :                              m_tmp_oo_2, &
    6221             :                              1.0_dp, m_tmp_no_1, &
    6222           0 :                              retain_sparsity=.TRUE.)
    6223             : 
    6224             :       ELSE
    6225             : 
    6226        1474 :          CALL dbcsr_copy(m_tmp_no_1, m_tmp_no_2)
    6227             : 
    6228             :       END IF ! normalize_orbitals
    6229             : 
    6230             :       ! project out the occupied space from the gradient
    6231        1474 :       IF (assume_t0_q0x) THEN
    6232         466 :          IF (special_case .EQ. xalmo_case_fully_deloc) THEN
    6233         160 :             CALL dbcsr_copy(m_grad_out, m_tmp_no_1)
    6234             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6235             :                                 m_t0, &
    6236             :                                 m_grad_out, &
    6237             :                                 0.0_dp, m_tmp_oo_1, &
    6238         160 :                                 filter_eps=eps_filter)
    6239             :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6240             :                                 m_STsiginv0, &
    6241             :                                 m_tmp_oo_1, &
    6242             :                                 1.0_dp, m_grad_out, &
    6243         160 :                                 filter_eps=eps_filter)
    6244         306 :          ELSE IF (special_case .EQ. xalmo_case_block_diag) THEN
    6245           0 :             CPABORT("Cannot project the zero-order space from itself")
    6246             :          ELSE
    6247             :             ! no special case: normal xALMOs
    6248             :             CALL apply_domain_operators( &
    6249             :                matrix_in=m_tmp_no_1, &
    6250             :                matrix_out=m_grad_out, &
    6251             :                operator2=domain_r_down(:), &
    6252             :                operator1=domain_s_inv(:), &
    6253             :                dpattern=m_quench_t, &
    6254             :                map=domain_map, &
    6255             :                node_of_domain=cpu_of_domain, &
    6256             :                my_action=1, &
    6257             :                filter_eps=eps_filter, &
    6258             :                !matrix_trimmer=,&
    6259         306 :                use_trimmer=.FALSE.)
    6260             :          END IF ! my_special_case
    6261         466 :          CALL dbcsr_copy(m_tmp_no_1, m_grad_out)
    6262             :       END IF
    6263             : 
    6264             :       !! check whether the gradient lies entirely in R or Q
    6265             :       !CALL dbcsr_multiply("T","N",1.0_dp,&
    6266             :       !        m_t,&
    6267             :       !        m_tmp_no_1,&
    6268             :       !        0.0_dp,m_tmp_oo_1,&
    6269             :       !        filter_eps=eps_filter,&
    6270             :       !        )
    6271             :       !CALL dbcsr_multiply("N","N",1.0_dp,&
    6272             :       !        m_siginv,&
    6273             :       !        m_tmp_oo_1,&
    6274             :       !        0.0_dp,m_tmp_oo_2,&
    6275             :       !        filter_eps=eps_filter,&
    6276             :       !        )
    6277             :       !CALL dbcsr_copy(m_tmp_no_2,m_tmp_no_1)
    6278             :       !CALL dbcsr_multiply("N","N",-1.0_dp,&
    6279             :       !        m_ST,&
    6280             :       !        m_tmp_oo_2,&
    6281             :       !        1.0_dp,m_tmp_no_2,&
    6282             :       !        retain_sparsity=.TRUE.,&
    6283             :       !        )
    6284             :       !CALL dbcsr_norm(m_tmp_no_2, dbcsr_norm_maxabsnorm,&
    6285             :       !        norm_scalar=penalty_occ_vol_g_norm, )
    6286             :       !WRITE(*,"(A50,2F20.10)") "Virtual-space projection of the gradient", penalty_occ_vol_g_norm
    6287             :       !CALL dbcsr_add(m_tmp_no_2,m_tmp_no_1,1.0_dp,-1.0_dp)
    6288             :       !CALL dbcsr_norm(m_tmp_no_2, dbcsr_norm_maxabsnorm,&
    6289             :       !        norm_scalar=penalty_occ_vol_g_norm, )
    6290             :       !WRITE(*,"(A50,2F20.10)") "Occupied-space projection of the gradient", penalty_occ_vol_g_norm
    6291             :       !CALL dbcsr_norm(m_tmp_no_1, dbcsr_norm_maxabsnorm,&
    6292             :       !        norm_scalar=penalty_occ_vol_g_norm, )
    6293             :       !WRITE(*,"(A50,2F20.10)") "Full gradient", penalty_occ_vol_g_norm
    6294             : 
    6295             :       ! transform d_E/d_T to d_E/d_theta
    6296        1474 :       IF (optimize_theta) THEN
    6297           0 :          CALL dbcsr_copy(m_tmp_no_2, m_theta)
    6298             :          CALL dbcsr_function_of_elements(m_tmp_no_2, &
    6299             :                                          !func=dbcsr_func_cos,&
    6300             :                                          func=dbcsr_func_dtanh, &
    6301             :                                          a0=0.0_dp, &
    6302           0 :                                          a1=1.0_dp/envelope_amplitude)
    6303             :          CALL dbcsr_scale(m_tmp_no_2, &
    6304           0 :                           envelope_amplitude)
    6305           0 :          CALL dbcsr_set(m_tmp_no_3, 0.0_dp)
    6306           0 :          CALL dbcsr_filter(m_tmp_no_3, eps_filter)
    6307             :          CALL dbcsr_hadamard_product(m_tmp_no_1, &
    6308             :                                      m_tmp_no_2, &
    6309             :                                      m_tmp_no_3, &
    6310           0 :                                      b_assume_value=1.0_dp)
    6311             :          CALL dbcsr_hadamard_product(m_tmp_no_3, &
    6312             :                                      m_quench_t, &
    6313           0 :                                      m_grad_out)
    6314             :       ELSE ! simply copy
    6315             :          CALL dbcsr_hadamard_product(m_tmp_no_1, &
    6316             :                                      m_quench_t, &
    6317        1474 :                                      m_grad_out)
    6318             :       END IF
    6319        1474 :       CALL dbcsr_filter(m_grad_out, eps_filter)
    6320             : 
    6321        1474 :       CALL dbcsr_release(m_tmp_no_1)
    6322        1474 :       CALL dbcsr_release(m_tmp_no_2)
    6323        1474 :       CALL dbcsr_release(m_tmp_no_3)
    6324        1474 :       CALL dbcsr_release(m_tmp_oo_1)
    6325        1474 :       CALL dbcsr_release(m_tmp_oo_2)
    6326        1474 :       CALL dbcsr_release(tempNOcc1)
    6327        1474 :       CALL dbcsr_release(tempOccOcc1)
    6328        1474 :       CALL dbcsr_release(temp1)
    6329        1474 :       CALL dbcsr_release(temp2)
    6330             : 
    6331        1474 :       CALL timestop(handle)
    6332             : 
    6333        2948 :    END SUBROUTINE compute_gradient
    6334             : 
    6335             : ! *****************************************************************************
    6336             : !> \brief Serial code that prints matrices readable by Mathematica
    6337             : !> \param matrix - matrix to print
    6338             : !> \param filename ...
    6339             : !> \par History
    6340             : !>       2015.05 created [Rustam Z. Khaliullin]
    6341             : !> \author Rustam Z. Khaliullin
    6342             : ! **************************************************************************************************
    6343           0 :    SUBROUTINE print_mathematica_matrix(matrix, filename)
    6344             : 
    6345             :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix
    6346             :       CHARACTER(len=*), INTENT(IN)                       :: filename
    6347             : 
    6348             :       CHARACTER(len=*), PARAMETER :: routineN = 'print_mathematica_matrix'
    6349             : 
    6350             :       CHARACTER(LEN=20)                                  :: formatstr, Scols
    6351             :       INTEGER                                            :: col, fiunit, handle, hori_offset, jj, &
    6352             :                                                             nblkcols_tot, nblkrows_tot, Ncols, &
    6353             :                                                             ncores, Nrows, row, unit_nr, &
    6354             :                                                             vert_offset
    6355           0 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ao_block_sizes, mo_block_sizes
    6356           0 :       INTEGER, DIMENSION(:), POINTER                     :: ao_blk_sizes, mo_blk_sizes
    6357             :       LOGICAL                                            :: found
    6358           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: H
    6359           0 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block_p
    6360             :       TYPE(cp_logger_type), POINTER                      :: logger
    6361             :       TYPE(dbcsr_distribution_type)                      :: dist
    6362             :       TYPE(dbcsr_type)                                   :: matrix_asym
    6363             : 
    6364           0 :       CALL timeset(routineN, handle)
    6365             : 
    6366             :       ! get a useful output_unit
    6367           0 :       logger => cp_get_default_logger()
    6368           0 :       IF (logger%para_env%is_source()) THEN
    6369           0 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    6370             :       ELSE
    6371             :          unit_nr = -1
    6372             :       END IF
    6373             : 
    6374             :       ! serial code only
    6375           0 :       CALL dbcsr_get_info(matrix, distribution=dist)
    6376           0 :       CALL dbcsr_distribution_get(dist, numnodes=ncores)
    6377           0 :       IF (ncores .GT. 1) THEN
    6378           0 :          CPABORT("mathematica files: serial code only")
    6379             :       END IF
    6380             : 
    6381           0 :       nblkrows_tot = dbcsr_nblkrows_total(matrix)
    6382           0 :       nblkcols_tot = dbcsr_nblkcols_total(matrix)
    6383           0 :       CPASSERT(nblkrows_tot == nblkcols_tot)
    6384           0 :       CALL dbcsr_get_info(matrix, row_blk_size=ao_blk_sizes)
    6385           0 :       CALL dbcsr_get_info(matrix, col_blk_size=mo_blk_sizes)
    6386           0 :       ALLOCATE (mo_block_sizes(nblkcols_tot), ao_block_sizes(nblkcols_tot))
    6387           0 :       mo_block_sizes(:) = mo_blk_sizes(:)
    6388           0 :       ao_block_sizes(:) = ao_blk_sizes(:)
    6389             : 
    6390             :       CALL dbcsr_create(matrix_asym, &
    6391             :                         template=matrix, &
    6392           0 :                         matrix_type=dbcsr_type_no_symmetry)
    6393           0 :       CALL dbcsr_desymmetrize(matrix, matrix_asym)
    6394             : 
    6395           0 :       Ncols = SUM(mo_block_sizes)
    6396           0 :       Nrows = SUM(ao_block_sizes)
    6397           0 :       ALLOCATE (H(Nrows, Ncols))
    6398           0 :       H(:, :) = 0.0_dp
    6399             : 
    6400           0 :       hori_offset = 0
    6401           0 :       DO col = 1, nblkcols_tot
    6402             : 
    6403           0 :          vert_offset = 0
    6404           0 :          DO row = 1, nblkrows_tot
    6405             : 
    6406           0 :             CALL dbcsr_get_block_p(matrix_asym, row, col, block_p, found)
    6407           0 :             IF (found) THEN
    6408             : 
    6409             :                H(vert_offset + 1:vert_offset + ao_block_sizes(row), &
    6410             :                  hori_offset + 1:hori_offset + mo_block_sizes(col)) &
    6411           0 :                   = block_p(:, :)
    6412             : 
    6413             :             END IF
    6414             : 
    6415           0 :             vert_offset = vert_offset + ao_block_sizes(row)
    6416             : 
    6417             :          END DO
    6418             : 
    6419           0 :          hori_offset = hori_offset + mo_block_sizes(col)
    6420             : 
    6421             :       END DO ! loop over electron blocks
    6422             : 
    6423           0 :       CALL dbcsr_release(matrix_asym)
    6424             : 
    6425           0 :       IF (unit_nr > 0) THEN
    6426           0 :          CALL open_file(filename, unit_number=fiunit, file_status='REPLACE')
    6427           0 :          WRITE (Scols, "(I10)") Ncols
    6428           0 :          formatstr = "("//TRIM(Scols)//"E27.17)"
    6429           0 :          DO jj = 1, Nrows
    6430           0 :             WRITE (fiunit, formatstr) H(jj, :)
    6431             :          END DO
    6432           0 :          CALL close_file(fiunit)
    6433             :       END IF
    6434             : 
    6435           0 :       DEALLOCATE (mo_block_sizes)
    6436           0 :       DEALLOCATE (ao_block_sizes)
    6437           0 :       DEALLOCATE (H)
    6438             : 
    6439           0 :       CALL timestop(handle)
    6440             : 
    6441           0 :    END SUBROUTINE print_mathematica_matrix
    6442             : 
    6443             : ! *****************************************************************************
    6444             : !> \brief Compute the objective functional of NLMOs
    6445             : !> \param localization_obj_function_ispin ...
    6446             : !> \param penalty_func_ispin ...
    6447             : !> \param penalty_vol_prefactor ...
    6448             : !> \param overlap_determinant ...
    6449             : !> \param m_sigma ...
    6450             : !> \param nocc ...
    6451             : !> \param m_B0 ...
    6452             : !> \param m_theta_normalized ...
    6453             : !> \param template_matrix_mo ...
    6454             : !> \param weights ...
    6455             : !> \param m_S0 ...
    6456             : !> \param just_started ...
    6457             : !> \param penalty_amplitude ...
    6458             : !> \param eps_filter ...
    6459             : !> \par History
    6460             : !>       2020.01 created [Ziling Luo]
    6461             : !> \author Ziling Luo
    6462             : ! **************************************************************************************************
    6463          82 :    SUBROUTINE compute_obj_nlmos(localization_obj_function_ispin, penalty_func_ispin, &
    6464          82 :                                 penalty_vol_prefactor, overlap_determinant, m_sigma, nocc, m_B0, &
    6465          82 :                                 m_theta_normalized, template_matrix_mo, weights, m_S0, just_started, &
    6466             :                                 penalty_amplitude, eps_filter)
    6467             : 
    6468             :       REAL(KIND=dp), INTENT(INOUT) :: localization_obj_function_ispin, penalty_func_ispin, &
    6469             :          penalty_vol_prefactor, overlap_determinant
    6470             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_sigma
    6471             :       INTEGER, INTENT(IN)                                :: nocc
    6472             :       TYPE(dbcsr_type), DIMENSION(:, :), INTENT(IN)      :: m_B0
    6473             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_theta_normalized, template_matrix_mo
    6474             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: weights
    6475             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_S0
    6476             :       LOGICAL, INTENT(IN)                                :: just_started
    6477             :       REAL(KIND=dp), INTENT(IN)                          :: penalty_amplitude, eps_filter
    6478             : 
    6479             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'compute_obj_nlmos'
    6480             : 
    6481             :       INTEGER                                            :: handle, idim0, ielem, para_group_handle, &
    6482             :                                                             reim
    6483             :       REAL(KIND=dp)                                      :: det1, fval
    6484          82 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: reim_diag, z2
    6485             :       TYPE(dbcsr_type)                                   :: tempNOcc1, tempOccOcc1, tempOccOcc2
    6486             :       TYPE(mp_comm_type)                                 :: para_group
    6487             : 
    6488          82 :       CALL timeset(routineN, handle)
    6489             : 
    6490             :       CALL dbcsr_create(tempNOcc1, &
    6491             :                         template=template_matrix_mo, &
    6492          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6493             :       CALL dbcsr_create(tempOccOcc1, &
    6494             :                         template=m_theta_normalized, &
    6495          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6496             :       CALL dbcsr_create(tempOccOcc2, &
    6497             :                         template=m_theta_normalized, &
    6498          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6499             : 
    6500          82 :       localization_obj_function_ispin = 0.0_dp
    6501          82 :       penalty_func_ispin = 0.0_dp
    6502         246 :       ALLOCATE (z2(nocc))
    6503         246 :       ALLOCATE (reim_diag(nocc))
    6504             : 
    6505          82 :       CALL dbcsr_get_info(tempOccOcc2, group=para_group_handle)
    6506          82 :       CALL para_group%set_handle(para_group_handle)
    6507             : 
    6508         842 :       DO idim0 = 1, SIZE(m_B0, 2) ! this loop is over miller ind
    6509             : 
    6510       12608 :          z2(:) = 0.0_dp
    6511             : 
    6512        1520 :          DO reim = 1, SIZE(m_B0, 1) ! this loop is over Re/Im
    6513             : 
    6514             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6515             :                                 m_B0(reim, idim0), &
    6516             :                                 m_theta_normalized, &
    6517             :                                 0.0_dp, tempOccOcc1, &
    6518         760 :                                 filter_eps=eps_filter)
    6519         760 :             CALL dbcsr_set(tempOccOcc2, 0.0_dp)
    6520         760 :             CALL dbcsr_add_on_diag(tempOccOcc2, 1.0_dp)
    6521             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6522             :                                 m_theta_normalized, &
    6523             :                                 tempOccOcc1, &
    6524             :                                 0.0_dp, tempOccOcc2, &
    6525         760 :                                 retain_sparsity=.TRUE.)
    6526             : 
    6527       12608 :             reim_diag = 0.0_dp
    6528         760 :             CALL dbcsr_get_diag(tempOccOcc2, reim_diag)
    6529         760 :             CALL para_group%sum(reim_diag)
    6530       13368 :             z2(:) = z2(:) + reim_diag(:)*reim_diag(:)
    6531             : 
    6532             :          END DO
    6533             : 
    6534       12690 :          DO ielem = 1, nocc
    6535             :             SELECT CASE (2) ! allows for selection of different spread functionals
    6536             :             CASE (1) ! functional =  -W_I * log( |z_I|^2 )
    6537       11848 :                fval = -weights(idim0)*LOG(ABS(z2(ielem)))
    6538             :             CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
    6539       11848 :                fval = weights(idim0) - weights(idim0)*ABS(z2(ielem))
    6540             :             CASE (3) ! functional =  W_I * ( 1 - |z_I| )
    6541             :                fval = weights(idim0) - weights(idim0)*SQRT(ABS(z2(ielem)))
    6542             :             END SELECT
    6543       12608 :             localization_obj_function_ispin = localization_obj_function_ispin + fval
    6544             :          END DO
    6545             : 
    6546             :       END DO ! end loop over idim0
    6547             : 
    6548          82 :       DEALLOCATE (z2)
    6549          82 :       DEALLOCATE (reim_diag)
    6550             : 
    6551             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6552             :                           m_S0, &
    6553             :                           m_theta_normalized, &
    6554             :                           0.0_dp, tempOccOcc1, &
    6555          82 :                           filter_eps=eps_filter)
    6556             :       ! compute current sigma
    6557             :       CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6558             :                           m_theta_normalized, &
    6559             :                           tempOccOcc1, &
    6560             :                           0.0_dp, m_sigma, &
    6561          82 :                           filter_eps=eps_filter)
    6562             : 
    6563             :       CALL determinant(m_sigma, det1, &
    6564          82 :                        eps_filter)
    6565             :       ! save the current determinant
    6566          82 :       overlap_determinant = det1
    6567             : 
    6568          82 :       IF (just_started .AND. penalty_amplitude .LT. 0.0_dp) THEN
    6569           4 :          penalty_vol_prefactor = -(-penalty_amplitude)*localization_obj_function_ispin
    6570             :       END IF
    6571          82 :       penalty_func_ispin = penalty_func_ispin + penalty_vol_prefactor*LOG(det1)
    6572             : 
    6573          82 :       CALL dbcsr_release(tempNOcc1)
    6574          82 :       CALL dbcsr_release(tempOccOcc1)
    6575          82 :       CALL dbcsr_release(tempOccOcc2)
    6576             : 
    6577          82 :       CALL timestop(handle)
    6578             : 
    6579         164 :    END SUBROUTINE compute_obj_nlmos
    6580             : 
    6581             : ! *****************************************************************************
    6582             : !> \brief Compute the gradient wrt the main variable
    6583             : !> \param m_grad_out ...
    6584             : !> \param m_B0 ...
    6585             : !> \param weights ...
    6586             : !> \param m_S0 ...
    6587             : !> \param m_theta_normalized ...
    6588             : !> \param m_siginv ...
    6589             : !> \param m_sig_sqrti_ii ...
    6590             : !> \param penalty_vol_prefactor ...
    6591             : !> \param eps_filter ...
    6592             : !> \param suggested_vol_penalty ...
    6593             : !> \par History
    6594             : !>       2018.10 created [Ziling Luo]
    6595             : !> \author Ziling Luo
    6596             : ! **************************************************************************************************
    6597          82 :    SUBROUTINE compute_gradient_nlmos(m_grad_out, m_B0, weights, &
    6598             :                                      m_S0, m_theta_normalized, m_siginv, m_sig_sqrti_ii, &
    6599             :                                      penalty_vol_prefactor, eps_filter, suggested_vol_penalty)
    6600             : 
    6601             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_grad_out
    6602             :       TYPE(dbcsr_type), DIMENSION(:, :), INTENT(IN)      :: m_B0
    6603             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: weights
    6604             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_S0, m_theta_normalized, m_siginv, &
    6605             :                                                             m_sig_sqrti_ii
    6606             :       REAL(KIND=dp), INTENT(IN)                          :: penalty_vol_prefactor, eps_filter
    6607             :       REAL(KIND=dp), INTENT(INOUT)                       :: suggested_vol_penalty
    6608             : 
    6609             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_gradient_nlmos'
    6610             : 
    6611             :       INTEGER                                            :: dim0, handle, idim0, reim
    6612             :       REAL(KIND=dp)                                      :: norm_loc, norm_vol
    6613             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: tg_diagonal, z2
    6614             :       TYPE(dbcsr_type)                                   :: m_temp_oo_1, m_temp_oo_2, m_temp_oo_3, &
    6615             :                                                             m_temp_oo_4
    6616             : 
    6617          82 :       CALL timeset(routineN, handle)
    6618             : 
    6619             :       CALL dbcsr_create(m_temp_oo_1, &
    6620             :                         template=m_theta_normalized, &
    6621          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6622             :       CALL dbcsr_create(m_temp_oo_2, &
    6623             :                         template=m_theta_normalized, &
    6624          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6625             :       CALL dbcsr_create(m_temp_oo_3, &
    6626             :                         template=m_theta_normalized, &
    6627          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6628             :       CALL dbcsr_create(m_temp_oo_4, &
    6629             :                         template=m_theta_normalized, &
    6630          82 :                         matrix_type=dbcsr_type_no_symmetry)
    6631             : 
    6632          82 :       CALL dbcsr_get_info(m_siginv, nfullrows_total=dim0)
    6633         246 :       ALLOCATE (tg_diagonal(dim0))
    6634         246 :       ALLOCATE (z2(dim0))
    6635          82 :       CALL dbcsr_set(m_temp_oo_1, 0.0_dp) ! accumulate the gradient wrt a_norm here
    6636             : 
    6637             :       ! do d_Omega/d_a_normalized first
    6638         842 :       DO idim0 = 1, SIZE(m_B0, 2) ! this loop is over miller ind
    6639             : 
    6640       12608 :          z2(:) = 0.0_dp
    6641         760 :          CALL dbcsr_set(m_temp_oo_2, 0.0_dp) ! accumulate index gradient here
    6642        1520 :          DO reim = 1, SIZE(m_B0, 1) ! this loop is over Re/Im
    6643             : 
    6644             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6645             :                                 m_B0(reim, idim0), &
    6646             :                                 m_theta_normalized, &
    6647             :                                 0.0_dp, m_temp_oo_3, &
    6648         760 :                                 filter_eps=eps_filter)
    6649             : 
    6650             :             ! result contain Re/Im part of Z for the current Miller index
    6651             :             ! warning - save time by computing only the diagonal elements
    6652             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6653             :                                 m_theta_normalized, &
    6654             :                                 m_temp_oo_3, &
    6655             :                                 0.0_dp, m_temp_oo_4, &
    6656         760 :                                 filter_eps=eps_filter)
    6657             : 
    6658       12608 :             tg_diagonal(:) = 0.0_dp
    6659         760 :             CALL dbcsr_get_diag(m_temp_oo_4, tg_diagonal)
    6660         760 :             CALL dbcsr_set(m_temp_oo_4, 0.0_dp)
    6661         760 :             CALL dbcsr_set_diag(m_temp_oo_4, tg_diagonal)
    6662             :             !CALL para_group%sum(tg_diagonal)
    6663       12608 :             z2(:) = z2(:) + tg_diagonal(:)*tg_diagonal(:)
    6664             : 
    6665             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6666             :                                 m_temp_oo_3, &
    6667             :                                 m_temp_oo_4, &
    6668             :                                 1.0_dp, m_temp_oo_2, &
    6669        1520 :                                 filter_eps=eps_filter)
    6670             : 
    6671             :          END DO
    6672             : 
    6673             :          ! TODO: because some elements are zeros on some MPI tasks the
    6674             :          ! gradient evaluation will fail for CASE 1 and 3
    6675             :          SELECT CASE (2) ! allows for selection of different spread functionals
    6676             :          CASE (1) ! functional =  -W_I * log( |z_I|^2 )
    6677             :             z2(:) = -weights(idim0)/z2(:)
    6678             :          CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
    6679       12608 :             z2(:) = -weights(idim0)
    6680             :          CASE (3) ! functional =  W_I * ( 1 - |z_I| )
    6681             :             z2(:) = -weights(idim0)/(2*SQRT(z2(:)))
    6682             :          END SELECT
    6683         760 :          CALL dbcsr_set(m_temp_oo_3, 0.0_dp)
    6684         760 :          CALL dbcsr_set_diag(m_temp_oo_3, z2)
    6685             :          ! TODO: print this matrix to make sure its block structure is fine
    6686             :          ! and there are no unecessary elements
    6687             : 
    6688             :          CALL dbcsr_multiply("N", "N", 4.0_dp, &
    6689             :                              m_temp_oo_2, &
    6690             :                              m_temp_oo_3, &
    6691             :                              1.0_dp, m_temp_oo_1, &
    6692         842 :                              filter_eps=eps_filter)
    6693             : 
    6694             :       END DO ! end loop over idim0
    6695          82 :       DEALLOCATE (z2)
    6696             : 
    6697             :       ! sigma0.a_norm is necessary for the volume penalty and normalization
    6698             :       CALL dbcsr_multiply("N", "N", &
    6699             :                           1.0_dp, &
    6700             :                           m_S0, &
    6701             :                           m_theta_normalized, &
    6702             :                           0.0_dp, m_temp_oo_2, &
    6703          82 :                           filter_eps=eps_filter)
    6704             : 
    6705             :       ! add gradient of the penalty functional log[det(sigma)]
    6706             :       ! G = 2*prefactor*sigma0.a_norm.sigma_inv
    6707             :       CALL dbcsr_multiply("N", "N", &
    6708             :                           1.0_dp, &
    6709             :                           m_temp_oo_2, &
    6710             :                           m_siginv, &
    6711             :                           0.0_dp, m_temp_oo_3, &
    6712          82 :                           filter_eps=eps_filter)
    6713             :       CALL dbcsr_norm(m_temp_oo_3, &
    6714          82 :                       dbcsr_norm_maxabsnorm, norm_scalar=norm_vol)
    6715             :       CALL dbcsr_norm(m_temp_oo_1, &
    6716          82 :                       dbcsr_norm_maxabsnorm, norm_scalar=norm_loc)
    6717          82 :       suggested_vol_penalty = norm_loc/norm_vol
    6718             :       CALL dbcsr_add(m_temp_oo_1, m_temp_oo_3, &
    6719          82 :                      1.0_dp, 2.0_dp*penalty_vol_prefactor)
    6720             : 
    6721             :       ! take into account the factor from the normalization constraint
    6722             :       ! G = ( G - sigma0.a_norm.[tr(a_norm).G]_ii ) . [sig_sqrti]_ii
    6723             :       ! 1. get G.[sig_sqrti]_ii
    6724             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6725             :                           m_temp_oo_1, &
    6726             :                           m_sig_sqrti_ii, &
    6727             :                           0.0_dp, m_grad_out, &
    6728          82 :                           filter_eps=eps_filter)
    6729             : 
    6730             :       ! 2. get [tr(a_norm).G]_ii
    6731             :       ! it is possible to save time by computing only the diagonal elements
    6732             :       CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6733             :                           m_theta_normalized, &
    6734             :                           m_temp_oo_1, &
    6735             :                           0.0_dp, m_temp_oo_3, &
    6736          82 :                           filter_eps=eps_filter)
    6737          82 :       CALL dbcsr_get_diag(m_temp_oo_3, tg_diagonal)
    6738          82 :       CALL dbcsr_set(m_temp_oo_3, 0.0_dp)
    6739          82 :       CALL dbcsr_set_diag(m_temp_oo_3, tg_diagonal)
    6740             : 
    6741             :       ! 3. [X]_ii . [sig_sqrti]_ii
    6742             :       ! it is possible to save time by computing only the diagonal elements
    6743             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6744             :                           m_sig_sqrti_ii, &
    6745             :                           m_temp_oo_3, &
    6746             :                           0.0_dp, m_temp_oo_1, &
    6747          82 :                           filter_eps=eps_filter)
    6748             :       ! 4. (sigma0*a_norm) .[X]_ii
    6749             :       CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6750             :                           m_temp_oo_2, &
    6751             :                           m_temp_oo_1, &
    6752             :                           1.0_dp, m_grad_out, &
    6753          82 :                           filter_eps=eps_filter)
    6754             : 
    6755          82 :       DEALLOCATE (tg_diagonal)
    6756          82 :       CALL dbcsr_release(m_temp_oo_1)
    6757          82 :       CALL dbcsr_release(m_temp_oo_2)
    6758          82 :       CALL dbcsr_release(m_temp_oo_3)
    6759          82 :       CALL dbcsr_release(m_temp_oo_4)
    6760             : 
    6761          82 :       CALL timestop(handle)
    6762             : 
    6763         164 :    END SUBROUTINE compute_gradient_nlmos
    6764             : 
    6765             : ! *****************************************************************************
    6766             : !> \brief Compute MO coeffs from the main optimized variable (e.g. Theta, X)
    6767             : !> \param m_var_in ...
    6768             : !> \param m_t_out ...
    6769             : !> \param m_quench_t ...
    6770             : !> \param m_t0 ...
    6771             : !> \param m_oo_template ...
    6772             : !> \param m_STsiginv0 ...
    6773             : !> \param m_s ...
    6774             : !> \param m_sig_sqrti_ii_out ...
    6775             : !> \param domain_r_down ...
    6776             : !> \param domain_s_inv ...
    6777             : !> \param domain_map ...
    6778             : !> \param cpu_of_domain ...
    6779             : !> \param assume_t0_q0x ...
    6780             : !> \param just_started ...
    6781             : !> \param optimize_theta ...
    6782             : !> \param normalize_orbitals ...
    6783             : !> \param envelope_amplitude ...
    6784             : !> \param eps_filter ...
    6785             : !> \param special_case ...
    6786             : !> \param nocc_of_domain ...
    6787             : !> \param order_lanczos ...
    6788             : !> \param eps_lanczos ...
    6789             : !> \param max_iter_lanczos ...
    6790             : !> \par History
    6791             : !>       2015.03 created [Rustam Z Khaliullin]
    6792             : !> \author Rustam Z Khaliullin
    6793             : ! **************************************************************************************************
    6794        2948 :    SUBROUTINE compute_xalmos_from_main_var(m_var_in, m_t_out, m_quench_t, &
    6795        1474 :                                            m_t0, m_oo_template, m_STsiginv0, m_s, m_sig_sqrti_ii_out, domain_r_down, &
    6796        1474 :                                            domain_s_inv, domain_map, cpu_of_domain, assume_t0_q0x, just_started, &
    6797             :                                            optimize_theta, normalize_orbitals, envelope_amplitude, eps_filter, &
    6798        1474 :                                            special_case, nocc_of_domain, order_lanczos, eps_lanczos, max_iter_lanczos)
    6799             : 
    6800             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_var_in
    6801             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_t_out
    6802             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_quench_t, m_t0, m_oo_template, &
    6803             :                                                             m_STsiginv0, m_s
    6804             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_sig_sqrti_ii_out
    6805             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6806             :          INTENT(IN)                                      :: domain_r_down, domain_s_inv
    6807             :       TYPE(domain_map_type), INTENT(IN)                  :: domain_map
    6808             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
    6809             :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, just_started, &
    6810             :                                                             optimize_theta, normalize_orbitals
    6811             :       REAL(KIND=dp), INTENT(IN)                          :: envelope_amplitude, eps_filter
    6812             :       INTEGER, INTENT(IN)                                :: special_case
    6813             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: nocc_of_domain
    6814             :       INTEGER, INTENT(IN)                                :: order_lanczos
    6815             :       REAL(KIND=dp), INTENT(IN)                          :: eps_lanczos
    6816             :       INTEGER, INTENT(IN)                                :: max_iter_lanczos
    6817             : 
    6818             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_xalmos_from_main_var'
    6819             : 
    6820             :       INTEGER                                            :: handle, unit_nr
    6821             :       REAL(KIND=dp)                                      :: t_norm
    6822             :       TYPE(cp_logger_type), POINTER                      :: logger
    6823             :       TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_oo_1
    6824             : 
    6825        1474 :       CALL timeset(routineN, handle)
    6826             : 
    6827             :       ! get a useful output_unit
    6828        1474 :       logger => cp_get_default_logger()
    6829        1474 :       IF (logger%para_env%is_source()) THEN
    6830         737 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    6831             :       ELSE
    6832             :          unit_nr = -1
    6833             :       END IF
    6834             : 
    6835             :       CALL dbcsr_create(m_tmp_no_1, &
    6836             :                         template=m_quench_t, &
    6837        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6838             :       CALL dbcsr_create(m_tmp_oo_1, &
    6839             :                         template=m_oo_template, &
    6840        1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6841             : 
    6842        1474 :       CALL dbcsr_copy(m_tmp_no_1, m_var_in)
    6843        1474 :       IF (optimize_theta) THEN
    6844             :          ! check that all MO coefficients of the guess are less
    6845             :          ! than the maximum allowed amplitude
    6846             :          CALL dbcsr_norm(m_tmp_no_1, &
    6847           0 :                          dbcsr_norm_maxabsnorm, norm_scalar=t_norm)
    6848           0 :          IF (unit_nr > 0) THEN
    6849           0 :             WRITE (unit_nr, *) "Maximum norm of the initial guess: ", t_norm
    6850           0 :             WRITE (unit_nr, *) "Maximum allowed amplitude: ", &
    6851           0 :                envelope_amplitude
    6852             :          END IF
    6853           0 :          IF (t_norm .GT. envelope_amplitude .AND. just_started) THEN
    6854           0 :             CPABORT("Max norm of the initial guess is too large")
    6855             :          END IF
    6856             :          ! use artanh to tame MOs
    6857             :          CALL dbcsr_function_of_elements(m_tmp_no_1, &
    6858             :                                          func=dbcsr_func_tanh, &
    6859             :                                          a0=0.0_dp, &
    6860           0 :                                          a1=1.0_dp/envelope_amplitude)
    6861             :          CALL dbcsr_scale(m_tmp_no_1, &
    6862           0 :                           envelope_amplitude)
    6863             :       END IF
    6864             :       CALL dbcsr_hadamard_product(m_tmp_no_1, m_quench_t, &
    6865        1474 :                                   m_t_out)
    6866             : 
    6867             :       ! project out R_0
    6868        1474 :       IF (assume_t0_q0x) THEN
    6869         466 :          IF (special_case .EQ. xalmo_case_fully_deloc) THEN
    6870             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6871             :                                 m_STsiginv0, &
    6872             :                                 m_t_out, &
    6873             :                                 0.0_dp, m_tmp_oo_1, &
    6874         160 :                                 filter_eps=eps_filter)
    6875             :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6876             :                                 m_t0, &
    6877             :                                 m_tmp_oo_1, &
    6878             :                                 1.0_dp, m_t_out, &
    6879         160 :                                 filter_eps=eps_filter)
    6880         306 :          ELSE IF (special_case .EQ. xalmo_case_block_diag) THEN
    6881           0 :             CPABORT("cannot use projector with block-daigonal ALMOs")
    6882             :          ELSE
    6883             :             ! no special case
    6884             :             CALL apply_domain_operators( &
    6885             :                matrix_in=m_t_out, &
    6886             :                matrix_out=m_tmp_no_1, &
    6887             :                operator1=domain_r_down, &
    6888             :                operator2=domain_s_inv, &
    6889             :                dpattern=m_quench_t, &
    6890             :                map=domain_map, &
    6891             :                node_of_domain=cpu_of_domain, &
    6892             :                my_action=1, &
    6893             :                filter_eps=eps_filter, &
    6894         306 :                use_trimmer=.FALSE.)
    6895             :             CALL dbcsr_copy(m_t_out, &
    6896         306 :                             m_tmp_no_1)
    6897             :          END IF ! special case
    6898             :          CALL dbcsr_add(m_t_out, &
    6899         466 :                         m_t0, 1.0_dp, 1.0_dp)
    6900             :       END IF
    6901             : 
    6902        1474 :       IF (normalize_orbitals) THEN
    6903             :          CALL orthogonalize_mos( &
    6904             :             ket=m_t_out, &
    6905             :             overlap=m_tmp_oo_1, &
    6906             :             metric=m_s, &
    6907             :             retain_locality=.TRUE., &
    6908             :             only_normalize=.TRUE., &
    6909             :             nocc_of_domain=nocc_of_domain(:), &
    6910             :             eps_filter=eps_filter, &
    6911             :             order_lanczos=order_lanczos, &
    6912             :             eps_lanczos=eps_lanczos, &
    6913             :             max_iter_lanczos=max_iter_lanczos, &
    6914           0 :             overlap_sqrti=m_sig_sqrti_ii_out)
    6915             :       END IF
    6916             : 
    6917        1474 :       CALL dbcsr_filter(m_t_out, eps_filter)
    6918             : 
    6919        1474 :       CALL dbcsr_release(m_tmp_no_1)
    6920        1474 :       CALL dbcsr_release(m_tmp_oo_1)
    6921             : 
    6922        1474 :       CALL timestop(handle)
    6923             : 
    6924        1474 :    END SUBROUTINE compute_xalmos_from_main_var
    6925             : 
    6926             : ! *****************************************************************************
    6927             : !> \brief Compute the preconditioner matrices and invert them if necessary
    6928             : !> \param domain_prec_out ...
    6929             : !> \param m_prec_out ...
    6930             : !> \param m_ks ...
    6931             : !> \param m_s ...
    6932             : !> \param m_siginv ...
    6933             : !> \param m_quench_t ...
    6934             : !> \param m_FTsiginv ...
    6935             : !> \param m_siginvTFTsiginv ...
    6936             : !> \param m_ST ...
    6937             : !> \param m_STsiginv_out ...
    6938             : !> \param m_s_vv_out ...
    6939             : !> \param m_f_vv_out ...
    6940             : !> \param para_env ...
    6941             : !> \param blacs_env ...
    6942             : !> \param nocc_of_domain ...
    6943             : !> \param domain_s_inv ...
    6944             : !> \param domain_s_inv_half ...
    6945             : !> \param domain_s_half ...
    6946             : !> \param domain_r_down ...
    6947             : !> \param cpu_of_domain ...
    6948             : !> \param domain_map ...
    6949             : !> \param assume_t0_q0x ...
    6950             : !> \param penalty_occ_vol ...
    6951             : !> \param penalty_occ_vol_prefactor ...
    6952             : !> \param eps_filter ...
    6953             : !> \param neg_thr ...
    6954             : !> \param spin_factor ...
    6955             : !> \param special_case ...
    6956             : !> \param bad_modes_projector_down_out ...
    6957             : !> \param skip_inversion ...
    6958             : !> \par History
    6959             : !>       2015.03 created [Rustam Z Khaliullin]
    6960             : !> \author Rustam Z Khaliullin
    6961             : ! **************************************************************************************************
    6962        1500 :    SUBROUTINE compute_preconditioner(domain_prec_out, m_prec_out, m_ks, m_s, &
    6963             :                                      m_siginv, m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, &
    6964             :                                      m_STsiginv_out, m_s_vv_out, m_f_vv_out, para_env, &
    6965        1000 :                                      blacs_env, nocc_of_domain, domain_s_inv, domain_s_inv_half, domain_s_half, &
    6966         500 :                                      domain_r_down, cpu_of_domain, &
    6967             :                                      domain_map, assume_t0_q0x, penalty_occ_vol, penalty_occ_vol_prefactor, &
    6968         500 :                                      eps_filter, neg_thr, spin_factor, special_case, bad_modes_projector_down_out, &
    6969             :                                      skip_inversion)
    6970             : 
    6971             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6972             :          INTENT(INOUT)                                   :: domain_prec_out
    6973             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_prec_out, m_ks, m_s
    6974             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_siginv, m_quench_t, m_FTsiginv, &
    6975             :                                                             m_siginvTFTsiginv, m_ST
    6976             :       TYPE(dbcsr_type), INTENT(INOUT), OPTIONAL          :: m_STsiginv_out, m_s_vv_out, m_f_vv_out
    6977             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    6978             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    6979             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: nocc_of_domain
    6980             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6981             :          INTENT(IN)                                      :: domain_s_inv
    6982             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6983             :          INTENT(IN), OPTIONAL                            :: domain_s_inv_half, domain_s_half
    6984             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6985             :          INTENT(IN)                                      :: domain_r_down
    6986             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
    6987             :       TYPE(domain_map_type), INTENT(IN)                  :: domain_map
    6988             :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, penalty_occ_vol
    6989             :       REAL(KIND=dp), INTENT(IN)                          :: penalty_occ_vol_prefactor, eps_filter, &
    6990             :                                                             neg_thr, spin_factor
    6991             :       INTEGER, INTENT(IN)                                :: special_case
    6992             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6993             :          INTENT(INOUT), OPTIONAL                         :: bad_modes_projector_down_out
    6994             :       LOGICAL, INTENT(IN)                                :: skip_inversion
    6995             : 
    6996             :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_preconditioner'
    6997             : 
    6998             :       INTEGER                                            :: handle, ndim, precond_domain_projector
    6999         500 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: nn_diagonal
    7000             :       TYPE(dbcsr_type)                                   :: m_tmp_nn_1, m_tmp_no_3
    7001             : 
    7002         500 :       CALL timeset(routineN, handle)
    7003             : 
    7004             :       CALL dbcsr_create(m_tmp_nn_1, &
    7005             :                         template=m_s, &
    7006         500 :                         matrix_type=dbcsr_type_no_symmetry)
    7007             :       CALL dbcsr_create(m_tmp_no_3, &
    7008             :                         template=m_quench_t, &
    7009         500 :                         matrix_type=dbcsr_type_no_symmetry)
    7010             : 
    7011             :       ! calculate (1-R)F(1-R) and S-SRS
    7012             :       ! RZK-warning take advantage: some elements will be removed by the quencher
    7013             :       ! RZK-warning S operations can be performed outside the spin loop to save time
    7014             :       ! IT IS REQUIRED THAT PRECONDITIONER DOES NOT BREAK THE LOCALITY!!!!
    7015             :       ! RZK-warning: further optimization is ABSOLUTELY NECESSARY
    7016             : 
    7017             :       ! First S-SRS
    7018             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7019             :                           m_ST, &
    7020             :                           m_siginv, &
    7021             :                           0.0_dp, m_tmp_no_3, &
    7022         500 :                           filter_eps=eps_filter)
    7023         500 :       CALL dbcsr_desymmetrize(m_s, m_tmp_nn_1)
    7024             :       ! return STsiginv if necessary
    7025         500 :       IF (PRESENT(m_STsiginv_out)) THEN
    7026           0 :          CALL dbcsr_copy(m_STsiginv_out, m_tmp_no_3)
    7027             :       END IF
    7028         500 :       IF (special_case .EQ. xalmo_case_fully_deloc) THEN
    7029             :          ! use S instead of S-SRS
    7030             :       ELSE
    7031             :          CALL dbcsr_multiply("N", "T", -1.0_dp, &
    7032             :                              m_ST, &
    7033             :                              m_tmp_no_3, &
    7034             :                              1.0_dp, m_tmp_nn_1, &
    7035         456 :                              filter_eps=eps_filter)
    7036             :       END IF
    7037             :       ! return S_vv = (S or S-SRS) if necessary
    7038         500 :       IF (PRESENT(m_s_vv_out)) THEN
    7039           0 :          CALL dbcsr_copy(m_s_vv_out, m_tmp_nn_1)
    7040             :       END IF
    7041             : 
    7042             :       ! Second (1-R)F(1-R)
    7043             :       ! re-create matrix because desymmetrize is buggy -
    7044             :       ! it will create multiple copies of blocks
    7045         500 :       CALL dbcsr_desymmetrize(m_ks, m_prec_out)
    7046             :       CALL dbcsr_multiply("N", "T", -1.0_dp, &
    7047             :                           m_FTsiginv, &
    7048             :                           m_ST, &
    7049             :                           1.0_dp, m_prec_out, &
    7050         500 :                           filter_eps=eps_filter)
    7051             :       CALL dbcsr_multiply("N", "T", -1.0_dp, &
    7052             :                           m_ST, &
    7053             :                           m_FTsiginv, &
    7054             :                           1.0_dp, m_prec_out, &
    7055         500 :                           filter_eps=eps_filter)
    7056             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7057             :                           m_ST, &
    7058             :                           m_siginvTFTsiginv, &
    7059             :                           0.0_dp, m_tmp_no_3, &
    7060         500 :                           filter_eps=eps_filter)
    7061             :       CALL dbcsr_multiply("N", "T", 1.0_dp, &
    7062             :                           m_tmp_no_3, &
    7063             :                           m_ST, &
    7064             :                           1.0_dp, m_prec_out, &
    7065         500 :                           filter_eps=eps_filter)
    7066             :       ! return F_vv = (I-SR)F(I-RS) if necessary
    7067         500 :       IF (PRESENT(m_f_vv_out)) THEN
    7068           0 :          CALL dbcsr_copy(m_f_vv_out, m_prec_out)
    7069             :       END IF
    7070             : 
    7071             : #if 0
    7072             : !penalty_only=.TRUE.
    7073             :       WRITE (unit_nr, *) "prefactor0:", penalty_occ_vol_prefactor
    7074             :       !IF (penalty_occ_vol) THEN
    7075             :       CALL dbcsr_desymmetrize(m_s, &
    7076             :                               m_prec_out)
    7077             :       !CALL dbcsr_scale(m_prec_out,-penalty_occ_vol_prefactor)
    7078             :       !ENDIF
    7079             : #else
    7080             :       ! sum up the F_vv and S_vv terms
    7081             :       CALL dbcsr_add(m_prec_out, m_tmp_nn_1, &
    7082         500 :                      1.0_dp, 1.0_dp)
    7083             :       ! Scale to obtain unit step length
    7084         500 :       CALL dbcsr_scale(m_prec_out, 2.0_dp*spin_factor)
    7085             : 
    7086             :       ! add the contribution from the penalty on the occupied volume
    7087         500 :       IF (penalty_occ_vol) THEN
    7088             :          CALL dbcsr_add(m_prec_out, m_tmp_nn_1, &
    7089           0 :                         1.0_dp, penalty_occ_vol_prefactor)
    7090             :       END IF
    7091             : #endif
    7092             : 
    7093         500 :       CALL dbcsr_copy(m_tmp_nn_1, m_prec_out)
    7094             : 
    7095             :       ! invert using various algorithms
    7096         500 :       IF (special_case .EQ. xalmo_case_block_diag) THEN ! non-overlapping diagonal blocks
    7097             : 
    7098          96 :          IF (skip_inversion) THEN
    7099             : 
    7100             :             ! impose block-diagonal structure
    7101          92 :             CALL dbcsr_get_info(m_s, nfullrows_total=ndim)
    7102         276 :             ALLOCATE (nn_diagonal(ndim))
    7103          92 :             CALL dbcsr_get_diag(m_s, nn_diagonal)
    7104          92 :             CALL dbcsr_set(m_prec_out, 0.0_dp)
    7105          92 :             CALL dbcsr_set_diag(m_prec_out, nn_diagonal)
    7106          92 :             CALL dbcsr_filter(m_prec_out, eps_filter)
    7107          92 :             DEALLOCATE (nn_diagonal)
    7108             : 
    7109         184 :             CALL dbcsr_copy(m_prec_out, m_tmp_nn_1, keep_sparsity=.TRUE.)
    7110             : 
    7111             :          ELSE
    7112             : 
    7113             :             CALL pseudo_invert_diagonal_blk( &
    7114             :                matrix_in=m_tmp_nn_1, &
    7115             :                matrix_out=m_prec_out, &
    7116             :                nocc=nocc_of_domain(:) &
    7117           4 :                )
    7118             : 
    7119             :          END IF
    7120             : 
    7121         404 :       ELSE IF (special_case .EQ. xalmo_case_fully_deloc) THEN ! the entire system is a block
    7122             : 
    7123          44 :          IF (skip_inversion) THEN
    7124           0 :             CALL dbcsr_copy(m_prec_out, m_tmp_nn_1)
    7125             :          ELSE
    7126             : 
    7127             :             ! invert using cholesky (works with S matrix, will not work with S-SRS matrix)
    7128             :             CALL cp_dbcsr_cholesky_decompose(m_prec_out, &
    7129             :                                              para_env=para_env, &
    7130          44 :                                              blacs_env=blacs_env)
    7131             :             CALL cp_dbcsr_cholesky_invert(m_prec_out, &
    7132             :                                           para_env=para_env, &
    7133             :                                           blacs_env=blacs_env, &
    7134          44 :                                           upper_to_full=.TRUE.)
    7135             :          END IF !skip_inversion
    7136             : 
    7137          44 :          CALL dbcsr_filter(m_prec_out, eps_filter)
    7138             : 
    7139             :       ELSE
    7140             : 
    7141             :          !!! use a true domain preconditioner with overlapping domains
    7142         360 :          IF (assume_t0_q0x) THEN
    7143          26 :             precond_domain_projector = -1
    7144             :          ELSE
    7145         334 :             precond_domain_projector = 0
    7146             :          END IF
    7147             :          !! RZK-warning: use PRESENT to make two nearly-identical calls
    7148             :          !! this is done because intel compiler does not seem to conform
    7149             :          !! to the FORTRAN standard for passing through optional arguments
    7150         360 :          IF (PRESENT(bad_modes_projector_down_out)) THEN
    7151             :             CALL construct_domain_preconditioner( &
    7152             :                matrix_main=m_tmp_nn_1, &
    7153             :                subm_s_inv=domain_s_inv(:), &
    7154             :                subm_s_inv_half=domain_s_inv_half(:), &
    7155             :                subm_s_half=domain_s_half(:), &
    7156             :                subm_r_down=domain_r_down(:), &
    7157             :                matrix_trimmer=m_quench_t, &
    7158             :                dpattern=m_quench_t, &
    7159             :                map=domain_map, &
    7160             :                node_of_domain=cpu_of_domain, &
    7161             :                preconditioner=domain_prec_out(:), &
    7162             :                use_trimmer=.FALSE., &
    7163             :                bad_modes_projector_down=bad_modes_projector_down_out(:), &
    7164             :                eps_zero_eigenvalues=neg_thr, &
    7165             :                my_action=precond_domain_projector, &
    7166             :                skip_inversion=skip_inversion &
    7167          18 :                )
    7168             :          ELSE
    7169             :             CALL construct_domain_preconditioner( &
    7170             :                matrix_main=m_tmp_nn_1, &
    7171             :                subm_s_inv=domain_s_inv(:), &
    7172             :                subm_r_down=domain_r_down(:), &
    7173             :                matrix_trimmer=m_quench_t, &
    7174             :                dpattern=m_quench_t, &
    7175             :                map=domain_map, &
    7176             :                node_of_domain=cpu_of_domain, &
    7177             :                preconditioner=domain_prec_out(:), &
    7178             :                use_trimmer=.FALSE., &
    7179             :                !eps_zero_eigenvalues=neg_thr,&
    7180             :                my_action=precond_domain_projector, &
    7181             :                skip_inversion=skip_inversion &
    7182         342 :                )
    7183             :          END IF
    7184             : 
    7185             :       END IF ! special_case
    7186             : 
    7187             :       ! invert using cholesky (works with S matrix, will not work with S-SRS matrix)
    7188             :       !!!CALL cp_dbcsr_cholesky_decompose(prec_vv,&
    7189             :       !!!        para_env=almo_scf_env%para_env,&
    7190             :       !!!        blacs_env=almo_scf_env%blacs_env)
    7191             :       !!!CALL cp_dbcsr_cholesky_invert(prec_vv,&
    7192             :       !!!        para_env=almo_scf_env%para_env,&
    7193             :       !!!        blacs_env=almo_scf_env%blacs_env,&
    7194             :       !!!        upper_to_full=.TRUE.)
    7195             :       !!!CALL dbcsr_filter(prec_vv,&
    7196             :       !!!        almo_scf_env%eps_filter)
    7197             :       !!!
    7198             : 
    7199             :       ! re-create the matrix because desymmetrize is buggy -
    7200             :       ! it will create multiple copies of blocks
    7201             :       !!!DESYM!CALL dbcsr_create(prec_vv,&
    7202             :       !!!DESYM!        template=almo_scf_env%matrix_s(1),&
    7203             :       !!!DESYM!        matrix_type=dbcsr_type_no_symmetry)
    7204             :       !!!DESYM!CALL dbcsr_desymmetrize(almo_scf_env%matrix_s(1),&
    7205             :       !!!DESYM!        prec_vv)
    7206             :       !CALL dbcsr_multiply("N","N",1.0_dp,&
    7207             :       !        almo_scf_env%matrix_s(1),&
    7208             :       !        matrix_t_out(ispin),&
    7209             :       !        0.0_dp,m_tmp_no_1,&
    7210             :       !        filter_eps=almo_scf_env%eps_filter)
    7211             :       !CALL dbcsr_multiply("N","N",1.0_dp,&
    7212             :       !        m_tmp_no_1,&
    7213             :       !        almo_scf_env%matrix_sigma_inv(ispin),&
    7214             :       !        0.0_dp,m_tmp_no_3,&
    7215             :       !        filter_eps=almo_scf_env%eps_filter)
    7216             :       !CALL dbcsr_multiply("N","T",-1.0_dp,&
    7217             :       !        m_tmp_no_3,&
    7218             :       !        m_tmp_no_1,&
    7219             :       !        1.0_dp,prec_vv,&
    7220             :       !        filter_eps=almo_scf_env%eps_filter)
    7221             :       !CALL dbcsr_add_on_diag(prec_vv,&
    7222             :       !        prec_sf_mixing_s)
    7223             : 
    7224             :       !CALL dbcsr_create(prec_oo,&
    7225             :       !        template=almo_scf_env%matrix_sigma(ispin),&
    7226             :       !        matrix_type=dbcsr_type_no_symmetry)
    7227             :       !CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma(ispin),&
    7228             :       !        matrix_type=dbcsr_type_no_symmetry)
    7229             :       !CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma(ispin),&
    7230             :       !        prec_oo)
    7231             :       !CALL dbcsr_filter(prec_oo,&
    7232             :       !        almo_scf_env%eps_filter)
    7233             : 
    7234             :       !! invert using cholesky
    7235             :       !CALL dbcsr_create(prec_oo_inv,&
    7236             :       !        template=prec_oo,&
    7237             :       !        matrix_type=dbcsr_type_no_symmetry)
    7238             :       !CALL dbcsr_desymmetrize(prec_oo,&
    7239             :       !        prec_oo_inv)
    7240             :       !CALL cp_dbcsr_cholesky_decompose(prec_oo_inv,&
    7241             :       !        para_env=almo_scf_env%para_env,&
    7242             :       !        blacs_env=almo_scf_env%blacs_env)
    7243             :       !CALL cp_dbcsr_cholesky_invert(prec_oo_inv,&
    7244             :       !        para_env=almo_scf_env%para_env,&
    7245             :       !        blacs_env=almo_scf_env%blacs_env,&
    7246             :       !        upper_to_full=.TRUE.)
    7247             : 
    7248         500 :       CALL dbcsr_release(m_tmp_nn_1)
    7249         500 :       CALL dbcsr_release(m_tmp_no_3)
    7250             : 
    7251         500 :       CALL timestop(handle)
    7252             : 
    7253        1000 :    END SUBROUTINE compute_preconditioner
    7254             : 
    7255             : ! *****************************************************************************
    7256             : !> \brief Compute beta for conjugate gradient algorithms
    7257             : !> \param beta ...
    7258             : !> \param numer ...
    7259             : !> \param denom ...
    7260             : !> \param reset_conjugator ...
    7261             : !> \param conjugator ...
    7262             : !> \param grad ...
    7263             : !> \param prev_grad ...
    7264             : !> \param step ...
    7265             : !> \param prev_step ...
    7266             : !> \param prev_minus_prec_grad ...
    7267             : !> \par History
    7268             : !>       2015.04 created [Rustam Z Khaliullin]
    7269             : !> \author Rustam Z Khaliullin
    7270             : ! **************************************************************************************************
    7271        1016 :    SUBROUTINE compute_cg_beta(beta, numer, denom, reset_conjugator, conjugator, &
    7272         508 :                               grad, prev_grad, step, prev_step, prev_minus_prec_grad)
    7273             : 
    7274             :       REAL(KIND=dp), INTENT(INOUT)                       :: beta
    7275             :       REAL(KIND=dp), INTENT(INOUT), OPTIONAL             :: numer, denom
    7276             :       LOGICAL, INTENT(INOUT)                             :: reset_conjugator
    7277             :       INTEGER, INTENT(IN)                                :: conjugator
    7278             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: grad, prev_grad, step, prev_step
    7279             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT), &
    7280             :          OPTIONAL                                        :: prev_minus_prec_grad
    7281             : 
    7282             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'compute_cg_beta'
    7283             : 
    7284             :       INTEGER                                            :: handle, i, nsize, unit_nr
    7285             :       REAL(KIND=dp)                                      :: den, kappa, my_denom, my_numer, &
    7286             :                                                             my_numer2, my_numer3, num, num2, num3, &
    7287             :                                                             tau
    7288             :       TYPE(cp_logger_type), POINTER                      :: logger
    7289             :       TYPE(dbcsr_type)                                   :: m_tmp_no_1
    7290             : 
    7291         508 :       CALL timeset(routineN, handle)
    7292             : 
    7293             :       ! get a useful output_unit
    7294         508 :       logger => cp_get_default_logger()
    7295         508 :       IF (logger%para_env%is_source()) THEN
    7296         254 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    7297             :       ELSE
    7298             :          unit_nr = -1
    7299             :       END IF
    7300             : 
    7301         508 :       IF (.NOT. PRESENT(prev_minus_prec_grad)) THEN
    7302             :          IF (conjugator .EQ. cg_fletcher_reeves .OR. &
    7303          82 :              conjugator .EQ. cg_polak_ribiere .OR. &
    7304             :              conjugator .EQ. cg_hager_zhang) THEN
    7305           0 :             CPABORT("conjugator needs more input")
    7306             :          END IF
    7307             :       END IF
    7308             : 
    7309             :       ! return num denom so beta can be calculated spin-by-spin
    7310         508 :       IF (PRESENT(numer) .OR. PRESENT(denom)) THEN
    7311             :          IF (conjugator .EQ. cg_hestenes_stiefel .OR. &
    7312           0 :              conjugator .EQ. cg_dai_yuan .OR. &
    7313             :              conjugator .EQ. cg_hager_zhang) THEN
    7314           0 :             CPABORT("cannot return numer/denom")
    7315             :          END IF
    7316             :       END IF
    7317             : 
    7318         508 :       nsize = SIZE(grad)
    7319             : 
    7320         508 :       my_numer = 0.0_dp
    7321         508 :       my_numer2 = 0.0_dp
    7322         508 :       my_numer3 = 0.0_dp
    7323         508 :       my_denom = 0.0_dp
    7324             : 
    7325        1016 :       DO i = 1, nsize
    7326             : 
    7327             :          CALL dbcsr_create(m_tmp_no_1, &
    7328             :                            template=grad(i), &
    7329         508 :                            matrix_type=dbcsr_type_no_symmetry)
    7330             : 
    7331         570 :          SELECT CASE (conjugator)
    7332             :          CASE (cg_hestenes_stiefel)
    7333          62 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7334             :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), &
    7335          62 :                            1.0_dp, -1.0_dp)
    7336          62 :             CALL dbcsr_dot(m_tmp_no_1, step(i), num)
    7337          62 :             CALL dbcsr_dot(m_tmp_no_1, prev_step(i), den)
    7338             :          CASE (cg_fletcher_reeves)
    7339          94 :             CALL dbcsr_dot(grad(i), step(i), num)
    7340          94 :             CALL dbcsr_dot(prev_grad(i), prev_minus_prec_grad(i), den)
    7341             :          CASE (cg_polak_ribiere)
    7342          30 :             CALL dbcsr_dot(prev_grad(i), prev_minus_prec_grad(i), den)
    7343          30 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7344          30 :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
    7345          30 :             CALL dbcsr_dot(m_tmp_no_1, step(i), num)
    7346             :          CASE (cg_fletcher)
    7347         172 :             CALL dbcsr_dot(grad(i), step(i), num)
    7348         172 :             CALL dbcsr_dot(prev_grad(i), prev_step(i), den)
    7349             :          CASE (cg_liu_storey)
    7350          20 :             CALL dbcsr_dot(prev_grad(i), prev_step(i), den)
    7351          20 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7352          20 :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
    7353          20 :             CALL dbcsr_dot(m_tmp_no_1, step(i), num)
    7354             :          CASE (cg_dai_yuan)
    7355          34 :             CALL dbcsr_dot(grad(i), step(i), num)
    7356          34 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7357          34 :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
    7358          34 :             CALL dbcsr_dot(m_tmp_no_1, prev_step(i), den)
    7359             :          CASE (cg_hager_zhang)
    7360          72 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7361          72 :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
    7362          72 :             CALL dbcsr_dot(m_tmp_no_1, prev_step(i), den)
    7363          72 :             CALL dbcsr_dot(m_tmp_no_1, prev_minus_prec_grad(i), num)
    7364          72 :             CALL dbcsr_dot(m_tmp_no_1, step(i), num2)
    7365          72 :             CALL dbcsr_dot(prev_step(i), grad(i), num3)
    7366          72 :             my_numer2 = my_numer2 + num2
    7367          72 :             my_numer3 = my_numer3 + num3
    7368             :          CASE (cg_zero)
    7369          24 :             num = 0.0_dp
    7370          24 :             den = 1.0_dp
    7371             :          CASE DEFAULT
    7372         508 :             CPABORT("illegal conjugator")
    7373             :          END SELECT
    7374         508 :          my_numer = my_numer + num
    7375         508 :          my_denom = my_denom + den
    7376             : 
    7377        1016 :          CALL dbcsr_release(m_tmp_no_1)
    7378             : 
    7379             :       END DO ! i - nsize
    7380             : 
    7381        1016 :       DO i = 1, nsize
    7382             : 
    7383         508 :          SELECT CASE (conjugator)
    7384             :          CASE (cg_hestenes_stiefel, cg_dai_yuan)
    7385          96 :             beta = -1.0_dp*my_numer/my_denom
    7386             :          CASE (cg_fletcher_reeves, cg_polak_ribiere, cg_fletcher, cg_liu_storey)
    7387         316 :             beta = my_numer/my_denom
    7388             :          CASE (cg_hager_zhang)
    7389          72 :             kappa = -2.0_dp*my_numer/my_denom
    7390          72 :             tau = -1.0_dp*my_numer2/my_denom
    7391          72 :             beta = tau - kappa*my_numer3/my_denom
    7392             :          CASE (cg_zero)
    7393          24 :             beta = 0.0_dp
    7394             :          CASE DEFAULT
    7395         508 :             CPABORT("illegal conjugator")
    7396             :          END SELECT
    7397             : 
    7398             :       END DO ! i - nsize
    7399             : 
    7400         508 :       IF (beta .LT. 0.0_dp) THEN
    7401           0 :          IF (unit_nr > 0) THEN
    7402           0 :             WRITE (unit_nr, *) " Resetting conjugator because beta is negative: ", beta
    7403             :          END IF
    7404           0 :          reset_conjugator = .TRUE.
    7405             :       END IF
    7406             : 
    7407         508 :       IF (PRESENT(numer)) THEN
    7408           0 :          numer = my_numer
    7409             :       END IF
    7410         508 :       IF (PRESENT(denom)) THEN
    7411           0 :          denom = my_denom
    7412             :       END IF
    7413             : 
    7414         508 :       CALL timestop(handle)
    7415             : 
    7416         508 :    END SUBROUTINE compute_cg_beta
    7417             : 
    7418             : ! *****************************************************************************
    7419             : !> \brief computes the step matrix from the gradient and Hessian using
    7420             : !>         the Newton-Raphson method
    7421             : !> \param optimizer ...
    7422             : !> \param m_grad ...
    7423             : !> \param m_delta ...
    7424             : !> \param m_s ...
    7425             : !> \param m_ks ...
    7426             : !> \param m_siginv ...
    7427             : !> \param m_quench_t ...
    7428             : !> \param m_FTsiginv ...
    7429             : !> \param m_siginvTFTsiginv ...
    7430             : !> \param m_ST ...
    7431             : !> \param m_t ...
    7432             : !> \param m_sig_sqrti_ii ...
    7433             : !> \param domain_s_inv ...
    7434             : !> \param domain_r_down ...
    7435             : !> \param domain_map ...
    7436             : !> \param cpu_of_domain ...
    7437             : !> \param nocc_of_domain ...
    7438             : !> \param para_env ...
    7439             : !> \param blacs_env ...
    7440             : !> \param eps_filter ...
    7441             : !> \param optimize_theta ...
    7442             : !> \param penalty_occ_vol ...
    7443             : !> \param normalize_orbitals ...
    7444             : !> \param penalty_occ_vol_prefactor ...
    7445             : !> \param penalty_occ_vol_pf2 ...
    7446             : !> \param special_case ...
    7447             : !> \par History
    7448             : !>       2015.04 created [Rustam Z. Khaliullin]
    7449             : !> \author Rustam Z. Khaliullin
    7450             : ! **************************************************************************************************
    7451           0 :    SUBROUTINE newton_grad_to_step(optimizer, m_grad, m_delta, m_s, m_ks, &
    7452           0 :                                   m_siginv, m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, m_t, &
    7453           0 :                                   m_sig_sqrti_ii, domain_s_inv, domain_r_down, domain_map, cpu_of_domain, &
    7454           0 :                                   nocc_of_domain, para_env, blacs_env, eps_filter, optimize_theta, &
    7455           0 :                                   penalty_occ_vol, normalize_orbitals, penalty_occ_vol_prefactor, &
    7456           0 :                                   penalty_occ_vol_pf2, special_case)
    7457             : 
    7458             :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
    7459             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_grad
    7460             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_delta, m_s, m_ks, m_siginv, m_quench_t
    7461             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_FTsiginv, m_siginvTFTsiginv, m_ST, &
    7462             :                                                             m_t, m_sig_sqrti_ii
    7463             :       TYPE(domain_submatrix_type), DIMENSION(:, :), &
    7464             :          INTENT(IN)                                      :: domain_s_inv, domain_r_down
    7465             :       TYPE(domain_map_type), DIMENSION(:), INTENT(IN)    :: domain_map
    7466             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
    7467             :       INTEGER, DIMENSION(:, :), INTENT(IN)               :: nocc_of_domain
    7468             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    7469             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    7470             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    7471             :       LOGICAL, INTENT(IN)                                :: optimize_theta, penalty_occ_vol, &
    7472             :                                                             normalize_orbitals
    7473             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: penalty_occ_vol_prefactor, &
    7474             :                                                             penalty_occ_vol_pf2
    7475             :       INTEGER, INTENT(IN)                                :: special_case
    7476             : 
    7477             :       CHARACTER(len=*), PARAMETER :: routineN = 'newton_grad_to_step'
    7478             : 
    7479             :       CHARACTER(LEN=20)                                  :: iter_type
    7480             :       INTEGER                                            :: handle, ispin, iteration, max_iter, &
    7481             :                                                             ndomains, nspins, outer_iteration, &
    7482             :                                                             outer_max_iter, unit_nr
    7483             :       LOGICAL :: converged, do_exact_inversion, outer_prepare_to_exit, prepare_to_exit, &
    7484             :          reset_conjugator, use_preconditioner
    7485             :       REAL(KIND=dp)                                      :: alpha, beta, denom, denom_ispin, &
    7486             :                                                             eps_error_target, numer, numer_ispin, &
    7487             :                                                             residue_norm, spin_factor, t1, t2
    7488           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: residue_max_norm
    7489             :       TYPE(cp_logger_type), POINTER                      :: logger
    7490             :       TYPE(dbcsr_type)                                   :: m_tmp_oo_1, m_tmp_oo_2
    7491           0 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_f_vo, m_f_vv, m_Hstep, m_prec, &
    7492           0 :                                                             m_residue, m_residue_prev, m_s_vv, &
    7493           0 :                                                             m_step, m_STsiginv, m_zet, m_zet_prev
    7494             :       TYPE(domain_submatrix_type), ALLOCATABLE, &
    7495           0 :          DIMENSION(:, :)                                 :: domain_prec
    7496             : 
    7497           0 :       CALL timeset(routineN, handle)
    7498             : 
    7499             :       ! get a useful output_unit
    7500           0 :       logger => cp_get_default_logger()
    7501           0 :       IF (logger%para_env%is_source()) THEN
    7502           0 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    7503             :       ELSE
    7504             :          unit_nr = -1
    7505             :       END IF
    7506             : 
    7507             :       !!! Currently for non-theta only
    7508           0 :       IF (optimize_theta) THEN
    7509           0 :          CPABORT("theta is NYI")
    7510             :       END IF
    7511             : 
    7512             :       ! set optimizer options
    7513           0 :       use_preconditioner = (optimizer%preconditioner .NE. xalmo_prec_zero)
    7514           0 :       outer_max_iter = optimizer%max_iter_outer_loop
    7515           0 :       max_iter = optimizer%max_iter
    7516           0 :       eps_error_target = optimizer%eps_error
    7517             : 
    7518             :       ! set key dimensions
    7519           0 :       nspins = SIZE(m_ks)
    7520           0 :       ndomains = SIZE(domain_s_inv, 1)
    7521             : 
    7522           0 :       IF (nspins == 1) THEN
    7523           0 :          spin_factor = 2.0_dp
    7524             :       ELSE
    7525           0 :          spin_factor = 1.0_dp
    7526             :       END IF
    7527             : 
    7528           0 :       ALLOCATE (domain_prec(ndomains, nspins))
    7529           0 :       CALL init_submatrices(domain_prec)
    7530             : 
    7531             :       ! allocate matrices
    7532           0 :       ALLOCATE (m_residue(nspins))
    7533           0 :       ALLOCATE (m_residue_prev(nspins))
    7534           0 :       ALLOCATE (m_step(nspins))
    7535           0 :       ALLOCATE (m_zet(nspins))
    7536           0 :       ALLOCATE (m_zet_prev(nspins))
    7537           0 :       ALLOCATE (m_Hstep(nspins))
    7538           0 :       ALLOCATE (m_prec(nspins))
    7539           0 :       ALLOCATE (m_s_vv(nspins))
    7540           0 :       ALLOCATE (m_f_vv(nspins))
    7541           0 :       ALLOCATE (m_f_vo(nspins))
    7542           0 :       ALLOCATE (m_STsiginv(nspins))
    7543             : 
    7544           0 :       ALLOCATE (residue_max_norm(nspins))
    7545             : 
    7546             :       ! initiate objects before iterations
    7547           0 :       DO ispin = 1, nspins
    7548             : 
    7549             :          ! init matrices
    7550             :          CALL dbcsr_create(m_residue(ispin), &
    7551             :                            template=m_quench_t(ispin), &
    7552           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7553             :          CALL dbcsr_create(m_residue_prev(ispin), &
    7554             :                            template=m_quench_t(ispin), &
    7555           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7556             :          CALL dbcsr_create(m_step(ispin), &
    7557             :                            template=m_quench_t(ispin), &
    7558           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7559             :          CALL dbcsr_create(m_zet_prev(ispin), &
    7560             :                            template=m_quench_t(ispin), &
    7561           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7562             :          CALL dbcsr_create(m_zet(ispin), &
    7563             :                            template=m_quench_t(ispin), &
    7564           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7565             :          CALL dbcsr_create(m_Hstep(ispin), &
    7566             :                            template=m_quench_t(ispin), &
    7567           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7568             :          CALL dbcsr_create(m_f_vo(ispin), &
    7569             :                            template=m_quench_t(ispin), &
    7570           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7571             :          CALL dbcsr_create(m_STsiginv(ispin), &
    7572             :                            template=m_quench_t(ispin), &
    7573           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7574             :          CALL dbcsr_create(m_f_vv(ispin), &
    7575             :                            template=m_ks(ispin), &
    7576           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7577             :          CALL dbcsr_create(m_s_vv(ispin), &
    7578             :                            template=m_s(1), &
    7579           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7580             :          CALL dbcsr_create(m_prec(ispin), &
    7581             :                            template=m_ks(ispin), &
    7582           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7583             : 
    7584             :          ! compute the full "gradient" - it is necessary to
    7585             :          ! evaluate Hessian.X
    7586           0 :          CALL dbcsr_copy(m_f_vo(ispin), m_FTsiginv(ispin))
    7587             :          CALL dbcsr_multiply("N", "N", -1.0_dp, &
    7588             :                              m_ST(ispin), &
    7589             :                              m_siginvTFTsiginv(ispin), &
    7590             :                              1.0_dp, m_f_vo(ispin), &
    7591           0 :                              filter_eps=eps_filter)
    7592             : 
    7593             : ! RZK-warning
    7594             : ! compute preconditioner even if we do not use it
    7595             : ! this is for debugging because compute_preconditioner includes
    7596             : ! computing F_vv and S_vv necessary for
    7597             : !       IF ( use_preconditioner ) THEN
    7598             : 
    7599             : ! domain_s_inv and domain_r_down are never used with assume_t0_q0x=FALSE
    7600             :          CALL compute_preconditioner( &
    7601             :             domain_prec_out=domain_prec(:, ispin), &
    7602             :             m_prec_out=m_prec(ispin), &
    7603             :             m_ks=m_ks(ispin), &
    7604             :             m_s=m_s(1), &
    7605             :             m_siginv=m_siginv(ispin), &
    7606             :             m_quench_t=m_quench_t(ispin), &
    7607             :             m_FTsiginv=m_FTsiginv(ispin), &
    7608             :             m_siginvTFTsiginv=m_siginvTFTsiginv(ispin), &
    7609             :             m_ST=m_ST(ispin), &
    7610             :             m_STsiginv_out=m_STsiginv(ispin), &
    7611             :             m_s_vv_out=m_s_vv(ispin), &
    7612             :             m_f_vv_out=m_f_vv(ispin), &
    7613             :             para_env=para_env, &
    7614             :             blacs_env=blacs_env, &
    7615             :             nocc_of_domain=nocc_of_domain(:, ispin), &
    7616             :             domain_s_inv=domain_s_inv(:, ispin), &
    7617             :             domain_r_down=domain_r_down(:, ispin), &
    7618             :             cpu_of_domain=cpu_of_domain(:), &
    7619             :             domain_map=domain_map(ispin), &
    7620             :             assume_t0_q0x=.FALSE., &
    7621             :             penalty_occ_vol=penalty_occ_vol, &
    7622             :             penalty_occ_vol_prefactor=penalty_occ_vol_prefactor(ispin), &
    7623             :             eps_filter=eps_filter, &
    7624             :             neg_thr=0.5_dp, &
    7625             :             spin_factor=spin_factor, &
    7626             :             special_case=special_case, &
    7627             :             skip_inversion=.FALSE. &
    7628           0 :             )
    7629             : 
    7630             : !       ENDIF ! use_preconditioner
    7631             : 
    7632             :          ! initial guess
    7633           0 :          CALL dbcsr_copy(m_delta(ispin), m_quench_t(ispin))
    7634             :          ! in order to use dbcsr_set matrix blocks must exist
    7635           0 :          CALL dbcsr_set(m_delta(ispin), 0.0_dp)
    7636           0 :          CALL dbcsr_copy(m_residue(ispin), m_grad(ispin))
    7637           0 :          CALL dbcsr_scale(m_residue(ispin), -1.0_dp)
    7638             : 
    7639           0 :          do_exact_inversion = .FALSE.
    7640             :          IF (do_exact_inversion) THEN
    7641             : 
    7642             :             ! copy grad to m_step temporarily
    7643             :             ! use m_step as input to the inversion routine
    7644             :             CALL dbcsr_copy(m_step(ispin), m_grad(ispin))
    7645             : 
    7646             :             ! expensive "exact" inversion of the "nearly-exact" Hessian
    7647             :             ! hopefully returns Z=-H^(-1).G
    7648             :             CALL hessian_diag_apply( &
    7649             :                matrix_grad=m_step(ispin), &
    7650             :                matrix_step=m_zet(ispin), &
    7651             :                matrix_S_ao=m_s_vv(ispin), &
    7652             :                matrix_F_ao=m_f_vv(ispin), &
    7653             :                !matrix_S_ao=m_s(ispin),&
    7654             :                !matrix_F_ao=m_ks(ispin),&
    7655             :                matrix_S_mo=m_siginv(ispin), &
    7656             :                matrix_F_mo=m_siginvTFTsiginv(ispin), &
    7657             :                matrix_S_vo=m_STsiginv(ispin), &
    7658             :                matrix_F_vo=m_f_vo(ispin), &
    7659             :                quench_t=m_quench_t(ispin), &
    7660             :                spin_factor=spin_factor, &
    7661             :                eps_zero=eps_filter*10.0_dp, &
    7662             :                penalty_occ_vol=penalty_occ_vol, &
    7663             :                penalty_occ_vol_prefactor=penalty_occ_vol_prefactor(ispin), &
    7664             :                penalty_occ_vol_pf2=penalty_occ_vol_pf2(ispin), &
    7665             :                m_s=m_s(1), &
    7666             :                para_env=para_env, &
    7667             :                blacs_env=blacs_env &
    7668             :                )
    7669             :             ! correct solution by the spin factor
    7670             :             !CALL dbcsr_scale(m_zet(ispin),1.0_dp/(2.0_dp*spin_factor))
    7671             : 
    7672             :          ELSE ! use PCG to solve H.D=-G
    7673             : 
    7674           0 :             IF (use_preconditioner) THEN
    7675             : 
    7676           0 :                IF (special_case .EQ. xalmo_case_block_diag .OR. &
    7677             :                    special_case .EQ. xalmo_case_fully_deloc) THEN
    7678             : 
    7679             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7680             :                                       m_prec(ispin), &
    7681             :                                       m_residue(ispin), &
    7682             :                                       0.0_dp, m_zet(ispin), &
    7683           0 :                                       filter_eps=eps_filter)
    7684             : 
    7685             :                ELSE
    7686             : 
    7687             :                   CALL apply_domain_operators( &
    7688             :                      matrix_in=m_residue(ispin), &
    7689             :                      matrix_out=m_zet(ispin), &
    7690             :                      operator1=domain_prec(:, ispin), &
    7691             :                      dpattern=m_quench_t(ispin), &
    7692             :                      map=domain_map(ispin), &
    7693             :                      node_of_domain=cpu_of_domain(:), &
    7694             :                      my_action=0, &
    7695             :                      filter_eps=eps_filter &
    7696             :                      !matrix_trimmer=,&
    7697             :                      !use_trimmer=.FALSE.,&
    7698           0 :                      )
    7699             : 
    7700             :                END IF ! special_case
    7701             : 
    7702             :             ELSE ! do not use preconditioner
    7703             : 
    7704           0 :                CALL dbcsr_copy(m_zet(ispin), m_residue(ispin))
    7705             : 
    7706             :             END IF ! use_preconditioner
    7707             : 
    7708             :          END IF ! do_exact_inversion
    7709             : 
    7710           0 :          CALL dbcsr_copy(m_step(ispin), m_zet(ispin))
    7711             : 
    7712             :       END DO !ispin
    7713             : 
    7714             :       ! start the outer SCF loop
    7715           0 :       outer_prepare_to_exit = .FALSE.
    7716           0 :       outer_iteration = 0
    7717           0 :       residue_norm = 0.0_dp
    7718             : 
    7719             :       DO
    7720             : 
    7721             :          ! start the inner SCF loop
    7722           0 :          prepare_to_exit = .FALSE.
    7723           0 :          converged = .FALSE.
    7724           0 :          iteration = 0
    7725           0 :          t1 = m_walltime()
    7726             : 
    7727             :          DO
    7728             : 
    7729             :             ! apply hessian to the step matrix
    7730             :             CALL apply_hessian( &
    7731             :                m_x_in=m_step, &
    7732             :                m_x_out=m_Hstep, &
    7733             :                m_ks=m_ks, &
    7734             :                m_s=m_s, &
    7735             :                m_siginv=m_siginv, &
    7736             :                m_quench_t=m_quench_t, &
    7737             :                m_FTsiginv=m_FTsiginv, &
    7738             :                m_siginvTFTsiginv=m_siginvTFTsiginv, &
    7739             :                m_ST=m_ST, &
    7740             :                m_STsiginv=m_STsiginv, &
    7741             :                m_s_vv=m_s_vv, &
    7742             :                m_ks_vv=m_f_vv, &
    7743             :                !m_s_vv=m_s,&
    7744             :                !m_ks_vv=m_ks,&
    7745             :                m_g_full=m_f_vo, &
    7746             :                m_t=m_t, &
    7747             :                m_sig_sqrti_ii=m_sig_sqrti_ii, &
    7748             :                penalty_occ_vol=penalty_occ_vol, &
    7749             :                normalize_orbitals=normalize_orbitals, &
    7750             :                penalty_occ_vol_prefactor=penalty_occ_vol_prefactor, &
    7751             :                eps_filter=eps_filter, &
    7752             :                path_num=hessian_path_reuse &
    7753           0 :                )
    7754             : 
    7755             :             ! alpha is computed outside the spin loop
    7756           0 :             numer = 0.0_dp
    7757           0 :             denom = 0.0_dp
    7758           0 :             DO ispin = 1, nspins
    7759             : 
    7760           0 :                CALL dbcsr_dot(m_residue(ispin), m_zet(ispin), numer_ispin)
    7761           0 :                CALL dbcsr_dot(m_step(ispin), m_Hstep(ispin), denom_ispin)
    7762             : 
    7763           0 :                numer = numer + numer_ispin
    7764           0 :                denom = denom + denom_ispin
    7765             : 
    7766             :             END DO !ispin
    7767             : 
    7768           0 :             alpha = numer/denom
    7769             : 
    7770           0 :             DO ispin = 1, nspins
    7771             : 
    7772             :                ! update the variable
    7773           0 :                CALL dbcsr_add(m_delta(ispin), m_step(ispin), 1.0_dp, alpha)
    7774           0 :                CALL dbcsr_copy(m_residue_prev(ispin), m_residue(ispin))
    7775             :                CALL dbcsr_add(m_residue(ispin), m_Hstep(ispin), &
    7776           0 :                               1.0_dp, -1.0_dp*alpha)
    7777             :                CALL dbcsr_norm(m_residue(ispin), dbcsr_norm_maxabsnorm, &
    7778           0 :                                norm_scalar=residue_max_norm(ispin))
    7779             : 
    7780             :             END DO ! ispin
    7781             : 
    7782             :             ! check convergence and other exit criteria
    7783           0 :             residue_norm = MAXVAL(residue_max_norm)
    7784           0 :             converged = (residue_norm .LT. eps_error_target)
    7785           0 :             IF (converged .OR. (iteration .GE. max_iter)) THEN
    7786             :                prepare_to_exit = .TRUE.
    7787             :             END IF
    7788             : 
    7789           0 :             IF (.NOT. prepare_to_exit) THEN
    7790             : 
    7791           0 :                DO ispin = 1, nspins
    7792             : 
    7793             :                   ! save current z before the update
    7794           0 :                   CALL dbcsr_copy(m_zet_prev(ispin), m_zet(ispin))
    7795             : 
    7796             :                   ! compute the new step (apply preconditioner if available)
    7797           0 :                   IF (use_preconditioner) THEN
    7798             : 
    7799             :                      !IF (unit_nr>0) THEN
    7800             :                      !   WRITE(unit_nr,*) "....applying preconditioner...."
    7801             :                      !ENDIF
    7802             : 
    7803           0 :                      IF (special_case .EQ. xalmo_case_block_diag .OR. &
    7804             :                          special_case .EQ. xalmo_case_fully_deloc) THEN
    7805             : 
    7806             :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7807             :                                             m_prec(ispin), &
    7808             :                                             m_residue(ispin), &
    7809             :                                             0.0_dp, m_zet(ispin), &
    7810           0 :                                             filter_eps=eps_filter)
    7811             : 
    7812             :                      ELSE
    7813             : 
    7814             :                         CALL apply_domain_operators( &
    7815             :                            matrix_in=m_residue(ispin), &
    7816             :                            matrix_out=m_zet(ispin), &
    7817             :                            operator1=domain_prec(:, ispin), &
    7818             :                            dpattern=m_quench_t(ispin), &
    7819             :                            map=domain_map(ispin), &
    7820             :                            node_of_domain=cpu_of_domain(:), &
    7821             :                            my_action=0, &
    7822             :                            filter_eps=eps_filter &
    7823             :                            !matrix_trimmer=,&
    7824             :                            !use_trimmer=.FALSE.,&
    7825           0 :                            )
    7826             : 
    7827             :                      END IF ! special case
    7828             : 
    7829             :                   ELSE
    7830             : 
    7831           0 :                      CALL dbcsr_copy(m_zet(ispin), m_residue(ispin))
    7832             : 
    7833             :                   END IF
    7834             : 
    7835             :                END DO !ispin
    7836             : 
    7837             :                ! compute the conjugation coefficient - beta
    7838             :                CALL compute_cg_beta( &
    7839             :                   beta=beta, &
    7840             :                   reset_conjugator=reset_conjugator, &
    7841             :                   conjugator=cg_fletcher, &
    7842             :                   grad=m_residue, &
    7843             :                   prev_grad=m_residue_prev, &
    7844             :                   step=m_zet, &
    7845           0 :                   prev_step=m_zet_prev)
    7846             : 
    7847           0 :                DO ispin = 1, nspins
    7848             : 
    7849             :                   ! conjugate the step direction
    7850           0 :                   CALL dbcsr_add(m_step(ispin), m_zet(ispin), beta, 1.0_dp)
    7851             : 
    7852             :                END DO !ispin
    7853             : 
    7854             :             END IF ! not.prepare_to_exit
    7855             : 
    7856           0 :             t2 = m_walltime()
    7857           0 :             IF (unit_nr > 0) THEN
    7858             :                !iter_type=TRIM("ALMO SCF "//iter_type)
    7859           0 :                iter_type = TRIM("NR STEP")
    7860             :                WRITE (unit_nr, '(T6,A9,I6,F14.5,F14.5,F15.10,F9.2)') &
    7861           0 :                   iter_type, iteration, &
    7862           0 :                   alpha, beta, residue_norm, &
    7863           0 :                   t2 - t1
    7864             :             END IF
    7865           0 :             t1 = m_walltime()
    7866             : 
    7867           0 :             iteration = iteration + 1
    7868           0 :             IF (prepare_to_exit) EXIT
    7869             : 
    7870             :          END DO ! inner loop
    7871             : 
    7872           0 :          IF (converged .OR. (outer_iteration .GE. outer_max_iter)) THEN
    7873             :             outer_prepare_to_exit = .TRUE.
    7874             :          END IF
    7875             : 
    7876           0 :          outer_iteration = outer_iteration + 1
    7877           0 :          IF (outer_prepare_to_exit) EXIT
    7878             : 
    7879             :       END DO ! outer loop
    7880             : 
    7881             : ! is not necessary if penalty_occ_vol_pf2=0.0
    7882             : #if 0
    7883             : 
    7884             :       IF (penalty_occ_vol) THEN
    7885             : 
    7886             :          DO ispin = 1, nspins
    7887             : 
    7888             :             CALL dbcsr_copy(m_zet(ispin), m_grad(ispin))
    7889             :             CALL dbcsr_dot(m_delta(ispin), m_zet(ispin), alpha)
    7890             :             WRITE (unit_nr, *) "trace(grad.delta): ", alpha
    7891             :             alpha = -1.0_dp/(penalty_occ_vol_pf2(ispin)*alpha - 1.0_dp)
    7892             :             WRITE (unit_nr, *) "correction alpha: ", alpha
    7893             :             CALL dbcsr_scale(m_delta(ispin), alpha)
    7894             : 
    7895             :          END DO
    7896             : 
    7897             :       END IF
    7898             : 
    7899             : #endif
    7900             : 
    7901           0 :       DO ispin = 1, nspins
    7902             : 
    7903             :          ! check whether the step lies entirely in R or Q
    7904             :          CALL dbcsr_create(m_tmp_oo_1, &
    7905             :                            template=m_siginv(ispin), &
    7906           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7907             :          CALL dbcsr_create(m_tmp_oo_2, &
    7908             :                            template=m_siginv(ispin), &
    7909           0 :                            matrix_type=dbcsr_type_no_symmetry)
    7910             :          CALL dbcsr_multiply("T", "N", 1.0_dp, &
    7911             :                              m_ST(ispin), &
    7912             :                              m_delta(ispin), &
    7913             :                              0.0_dp, m_tmp_oo_1, &
    7914           0 :                              filter_eps=eps_filter)
    7915             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7916             :                              m_siginv(ispin), &
    7917             :                              m_tmp_oo_1, &
    7918             :                              0.0_dp, m_tmp_oo_2, &
    7919           0 :                              filter_eps=eps_filter)
    7920           0 :          CALL dbcsr_copy(m_zet(ispin), m_quench_t(ispin))
    7921             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7922             :                              m_t(ispin), &
    7923             :                              m_tmp_oo_2, &
    7924             :                              0.0_dp, m_zet(ispin), &
    7925           0 :                              retain_sparsity=.TRUE.)
    7926             :          CALL dbcsr_norm(m_zet(ispin), dbcsr_norm_maxabsnorm, &
    7927           0 :                          norm_scalar=alpha)
    7928           0 :          WRITE (unit_nr, "(A50,2F20.10)") "Occupied-space projection of the step", alpha
    7929           0 :          CALL dbcsr_add(m_zet(ispin), m_delta(ispin), -1.0_dp, 1.0_dp)
    7930             :          CALL dbcsr_norm(m_zet(ispin), dbcsr_norm_maxabsnorm, &
    7931           0 :                          norm_scalar=alpha)
    7932           0 :          WRITE (unit_nr, "(A50,2F20.10)") "Virtual-space projection of the step", alpha
    7933             :          CALL dbcsr_norm(m_delta(ispin), dbcsr_norm_maxabsnorm, &
    7934           0 :                          norm_scalar=alpha)
    7935           0 :          WRITE (unit_nr, "(A50,2F20.10)") "Full step", alpha
    7936           0 :          CALL dbcsr_release(m_tmp_oo_1)
    7937           0 :          CALL dbcsr_release(m_tmp_oo_2)
    7938             : 
    7939             :       END DO
    7940             : 
    7941             :       ! clean up
    7942           0 :       DO ispin = 1, nspins
    7943           0 :          CALL release_submatrices(domain_prec(:, ispin))
    7944           0 :          CALL dbcsr_release(m_residue(ispin))
    7945           0 :          CALL dbcsr_release(m_residue_prev(ispin))
    7946           0 :          CALL dbcsr_release(m_step(ispin))
    7947           0 :          CALL dbcsr_release(m_zet(ispin))
    7948           0 :          CALL dbcsr_release(m_zet_prev(ispin))
    7949           0 :          CALL dbcsr_release(m_Hstep(ispin))
    7950           0 :          CALL dbcsr_release(m_f_vo(ispin))
    7951           0 :          CALL dbcsr_release(m_f_vv(ispin))
    7952           0 :          CALL dbcsr_release(m_s_vv(ispin))
    7953           0 :          CALL dbcsr_release(m_prec(ispin))
    7954           0 :          CALL dbcsr_release(m_STsiginv(ispin))
    7955             :       END DO !ispin
    7956           0 :       DEALLOCATE (domain_prec)
    7957           0 :       DEALLOCATE (m_residue)
    7958           0 :       DEALLOCATE (m_residue_prev)
    7959           0 :       DEALLOCATE (m_step)
    7960           0 :       DEALLOCATE (m_zet)
    7961           0 :       DEALLOCATE (m_zet_prev)
    7962           0 :       DEALLOCATE (m_prec)
    7963           0 :       DEALLOCATE (m_Hstep)
    7964           0 :       DEALLOCATE (m_s_vv)
    7965           0 :       DEALLOCATE (m_f_vv)
    7966           0 :       DEALLOCATE (m_f_vo)
    7967           0 :       DEALLOCATE (m_STsiginv)
    7968           0 :       DEALLOCATE (residue_max_norm)
    7969             : 
    7970           0 :       IF (.NOT. converged) THEN
    7971           0 :          CPABORT("Optimization not converged!")
    7972             :       END IF
    7973             : 
    7974             :       ! check that the step satisfies H.step=-grad
    7975             : 
    7976           0 :       CALL timestop(handle)
    7977             : 
    7978           0 :    END SUBROUTINE newton_grad_to_step
    7979             : 
    7980             : ! *****************************************************************************
    7981             : !> \brief Computes Hessian.X
    7982             : !> \param m_x_in ...
    7983             : !> \param m_x_out ...
    7984             : !> \param m_ks ...
    7985             : !> \param m_s ...
    7986             : !> \param m_siginv ...
    7987             : !> \param m_quench_t ...
    7988             : !> \param m_FTsiginv ...
    7989             : !> \param m_siginvTFTsiginv ...
    7990             : !> \param m_ST ...
    7991             : !> \param m_STsiginv ...
    7992             : !> \param m_s_vv ...
    7993             : !> \param m_ks_vv ...
    7994             : !> \param m_g_full ...
    7995             : !> \param m_t ...
    7996             : !> \param m_sig_sqrti_ii ...
    7997             : !> \param penalty_occ_vol ...
    7998             : !> \param normalize_orbitals ...
    7999             : !> \param penalty_occ_vol_prefactor ...
    8000             : !> \param eps_filter ...
    8001             : !> \param path_num ...
    8002             : !> \par History
    8003             : !>       2015.04 created [Rustam Z Khaliullin]
    8004             : !> \author Rustam Z Khaliullin
    8005             : ! **************************************************************************************************
    8006           0 :    SUBROUTINE apply_hessian(m_x_in, m_x_out, m_ks, m_s, m_siginv, &
    8007           0 :                             m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, m_STsiginv, m_s_vv, &
    8008           0 :                             m_ks_vv, m_g_full, m_t, m_sig_sqrti_ii, penalty_occ_vol, &
    8009           0 :                             normalize_orbitals, penalty_occ_vol_prefactor, eps_filter, path_num)
    8010             : 
    8011             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_x_in, m_x_out, m_ks, m_s
    8012             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_siginv, m_quench_t, m_FTsiginv, &
    8013             :                                                             m_siginvTFTsiginv, m_ST, m_STsiginv
    8014             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_s_vv, m_ks_vv, m_g_full
    8015             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_t, m_sig_sqrti_ii
    8016             :       LOGICAL, INTENT(IN)                                :: penalty_occ_vol, normalize_orbitals
    8017             :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: penalty_occ_vol_prefactor
    8018             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    8019             :       INTEGER, INTENT(IN)                                :: path_num
    8020             : 
    8021             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'apply_hessian'
    8022             : 
    8023             :       INTEGER                                            :: dim0, handle, ispin, nspins
    8024             :       REAL(KIND=dp)                                      :: penalty_prefactor_local, spin_factor
    8025           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: tg_diagonal
    8026             :       TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_no_2, m_tmp_oo_1, &
    8027             :                                                             m_tmp_x_in
    8028             : 
    8029           0 :       CALL timeset(routineN, handle)
    8030             : 
    8031             :       !JHU: test and use for unused debug variables
    8032           0 :       IF (penalty_occ_vol) penalty_prefactor_local = 1._dp
    8033           0 :       CPASSERT(SIZE(m_STsiginv) >= 0)
    8034           0 :       CPASSERT(SIZE(m_siginvTFTsiginv) >= 0)
    8035           0 :       CPASSERT(SIZE(m_s) >= 0)
    8036           0 :       CPASSERT(SIZE(m_g_full) >= 0)
    8037           0 :       CPASSERT(SIZE(m_FTsiginv) >= 0)
    8038             :       MARK_USED(m_siginvTFTsiginv)
    8039             :       MARK_USED(m_STsiginv)
    8040             :       MARK_USED(m_FTsiginv)
    8041             :       MARK_USED(m_g_full)
    8042             :       MARK_USED(m_s)
    8043             : 
    8044           0 :       nspins = SIZE(m_ks)
    8045             : 
    8046           0 :       IF (nspins .EQ. 1) THEN
    8047             :          spin_factor = 2.0_dp
    8048             :       ELSE
    8049           0 :          spin_factor = 1.0_dp
    8050             :       END IF
    8051             : 
    8052           0 :       DO ispin = 1, nspins
    8053             : 
    8054           0 :          penalty_prefactor_local = penalty_occ_vol_prefactor(ispin)/(2.0_dp*spin_factor)
    8055             : 
    8056             :          CALL dbcsr_create(m_tmp_oo_1, &
    8057             :                            template=m_siginv(ispin), &
    8058           0 :                            matrix_type=dbcsr_type_no_symmetry)
    8059             :          CALL dbcsr_create(m_tmp_no_1, &
    8060             :                            template=m_quench_t(ispin), &
    8061           0 :                            matrix_type=dbcsr_type_no_symmetry)
    8062             :          CALL dbcsr_create(m_tmp_no_2, &
    8063             :                            template=m_quench_t(ispin), &
    8064           0 :                            matrix_type=dbcsr_type_no_symmetry)
    8065             :          CALL dbcsr_create(m_tmp_x_in, &
    8066             :                            template=m_quench_t(ispin), &
    8067           0 :                            matrix_type=dbcsr_type_no_symmetry)
    8068             : 
    8069             :          ! transform the input X to take into account the normalization constraint
    8070           0 :          IF (normalize_orbitals) THEN
    8071             : 
    8072             :             ! H.D = ( (H.D) - ST.[tr(T).(H.D)]_ii ) . [sig_sqrti]_ii
    8073             : 
    8074             :             ! get [tr(T).HD]_ii
    8075           0 :             CALL dbcsr_copy(m_tmp_oo_1, m_sig_sqrti_ii(ispin))
    8076             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    8077             :                                 m_x_in(ispin), &
    8078             :                                 m_ST(ispin), &
    8079             :                                 0.0_dp, m_tmp_oo_1, &
    8080           0 :                                 retain_sparsity=.TRUE.)
    8081           0 :             CALL dbcsr_get_info(m_sig_sqrti_ii(ispin), nfullrows_total=dim0)
    8082           0 :             ALLOCATE (tg_diagonal(dim0))
    8083           0 :             CALL dbcsr_get_diag(m_tmp_oo_1, tg_diagonal)
    8084           0 :             CALL dbcsr_set(m_tmp_oo_1, 0.0_dp)
    8085           0 :             CALL dbcsr_set_diag(m_tmp_oo_1, tg_diagonal)
    8086           0 :             DEALLOCATE (tg_diagonal)
    8087             : 
    8088           0 :             CALL dbcsr_copy(m_tmp_no_1, m_x_in(ispin))
    8089             :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    8090             :                                 m_t(ispin), &
    8091             :                                 m_tmp_oo_1, &
    8092             :                                 1.0_dp, m_tmp_no_1, &
    8093           0 :                                 filter_eps=eps_filter)
    8094             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8095             :                                 m_tmp_no_1, &
    8096             :                                 m_sig_sqrti_ii(ispin), &
    8097             :                                 0.0_dp, m_tmp_x_in, &
    8098           0 :                                 filter_eps=eps_filter)
    8099             : 
    8100             :          ELSE
    8101             : 
    8102           0 :             CALL dbcsr_copy(m_tmp_x_in, m_x_in(ispin))
    8103             : 
    8104             :          END IF ! normalize_orbitals
    8105             : 
    8106           0 :          IF (path_num .EQ. hessian_path_reuse) THEN
    8107             : 
    8108             :             ! apply pre-computed F_vv and S_vv to X
    8109             : 
    8110             : #if 0
    8111             : ! RZK-warning: negative sign at penalty_prefactor_local is that
    8112             : ! magical fix for the negative definite problem
    8113             : ! (since penalty_prefactor_local<0 the coeff before S_vv must
    8114             : ! be multiplied by -1 to take the step in the right direction)
    8115             : !CALL dbcsr_multiply("N","N",-4.0_dp*penalty_prefactor_local,&
    8116             : !        m_s_vv(ispin),&
    8117             : !        m_tmp_x_in,&
    8118             : !        0.0_dp,m_tmp_no_1,&
    8119             : !        filter_eps=eps_filter)
    8120             : !CALL dbcsr_copy(m_x_out(ispin),m_quench_t(ispin))
    8121             : !CALL dbcsr_multiply("N","N",1.0_dp,&
    8122             : !        m_tmp_no_1,&
    8123             : !        m_siginv(ispin),&
    8124             : !        0.0_dp,m_x_out(ispin),&
    8125             : !        retain_sparsity=.TRUE.)
    8126             : 
    8127             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8128             :                                 m_s(1), &
    8129             :                                 m_tmp_x_in, &
    8130             :                                 0.0_dp, m_tmp_no_1, &
    8131             :                                 filter_eps=eps_filter)
    8132             :             CALL dbcsr_copy(m_x_out(ispin), m_quench_t(ispin))
    8133             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8134             :                                 m_tmp_no_1, &
    8135             :                                 m_siginv(ispin), &
    8136             :                                 0.0_dp, m_x_out(ispin), &
    8137             :                                 retain_sparsity=.TRUE.)
    8138             : 
    8139             : !CALL dbcsr_copy(m_x_out(ispin),m_quench_t(ispin))
    8140             : !CALL dbcsr_multiply("N","N",1.0_dp,&
    8141             : !        m_s(1),&
    8142             : !        m_tmp_x_in,&
    8143             : !        0.0_dp,m_x_out(ispin),&
    8144             : !        retain_sparsity=.TRUE.)
    8145             : 
    8146             : #else
    8147             : 
    8148             :             ! debugging: only vv matrices, oo matrices are kronecker
    8149           0 :             CALL dbcsr_copy(m_x_out(ispin), m_quench_t(ispin))
    8150             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8151             :                                 m_ks_vv(ispin), &
    8152             :                                 m_tmp_x_in, &
    8153             :                                 0.0_dp, m_x_out(ispin), &
    8154           0 :                                 retain_sparsity=.TRUE.)
    8155             : 
    8156           0 :             CALL dbcsr_copy(m_tmp_no_2, m_quench_t(ispin))
    8157             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8158             :                                 m_s_vv(ispin), &
    8159             :                                 m_tmp_x_in, &
    8160             :                                 0.0_dp, m_tmp_no_2, &
    8161           0 :                                 retain_sparsity=.TRUE.)
    8162             :             CALL dbcsr_add(m_x_out(ispin), m_tmp_no_2, &
    8163           0 :                            1.0_dp, -4.0_dp*penalty_prefactor_local + 1.0_dp)
    8164             : #endif
    8165             : 
    8166             : !          ! F_vv.X.S_oo
    8167             : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8168             : !                  m_ks_vv(ispin),&
    8169             : !                  m_tmp_x_in,&
    8170             : !                  0.0_dp,m_tmp_no_1,&
    8171             : !                  filter_eps=eps_filter,&
    8172             : !                  )
    8173             : !          CALL dbcsr_copy(m_x_out(ispin),m_quench_t(ispin))
    8174             : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8175             : !                  m_tmp_no_1,&
    8176             : !                  m_siginv(ispin),&
    8177             : !                  0.0_dp,m_x_out(ispin),&
    8178             : !                  retain_sparsity=.TRUE.,&
    8179             : !                  )
    8180             : !
    8181             : !          ! S_vv.X.F_oo
    8182             : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8183             : !                  m_s_vv(ispin),&
    8184             : !                  m_tmp_x_in,&
    8185             : !                  0.0_dp,m_tmp_no_1,&
    8186             : !                  filter_eps=eps_filter,&
    8187             : !                  )
    8188             : !          CALL dbcsr_copy(m_tmp_no_2,m_quench_t(ispin))
    8189             : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8190             : !                  m_tmp_no_1,&
    8191             : !                  m_siginvTFTsiginv(ispin),&
    8192             : !                  0.0_dp,m_tmp_no_2,&
    8193             : !                  retain_sparsity=.TRUE.,&
    8194             : !                  )
    8195             : !          CALL dbcsr_add(m_x_out(ispin),m_tmp_no_2,&
    8196             : !               1.0_dp,-1.0_dp)
    8197             : !! we have to add occ voll penalty here (the Svv termi (i.e. both Svv.D.Soo)
    8198             : !!  and STsiginv terms)
    8199             : !
    8200             : !         ! S_vo.X^t.F_vo
    8201             : !          CALL dbcsr_multiply("T","N",1.0_dp,&
    8202             : !                  m_tmp_x_in,&
    8203             : !                  m_g_full(ispin),&
    8204             : !                  0.0_dp,m_tmp_oo_1,&
    8205             : !                  filter_eps=eps_filter,&
    8206             : !                  )
    8207             : !          CALL dbcsr_copy(m_tmp_no_2,m_quench_t(ispin))
    8208             : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8209             : !                  m_STsiginv(ispin),&
    8210             : !                  m_tmp_oo_1,&
    8211             : !                  0.0_dp,m_tmp_no_2,&
    8212             : !                  retain_sparsity=.TRUE.,&
    8213             : !                  )
    8214             : !          CALL dbcsr_add(m_x_out(ispin),m_tmp_no_2,&
    8215             : !                  1.0_dp,-1.0_dp)
    8216             : !
    8217             : !          ! S_vo.X^t.F_vo
    8218             : !          CALL dbcsr_multiply("T","N",1.0_dp,&
    8219             : !                  m_tmp_x_in,&
    8220             : !                  m_STsiginv(ispin),&
    8221             : !                  0.0_dp,m_tmp_oo_1,&
    8222             : !                  filter_eps=eps_filter,&
    8223             : !                  )
    8224             : !          CALL dbcsr_copy(m_tmp_no_2,m_quench_t(ispin))
    8225             : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8226             : !                  m_g_full(ispin),&
    8227             : !                  m_tmp_oo_1,&
    8228             : !                  0.0_dp,m_tmp_no_2,&
    8229             : !                  retain_sparsity=.TRUE.,&
    8230             : !                  )
    8231             : !          CALL dbcsr_add(m_x_out(ispin),m_tmp_no_2,&
    8232             : !                  1.0_dp,-1.0_dp)
    8233             : 
    8234           0 :          ELSE IF (path_num .EQ. hessian_path_assemble) THEN
    8235             : 
    8236             :             ! compute F_vv.X and S_vv.X directly
    8237             :             ! this path will be advantageous if the number
    8238             :             ! of PCG iterations is small
    8239           0 :             CPABORT("path is NYI")
    8240             : 
    8241             :          ELSE
    8242           0 :             CPABORT("illegal path")
    8243             :          END IF ! path
    8244             : 
    8245             :          ! transform the output to take into account the normalization constraint
    8246           0 :          IF (normalize_orbitals) THEN
    8247             : 
    8248             :             ! H.D = ( (H.D) - ST.[tr(T).(H.D)]_ii ) . [sig_sqrti]_ii
    8249             : 
    8250             :             ! get [tr(T).HD]_ii
    8251           0 :             CALL dbcsr_copy(m_tmp_oo_1, m_sig_sqrti_ii(ispin))
    8252             :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    8253             :                                 m_t(ispin), &
    8254             :                                 m_x_out(ispin), &
    8255             :                                 0.0_dp, m_tmp_oo_1, &
    8256           0 :                                 retain_sparsity=.TRUE.)
    8257           0 :             CALL dbcsr_get_info(m_sig_sqrti_ii(ispin), nfullrows_total=dim0)
    8258           0 :             ALLOCATE (tg_diagonal(dim0))
    8259           0 :             CALL dbcsr_get_diag(m_tmp_oo_1, tg_diagonal)
    8260           0 :             CALL dbcsr_set(m_tmp_oo_1, 0.0_dp)
    8261           0 :             CALL dbcsr_set_diag(m_tmp_oo_1, tg_diagonal)
    8262           0 :             DEALLOCATE (tg_diagonal)
    8263             : 
    8264             :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    8265             :                                 m_ST(ispin), &
    8266             :                                 m_tmp_oo_1, &
    8267             :                                 1.0_dp, m_x_out(ispin), &
    8268           0 :                                 retain_sparsity=.TRUE.)
    8269           0 :             CALL dbcsr_copy(m_tmp_no_1, m_x_out(ispin))
    8270             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8271             :                                 m_tmp_no_1, &
    8272             :                                 m_sig_sqrti_ii(ispin), &
    8273             :                                 0.0_dp, m_x_out(ispin), &
    8274           0 :                                 retain_sparsity=.TRUE.)
    8275             : 
    8276             :          END IF ! normalize_orbitals
    8277             : 
    8278             :          CALL dbcsr_scale(m_x_out(ispin), &
    8279           0 :                           2.0_dp*spin_factor)
    8280             : 
    8281           0 :          CALL dbcsr_release(m_tmp_oo_1)
    8282           0 :          CALL dbcsr_release(m_tmp_no_1)
    8283           0 :          CALL dbcsr_release(m_tmp_no_2)
    8284           0 :          CALL dbcsr_release(m_tmp_x_in)
    8285             : 
    8286             :       END DO !ispin
    8287             : 
    8288             :       ! there is one more part of the hessian that comes
    8289             :       ! from T-dependence of the KS matrix
    8290             :       ! it is neglected here
    8291             : 
    8292           0 :       CALL timestop(handle)
    8293             : 
    8294           0 :    END SUBROUTINE apply_hessian
    8295             : 
    8296             : ! *****************************************************************************
    8297             : !> \brief Serial code that constructs an approximate Hessian
    8298             : !> \param matrix_grad ...
    8299             : !> \param matrix_step ...
    8300             : !> \param matrix_S_ao ...
    8301             : !> \param matrix_F_ao ...
    8302             : !> \param matrix_S_mo ...
    8303             : !> \param matrix_F_mo ...
    8304             : !> \param matrix_S_vo ...
    8305             : !> \param matrix_F_vo ...
    8306             : !> \param quench_t ...
    8307             : !> \param penalty_occ_vol ...
    8308             : !> \param penalty_occ_vol_prefactor ...
    8309             : !> \param penalty_occ_vol_pf2 ...
    8310             : !> \param spin_factor ...
    8311             : !> \param eps_zero ...
    8312             : !> \param m_s ...
    8313             : !> \param para_env ...
    8314             : !> \param blacs_env ...
    8315             : !> \par History
    8316             : !>       2012.02 created [Rustam Z. Khaliullin]
    8317             : !> \author Rustam Z. Khaliullin
    8318             : ! **************************************************************************************************
    8319           0 :    SUBROUTINE hessian_diag_apply(matrix_grad, matrix_step, matrix_S_ao, &
    8320             :                                  matrix_F_ao, matrix_S_mo, matrix_F_mo, matrix_S_vo, matrix_F_vo, quench_t, &
    8321             :                                  penalty_occ_vol, penalty_occ_vol_prefactor, penalty_occ_vol_pf2, &
    8322             :                                  spin_factor, eps_zero, m_s, para_env, blacs_env)
    8323             : 
    8324             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_grad, matrix_step, matrix_S_ao, &
    8325             :                                                             matrix_F_ao, matrix_S_mo
    8326             :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix_F_mo
    8327             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_S_vo, matrix_F_vo, quench_t
    8328             :       LOGICAL, INTENT(IN)                                :: penalty_occ_vol
    8329             :       REAL(KIND=dp), INTENT(IN)                          :: penalty_occ_vol_prefactor, &
    8330             :                                                             penalty_occ_vol_pf2, spin_factor, &
    8331             :                                                             eps_zero
    8332             :       TYPE(dbcsr_type), INTENT(IN)                       :: m_s
    8333             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    8334             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    8335             : 
    8336             :       CHARACTER(len=*), PARAMETER :: routineN = 'hessian_diag_apply'
    8337             : 
    8338             :       INTEGER :: ao_hori_offset, ao_vert_offset, block_col, block_row, col, H_size, handle, ii, &
    8339             :          INFO, jj, lev1_hori_offset, lev1_vert_offset, lev2_hori_offset, lev2_vert_offset, LWORK, &
    8340             :          nblkcols_tot, nblkrows_tot, ncores, orb_i, orb_j, row, unit_nr, zero_neg_eiv
    8341           0 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ao_block_sizes, ao_domain_sizes, &
    8342           0 :                                                             mo_block_sizes
    8343           0 :       INTEGER, DIMENSION(:), POINTER                     :: ao_blk_sizes, mo_blk_sizes
    8344             :       LOGICAL                                            :: found, found_col, found_row
    8345             :       REAL(KIND=dp)                                      :: penalty_prefactor_local, test_error
    8346           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: eigenvalues, Grad_vec, Step_vec, tmp, &
    8347           0 :                                                             tmpr, work
    8348           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: F_ao_block, F_mo_block, H, Hinv, &
    8349           0 :                                                             S_ao_block, S_mo_block, test, test2
    8350           0 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block_p, p_new_block
    8351             :       TYPE(cp_logger_type), POINTER                      :: logger
    8352             :       TYPE(dbcsr_distribution_type)                      :: main_dist
    8353             :       TYPE(dbcsr_type)                                   :: matrix_F_ao_sym, matrix_F_mo_sym, &
    8354             :                                                             matrix_S_ao_sym, matrix_S_mo_sym
    8355             : 
    8356           0 :       CALL timeset(routineN, handle)
    8357             : 
    8358             :       ! get a useful output_unit
    8359           0 :       logger => cp_get_default_logger()
    8360           0 :       IF (logger%para_env%is_source()) THEN
    8361           0 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    8362             :       ELSE
    8363             :          unit_nr = -1
    8364             :       END IF
    8365             : 
    8366             :       !JHU use and test for unused debug variables
    8367           0 :       CPASSERT(ASSOCIATED(blacs_env))
    8368           0 :       CPASSERT(ASSOCIATED(para_env))
    8369             :       MARK_USED(blacs_env)
    8370             :       MARK_USED(para_env)
    8371             : 
    8372           0 :       CALL dbcsr_get_info(m_s, row_blk_size=ao_blk_sizes)
    8373           0 :       CALL dbcsr_get_info(matrix_S_vo, row_blk_size=ao_blk_sizes)
    8374           0 :       CALL dbcsr_get_info(matrix_F_vo, row_blk_size=ao_blk_sizes)
    8375             : 
    8376             :       ! serial code only
    8377           0 :       CALL dbcsr_get_info(matrix=matrix_S_ao, distribution=main_dist)
    8378           0 :       CALL dbcsr_distribution_get(main_dist, numnodes=ncores)
    8379           0 :       IF (ncores .GT. 1) THEN
    8380           0 :          CPABORT("serial code only")
    8381             :       END IF
    8382             : 
    8383           0 :       nblkrows_tot = dbcsr_nblkrows_total(quench_t)
    8384           0 :       nblkcols_tot = dbcsr_nblkcols_total(quench_t)
    8385           0 :       CPASSERT(nblkrows_tot == nblkcols_tot)
    8386           0 :       CALL dbcsr_get_info(quench_t, row_blk_size=ao_blk_sizes)
    8387           0 :       CALL dbcsr_get_info(quench_t, col_blk_size=mo_blk_sizes)
    8388           0 :       ALLOCATE (mo_block_sizes(nblkcols_tot), ao_block_sizes(nblkcols_tot))
    8389           0 :       ALLOCATE (ao_domain_sizes(nblkcols_tot))
    8390           0 :       mo_block_sizes(:) = mo_blk_sizes(:)
    8391           0 :       ao_block_sizes(:) = ao_blk_sizes(:)
    8392           0 :       ao_domain_sizes(:) = 0
    8393             : 
    8394             :       CALL dbcsr_create(matrix_S_ao_sym, &
    8395             :                         template=matrix_S_ao, &
    8396           0 :                         matrix_type=dbcsr_type_no_symmetry)
    8397           0 :       CALL dbcsr_desymmetrize(matrix_S_ao, matrix_S_ao_sym)
    8398           0 :       CALL dbcsr_scale(matrix_S_ao_sym, 2.0_dp*spin_factor)
    8399             : 
    8400             :       CALL dbcsr_create(matrix_F_ao_sym, &
    8401             :                         template=matrix_F_ao, &
    8402           0 :                         matrix_type=dbcsr_type_no_symmetry)
    8403           0 :       CALL dbcsr_desymmetrize(matrix_F_ao, matrix_F_ao_sym)
    8404           0 :       CALL dbcsr_scale(matrix_F_ao_sym, 2.0_dp*spin_factor)
    8405             : 
    8406             :       CALL dbcsr_create(matrix_S_mo_sym, &
    8407             :                         template=matrix_S_mo, &
    8408           0 :                         matrix_type=dbcsr_type_no_symmetry)
    8409           0 :       CALL dbcsr_desymmetrize(matrix_S_mo, matrix_S_mo_sym)
    8410             : 
    8411             :       CALL dbcsr_create(matrix_F_mo_sym, &
    8412             :                         template=matrix_F_mo, &
    8413           0 :                         matrix_type=dbcsr_type_no_symmetry)
    8414           0 :       CALL dbcsr_desymmetrize(matrix_F_mo, matrix_F_mo_sym)
    8415             : 
    8416           0 :       IF (penalty_occ_vol) THEN
    8417           0 :          penalty_prefactor_local = penalty_occ_vol_prefactor/(2.0_dp*spin_factor)
    8418             :       ELSE
    8419           0 :          penalty_prefactor_local = 0.0_dp
    8420             :       END IF
    8421             : 
    8422           0 :       WRITE (unit_nr, *) "penalty_prefactor_local: ", penalty_prefactor_local
    8423           0 :       WRITE (unit_nr, *) "penalty_prefactor_2: ", penalty_occ_vol_pf2
    8424             : 
    8425             :       !CALL dbcsr_print(matrix_grad)
    8426             :       !CALL dbcsr_print(matrix_F_ao_sym)
    8427             :       !CALL dbcsr_print(matrix_S_ao_sym)
    8428             :       !CALL dbcsr_print(matrix_F_mo_sym)
    8429             :       !CALL dbcsr_print(matrix_S_mo_sym)
    8430             : 
    8431             :       ! loop over domains to find the size of the Hessian
    8432           0 :       H_size = 0
    8433           0 :       DO col = 1, nblkcols_tot
    8434             : 
    8435             :          ! find sizes of AO submatrices
    8436           0 :          DO row = 1, nblkrows_tot
    8437             : 
    8438             :             CALL dbcsr_get_block_p(quench_t, &
    8439           0 :                                    row, col, block_p, found)
    8440           0 :             IF (found) THEN
    8441           0 :                ao_domain_sizes(col) = ao_domain_sizes(col) + ao_blk_sizes(row)
    8442             :             END IF
    8443             : 
    8444             :          END DO
    8445             : 
    8446           0 :          H_size = H_size + ao_domain_sizes(col)*mo_block_sizes(col)
    8447             : 
    8448             :       END DO
    8449             : 
    8450           0 :       ALLOCATE (H(H_size, H_size))
    8451           0 :       H(:, :) = 0.0_dp
    8452             : 
    8453             :       ! fill the Hessian matrix
    8454           0 :       lev1_vert_offset = 0
    8455             :       ! loop over all pairs of fragments
    8456           0 :       DO row = 1, nblkcols_tot
    8457             : 
    8458           0 :          lev1_hori_offset = 0
    8459           0 :          DO col = 1, nblkcols_tot
    8460             : 
    8461             :             ! prepare blocks for the current row-column fragment pair
    8462           0 :             ALLOCATE (F_ao_block(ao_domain_sizes(row), ao_domain_sizes(col)))
    8463           0 :             ALLOCATE (S_ao_block(ao_domain_sizes(row), ao_domain_sizes(col)))
    8464           0 :             ALLOCATE (F_mo_block(mo_block_sizes(row), mo_block_sizes(col)))
    8465           0 :             ALLOCATE (S_mo_block(mo_block_sizes(row), mo_block_sizes(col)))
    8466             : 
    8467           0 :             F_ao_block(:, :) = 0.0_dp
    8468           0 :             S_ao_block(:, :) = 0.0_dp
    8469           0 :             F_mo_block(:, :) = 0.0_dp
    8470           0 :             S_mo_block(:, :) = 0.0_dp
    8471             : 
    8472             :             ! fill AO submatrices
    8473             :             ! loop over all blocks of the AO dbcsr matrix
    8474           0 :             ao_vert_offset = 0
    8475           0 :             DO block_row = 1, nblkcols_tot
    8476             : 
    8477             :                CALL dbcsr_get_block_p(quench_t, &
    8478           0 :                                       block_row, row, block_p, found_row)
    8479           0 :                IF (found_row) THEN
    8480             : 
    8481           0 :                   ao_hori_offset = 0
    8482           0 :                   DO block_col = 1, nblkcols_tot
    8483             : 
    8484             :                      CALL dbcsr_get_block_p(quench_t, &
    8485           0 :                                             block_col, col, block_p, found_col)
    8486           0 :                      IF (found_col) THEN
    8487             : 
    8488             :                         CALL dbcsr_get_block_p(matrix_F_ao_sym, &
    8489           0 :                                                block_row, block_col, block_p, found)
    8490           0 :                         IF (found) THEN
    8491             :                            ! copy the block into the submatrix
    8492             :                            F_ao_block(ao_vert_offset + 1:ao_vert_offset + ao_block_sizes(block_row), &
    8493             :                                       ao_hori_offset + 1:ao_hori_offset + ao_block_sizes(block_col)) &
    8494           0 :                               = block_p(:, :)
    8495             :                         END IF
    8496             : 
    8497             :                         CALL dbcsr_get_block_p(matrix_S_ao_sym, &
    8498           0 :                                                block_row, block_col, block_p, found)
    8499           0 :                         IF (found) THEN
    8500             :                            ! copy the block into the submatrix
    8501             :                            S_ao_block(ao_vert_offset + 1:ao_vert_offset + ao_block_sizes(block_row), &
    8502             :                                       ao_hori_offset + 1:ao_hori_offset + ao_block_sizes(block_col)) &
    8503           0 :                               = block_p(:, :)
    8504             :                         END IF
    8505             : 
    8506           0 :                         ao_hori_offset = ao_hori_offset + ao_block_sizes(block_col)
    8507             : 
    8508             :                      END IF
    8509             : 
    8510             :                   END DO
    8511             : 
    8512           0 :                   ao_vert_offset = ao_vert_offset + ao_block_sizes(block_row)
    8513             : 
    8514             :                END IF
    8515             : 
    8516             :             END DO
    8517             : 
    8518             :             ! fill MO submatrices
    8519           0 :             CALL dbcsr_get_block_p(matrix_F_mo_sym, row, col, block_p, found)
    8520           0 :             IF (found) THEN
    8521             :                ! copy the block into the submatrix
    8522           0 :                F_mo_block(1:mo_block_sizes(row), 1:mo_block_sizes(col)) = block_p(:, :)
    8523             :             END IF
    8524           0 :             CALL dbcsr_get_block_p(matrix_S_mo_sym, row, col, block_p, found)
    8525           0 :             IF (found) THEN
    8526             :                ! copy the block into the submatrix
    8527           0 :                S_mo_block(1:mo_block_sizes(row), 1:mo_block_sizes(col)) = block_p(:, :)
    8528             :             END IF
    8529             : 
    8530             :             !WRITE(*,*) "F_AO_BLOCK", row, col, ao_domain_sizes(row), ao_domain_sizes(col)
    8531             :             !DO ii=1,ao_domain_sizes(row)
    8532             :             !  WRITE(*,'(100F13.9)') F_ao_block(ii,:)
    8533             :             !ENDDO
    8534             :             !WRITE(*,*) "S_AO_BLOCK", row, col
    8535             :             !DO ii=1,ao_domain_sizes(row)
    8536             :             !  WRITE(*,'(100F13.9)') S_ao_block(ii,:)
    8537             :             !ENDDO
    8538             :             !WRITE(*,*) "F_MO_BLOCK", row, col
    8539             :             !DO ii=1,mo_block_sizes(row)
    8540             :             !  WRITE(*,'(100F13.9)') F_mo_block(ii,:)
    8541             :             !ENDDO
    8542             :             !WRITE(*,*) "S_MO_BLOCK", row, col, mo_block_sizes(row), mo_block_sizes(col)
    8543             :             !DO ii=1,mo_block_sizes(row)
    8544             :             !  WRITE(*,'(100F13.9)') S_mo_block(ii,:)
    8545             :             !ENDDO
    8546             : 
    8547             :             ! construct tensor products for the current row-column fragment pair
    8548             :             lev2_vert_offset = 0
    8549           0 :             DO orb_j = 1, mo_block_sizes(row)
    8550             : 
    8551             :                lev2_hori_offset = 0
    8552           0 :                DO orb_i = 1, mo_block_sizes(col)
    8553           0 :                   IF (orb_i .EQ. orb_j .AND. row .EQ. col) THEN
    8554             :                      H(lev1_vert_offset + lev2_vert_offset + 1:lev1_vert_offset + lev2_vert_offset + ao_domain_sizes(row), &
    8555             :                        lev1_hori_offset + lev2_hori_offset + 1:lev1_hori_offset + lev2_hori_offset + ao_domain_sizes(col)) &
    8556             :                         != -penalty_prefactor_local*S_ao_block(:,:)
    8557           0 :                         = F_ao_block(:, :) + S_ao_block(:, :)
    8558             : !=S_ao_block(:,:)
    8559             : !RZK-warning               =F_ao_block(:,:)+( 1.0_dp + penalty_prefactor_local )*S_ao_block(:,:)
    8560             : !               =S_mo_block(orb_j,orb_i)*F_ao_block(:,:)&
    8561             : !               -F_mo_block(orb_j,orb_i)*S_ao_block(:,:)&
    8562             : !               +penalty_prefactor_local*S_mo_block(orb_j,orb_i)*S_ao_block(:,:)
    8563             :                   END IF
    8564             :                   !WRITE(*,*) row, col, orb_j, orb_i, lev1_vert_offset+lev2_vert_offset+1, ao_domain_sizes(row),&
    8565             :                   !   lev1_hori_offset+lev2_hori_offset+1, ao_domain_sizes(col), S_mo_block(orb_j,orb_i)
    8566             : 
    8567           0 :                   lev2_hori_offset = lev2_hori_offset + ao_domain_sizes(col)
    8568             : 
    8569             :                END DO
    8570             : 
    8571           0 :                lev2_vert_offset = lev2_vert_offset + ao_domain_sizes(row)
    8572             : 
    8573             :             END DO
    8574             : 
    8575           0 :             lev1_hori_offset = lev1_hori_offset + ao_domain_sizes(col)*mo_block_sizes(col)
    8576             : 
    8577           0 :             DEALLOCATE (F_ao_block)
    8578           0 :             DEALLOCATE (S_ao_block)
    8579           0 :             DEALLOCATE (F_mo_block)
    8580           0 :             DEALLOCATE (S_mo_block)
    8581             : 
    8582             :          END DO ! col fragment
    8583             : 
    8584           0 :          lev1_vert_offset = lev1_vert_offset + ao_domain_sizes(row)*mo_block_sizes(row)
    8585             : 
    8586             :       END DO ! row fragment
    8587             : 
    8588           0 :       CALL dbcsr_release(matrix_S_ao_sym)
    8589           0 :       CALL dbcsr_release(matrix_F_ao_sym)
    8590           0 :       CALL dbcsr_release(matrix_S_mo_sym)
    8591           0 :       CALL dbcsr_release(matrix_F_mo_sym)
    8592             : 
    8593             : !!    ! Two more terms of the Hessian: S_vo.D.F_vo and F_vo.D.S_vo
    8594             : !!    ! It seems that these terms break positive definite property of the Hessian
    8595             : !!    ALLOCATE(H1(H_size,H_size))
    8596             : !!    ALLOCATE(H2(H_size,H_size))
    8597             : !!    H1=0.0_dp
    8598             : !!    H2=0.0_dp
    8599             : !!    DO row = 1, nblkcols_tot
    8600             : !!
    8601             : !!       lev1_hori_offset=0
    8602             : !!       DO col = 1, nblkcols_tot
    8603             : !!
    8604             : !!          CALL dbcsr_get_block_p(matrix_F_vo,&
    8605             : !!                  row, col, block_p, found)
    8606             : !!          CALL dbcsr_get_block_p(matrix_S_vo,&
    8607             : !!                  row, col, block_p2, found2)
    8608             : !!
    8609             : !!          lev1_vert_offset=0
    8610             : !!          DO block_col = 1, nblkcols_tot
    8611             : !!
    8612             : !!             CALL dbcsr_get_block_p(quench_t,&
    8613             : !!                     row, block_col, p_new_block, found_row)
    8614             : !!
    8615             : !!             IF (found_row) THEN
    8616             : !!
    8617             : !!                ! determine offset in this short loop
    8618             : !!                lev2_vert_offset=0
    8619             : !!                DO block_row=1,row-1
    8620             : !!                   CALL dbcsr_get_block_p(quench_t,&
    8621             : !!                           block_row, block_col, p_new_block, found_col)
    8622             : !!                   IF (found_col) lev2_vert_offset=lev2_vert_offset+ao_block_sizes(block_row)
    8623             : !!                ENDDO
    8624             : !!                !!!!!!!! short loop
    8625             : !!
    8626             : !!                ! over all electrons of the block
    8627             : !!                DO orb_i=1, mo_block_sizes(col)
    8628             : !!
    8629             : !!                   ! into all possible locations
    8630             : !!                   DO orb_j=1, mo_block_sizes(block_col)
    8631             : !!
    8632             : !!                      ! column is copied several times
    8633             : !!                      DO copy=1, ao_domain_sizes(col)
    8634             : !!
    8635             : !!                         IF (found) THEN
    8636             : !!
    8637             : !!                            !WRITE(*,*) row, col, block_col, orb_i, orb_j, copy,&
    8638             : !!                            ! lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+1,&
    8639             : !!                            ! lev1_hori_offset+(orb_i-1)*ao_domain_sizes(col)+copy
    8640             : !!
    8641             : !!                            H1( lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+1:&
    8642             : !!                                lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+ao_block_sizes(row),&
    8643             : !!                                lev1_hori_offset+(orb_i-1)*ao_domain_sizes(col)+copy )&
    8644             : !!                              =block_p(:,orb_i)
    8645             : !!
    8646             : !!                         ENDIF ! found block in the data matrix
    8647             : !!
    8648             : !!                         IF (found2) THEN
    8649             : !!
    8650             : !!                            H2( lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+1:&
    8651             : !!                                lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+ao_block_sizes(row),&
    8652             : !!                                lev1_hori_offset+(orb_i-1)*ao_domain_sizes(col)+copy )&
    8653             : !!                              =block_p2(:,orb_i)
    8654             : !!
    8655             : !!                         ENDIF ! found block in the data matrix
    8656             : !!
    8657             : !!                      ENDDO
    8658             : !!
    8659             : !!                   ENDDO
    8660             : !!
    8661             : !!                ENDDO
    8662             : !!
    8663             : !!                !lev2_vert_offset=lev2_vert_offset+ao_block_sizes(row)
    8664             : !!
    8665             : !!             ENDIF ! found block in the quench matrix
    8666             : !!
    8667             : !!             lev1_vert_offset=lev1_vert_offset+&
    8668             : !!                ao_domain_sizes(block_col)*mo_block_sizes(block_col)
    8669             : !!
    8670             : !!          ENDDO
    8671             : !!
    8672             : !!          lev1_hori_offset=lev1_hori_offset+&
    8673             : !!             ao_domain_sizes(col)*mo_block_sizes(col)
    8674             : !!
    8675             : !!       ENDDO
    8676             : !!
    8677             : !!       !lev2_vert_offset=lev2_vert_offset+ao_block_sizes(row)
    8678             : !!
    8679             : !!    ENDDO
    8680             : !!    H1(:,:)=H1(:,:)*2.0_dp*spin_factor
    8681             : !!    !!!WRITE(*,*) "F_vo"
    8682             : !!    !!!DO ii=1,H_size
    8683             : !!    !!! WRITE(*,'(100F13.9)') H1(ii,:)
    8684             : !!    !!!ENDDO
    8685             : !!    !!!WRITE(*,*) "S_vo"
    8686             : !!    !!!DO ii=1,H_size
    8687             : !!    !!! WRITE(*,'(100F13.9)') H2(ii,:)
    8688             : !!    !!!ENDDO
    8689             : !!    !!!!! add terms to the hessian
    8690             : !!    DO ii=1,H_size
    8691             : !!       DO jj=1,H_size
    8692             : !!! add penalty_occ_vol term
    8693             : !!          H(ii,jj)=H(ii,jj)-H1(ii,jj)*H2(jj,ii)-H1(jj,ii)*H2(ii,jj)
    8694             : !!       ENDDO
    8695             : !!    ENDDO
    8696             : !!    DEALLOCATE(H1)
    8697             : !!    DEALLOCATE(H2)
    8698             : 
    8699             : !!    ! S_vo.S_vo diagonal component due to determiant constraint
    8700             : !!    ! use grad vector temporarily
    8701             : !!    IF (penalty_occ_vol) THEN
    8702             : !!       ALLOCATE(Grad_vec(H_size))
    8703             : !!       Grad_vec(:)=0.0_dp
    8704             : !!       lev1_vert_offset=0
    8705             : !!       ! loop over all electron blocks
    8706             : !!       DO col = 1, nblkcols_tot
    8707             : !!
    8708             : !!          ! loop over AO-rows of the dbcsr matrix
    8709             : !!          lev2_vert_offset=0
    8710             : !!          DO row = 1, nblkrows_tot
    8711             : !!
    8712             : !!             CALL dbcsr_get_block_p(quench_t,&
    8713             : !!                     row, col, block_p, found_row)
    8714             : !!             IF (found_row) THEN
    8715             : !!
    8716             : !!                CALL dbcsr_get_block_p(matrix_S_vo,&
    8717             : !!                        row, col, block_p, found)
    8718             : !!                IF (found) THEN
    8719             : !!                   ! copy the data into the vector, column by column
    8720             : !!                   DO orb_i=1, mo_block_sizes(col)
    8721             : !!                      Grad_vec(lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+1:&
    8722             : !!                               lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+ao_block_sizes(row))&
    8723             : !!                               =block_p(:,orb_i)
    8724             : !!                   ENDDO
    8725             : !!
    8726             : !!                ENDIF
    8727             : !!
    8728             : !!                lev2_vert_offset=lev2_vert_offset+ao_block_sizes(row)
    8729             : !!
    8730             : !!             ENDIF
    8731             : !!
    8732             : !!          ENDDO
    8733             : !!
    8734             : !!          lev1_vert_offset=lev1_vert_offset+ao_domain_sizes(col)*mo_block_sizes(col)
    8735             : !!
    8736             : !!       ENDDO ! loop over electron blocks
    8737             : !!       ! update H now
    8738             : !!       DO ii=1,H_size
    8739             : !!          DO jj=1,H_size
    8740             : !!             H(ii,jj)=H(ii,jj)+penalty_occ_vol_prefactor*&
    8741             : !!                      penalty_occ_vol_pf2*Grad_vec(ii)*Grad_vec(jj)
    8742             : !!          ENDDO
    8743             : !!       ENDDO
    8744             : !!       DEALLOCATE(Grad_vec)
    8745             : !!    ENDIF ! penalty_occ_vol
    8746             : 
    8747             : !S-1.G ! invert S using cholesky
    8748             : !S-1.G CALL dbcsr_create(m_prec_out,&
    8749             : !S-1.G         template=m_s,&
    8750             : !S-1.G         matrix_type=dbcsr_type_no_symmetry)
    8751             : !S-1.G CALL dbcsr_copy(m_prec_out,m_s)
    8752             : !S-1.G CALL dbcsr_cholesky_decompose(m_prec_out,&
    8753             : !S-1.G         para_env=para_env,&
    8754             : !S-1.G         blacs_env=blacs_env)
    8755             : !S-1.G CALL dbcsr_cholesky_invert(m_prec_out,&
    8756             : !S-1.G         para_env=para_env,&
    8757             : !S-1.G         blacs_env=blacs_env,&
    8758             : !S-1.G         upper_to_full=.TRUE.)
    8759             : !S-1.G CALL dbcsr_multiply("N","N",1.0_dp,&
    8760             : !S-1.G         m_prec_out,&
    8761             : !S-1.G         matrix_grad,&
    8762             : !S-1.G         0.0_dp,matrix_step,&
    8763             : !S-1.G         filter_eps=1.0E-10_dp)
    8764             : !S-1.G !CALL dbcsr_release(m_prec_out)
    8765             : !S-1.G ALLOCATE(test3(H_size))
    8766             : 
    8767             :       ! convert gradient from the dbcsr matrix to the vector form
    8768           0 :       ALLOCATE (Grad_vec(H_size))
    8769           0 :       Grad_vec(:) = 0.0_dp
    8770           0 :       lev1_vert_offset = 0
    8771             :       ! loop over all electron blocks
    8772           0 :       DO col = 1, nblkcols_tot
    8773             : 
    8774             :          ! loop over AO-rows of the dbcsr matrix
    8775           0 :          lev2_vert_offset = 0
    8776           0 :          DO row = 1, nblkrows_tot
    8777             : 
    8778             :             CALL dbcsr_get_block_p(quench_t, &
    8779           0 :                                    row, col, block_p, found_row)
    8780           0 :             IF (found_row) THEN
    8781             : 
    8782             :                CALL dbcsr_get_block_p(matrix_grad, &
    8783           0 :                                       row, col, block_p, found)
    8784           0 :                IF (found) THEN
    8785             :                   ! copy the data into the vector, column by column
    8786           0 :                   DO orb_i = 1, mo_block_sizes(col)
    8787             :                      Grad_vec(lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + 1: &
    8788             :                               lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + ao_block_sizes(row)) &
    8789           0 :                         = block_p(:, orb_i)
    8790             : !WRITE(*,*) "GRAD: ", row, col, orb_i, lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+1, ao_block_sizes(row)
    8791             :                   END DO
    8792             : 
    8793             :                END IF
    8794             : 
    8795             : !S-1.G CALL dbcsr_get_block_p(matrix_step,&
    8796             : !S-1.G         row, col, block_p, found)
    8797             : !S-1.G IF (found) THEN
    8798             : !S-1.G    ! copy the data into the vector, column by column
    8799             : !S-1.G    DO orb_i=1, mo_block_sizes(col)
    8800             : !S-1.G       test3(lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+1:&
    8801             : !S-1.G                lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+ao_block_sizes(row))&
    8802             : !S-1.G                =block_p(:,orb_i)
    8803             : !S-1.G    ENDDO
    8804             : !S-1.G ENDIF
    8805             : 
    8806           0 :                lev2_vert_offset = lev2_vert_offset + ao_block_sizes(row)
    8807             : 
    8808             :             END IF
    8809             : 
    8810             :          END DO
    8811             : 
    8812           0 :          lev1_vert_offset = lev1_vert_offset + ao_domain_sizes(col)*mo_block_sizes(col)
    8813             : 
    8814             :       END DO ! loop over electron blocks
    8815             : 
    8816             :       !WRITE(*,*) "HESSIAN"
    8817             :       !DO ii=1,H_size
    8818             :       ! WRITE(*,*) ii
    8819             :       ! WRITE(*,'(20F14.10)') H(ii,:)
    8820             :       !ENDDO
    8821             : 
    8822             :       ! invert the Hessian
    8823           0 :       INFO = 0
    8824           0 :       ALLOCATE (Hinv(H_size, H_size))
    8825           0 :       Hinv(:, :) = H(:, :)
    8826             : 
    8827             :       ! before inverting diagonalize
    8828           0 :       ALLOCATE (eigenvalues(H_size))
    8829             :       ! Query the optimal workspace for dsyev
    8830           0 :       LWORK = -1
    8831           0 :       ALLOCATE (WORK(MAX(1, LWORK)))
    8832           0 :       CALL DSYEV('V', 'L', H_size, Hinv, H_size, eigenvalues, WORK, LWORK, INFO)
    8833           0 :       LWORK = INT(WORK(1))
    8834           0 :       DEALLOCATE (WORK)
    8835             :       ! Allocate the workspace and solve the eigenproblem
    8836           0 :       ALLOCATE (WORK(MAX(1, LWORK)))
    8837           0 :       CALL DSYEV('V', 'L', H_size, Hinv, H_size, eigenvalues, WORK, LWORK, INFO)
    8838           0 :       IF (INFO .NE. 0) THEN
    8839           0 :          WRITE (unit_nr, *) 'DSYEV ERROR MESSAGE: ', INFO
    8840           0 :          CPABORT("DSYEV failed")
    8841             :       END IF
    8842           0 :       DEALLOCATE (WORK)
    8843             : 
    8844             :       ! compute grad vector in the basis of Hessian eigenvectors
    8845           0 :       ALLOCATE (Step_vec(H_size))
    8846             :       ! Step_vec contains Grad_vec here
    8847           0 :       Step_vec(:) = MATMUL(TRANSPOSE(Hinv), Grad_vec)
    8848             : 
    8849             :       ! compute U.tr(U)-1 = error
    8850             :       !ALLOCATE(test(H_size,H_size))
    8851             :       !test(:,:)=MATMUL(TRANSPOSE(Hinv),Hinv)
    8852             :       !DO ii=1,H_size
    8853             :       !   test(ii,ii)=test(ii,ii)-1.0_dp
    8854             :       !ENDDO
    8855             :       !test_error=0.0_dp
    8856             :       !DO ii=1,H_size
    8857             :       !   DO jj=1,H_size
    8858             :       !      test_error=test_error+test(jj,ii)*test(jj,ii)
    8859             :       !   ENDDO
    8860             :       !ENDDO
    8861             :       !WRITE(*,*) "U.tr(U)-1 error: ", SQRT(test_error)
    8862             :       !DEALLOCATE(test)
    8863             : 
    8864             :       ! invert eigenvalues and use eigenvectors to compute the Hessian inverse
    8865             :       ! project out zero-eigenvalue directions
    8866           0 :       ALLOCATE (test(H_size, H_size))
    8867           0 :       zero_neg_eiv = 0
    8868           0 :       DO jj = 1, H_size
    8869           0 :          WRITE (unit_nr, "(I10,F20.10,F20.10)") jj, eigenvalues(jj), Step_vec(jj)
    8870           0 :          IF (eigenvalues(jj) .GT. eps_zero) THEN
    8871           0 :             test(jj, :) = Hinv(:, jj)/eigenvalues(jj)
    8872             :          ELSE
    8873           0 :             test(jj, :) = Hinv(:, jj)*0.0_dp
    8874           0 :             zero_neg_eiv = zero_neg_eiv + 1
    8875             :          END IF
    8876             :       END DO
    8877           0 :       WRITE (unit_nr, *) 'ZERO OR NEGATIVE EIGENVALUES: ', zero_neg_eiv
    8878           0 :       DEALLOCATE (Step_vec)
    8879             : 
    8880           0 :       ALLOCATE (test2(H_size, H_size))
    8881           0 :       test2(:, :) = MATMUL(Hinv, test)
    8882           0 :       Hinv(:, :) = test2(:, :)
    8883           0 :       DEALLOCATE (test, test2)
    8884             : 
    8885             :       !! shift to kill singularity
    8886             :       !shift=0.0_dp
    8887             :       !IF (eigenvalues(1).lt.0.0_dp) THEN
    8888             :       !   CPABORT("Negative eigenvalue(s)")
    8889             :       !   shift=abs(eigenvalues(1))
    8890             :       !   WRITE(*,*) "Lowest eigenvalue: ", eigenvalues(1)
    8891             :       !ENDIF
    8892             :       !DO ii=1, H_size
    8893             :       !   IF (eigenvalues(ii).gt.eps_zero) THEN
    8894             :       !      shift=shift+min(1.0_dp,eigenvalues(ii))*1.0E-4_dp
    8895             :       !      EXIT
    8896             :       !   ENDIF
    8897             :       !ENDDO
    8898             :       !WRITE(*,*) "Hessian shift: ", shift
    8899             :       !DO ii=1, H_size
    8900             :       !   H(ii,ii)=H(ii,ii)+shift
    8901             :       !ENDDO
    8902             :       !! end shift
    8903             : 
    8904           0 :       DEALLOCATE (eigenvalues)
    8905             : 
    8906             : !!!!    Hinv=H
    8907             : !!!!    INFO=0
    8908             : !!!!    CALL DPOTRF('L', H_size, Hinv, H_size, INFO )
    8909             : !!!!    IF( INFO.NE.0 ) THEN
    8910             : !!!!       WRITE(*,*) 'DPOTRF ERROR MESSAGE: ', INFO
    8911             : !!!!       CPABORT("DPOTRF failed")
    8912             : !!!!    END IF
    8913             : !!!!    CALL DPOTRI('L', H_size, Hinv, H_size, INFO )
    8914             : !!!!    IF( INFO.NE.0 ) THEN
    8915             : !!!!       WRITE(*,*) 'DPOTRI ERROR MESSAGE: ', INFO
    8916             : !!!!       CPABORT("DPOTRI failed")
    8917             : !!!!    END IF
    8918             : !!!!    ! complete the matrix
    8919             : !!!!    DO ii=1,H_size
    8920             : !!!!       DO jj=ii+1,H_size
    8921             : !!!!          Hinv(ii,jj)=Hinv(jj,ii)
    8922             : !!!!       ENDDO
    8923             : !!!!    ENDDO
    8924             : 
    8925             :       ! compute the inversion error
    8926           0 :       ALLOCATE (test(H_size, H_size))
    8927           0 :       test(:, :) = MATMUL(Hinv, H)
    8928           0 :       DO ii = 1, H_size
    8929           0 :          test(ii, ii) = test(ii, ii) - 1.0_dp
    8930             :       END DO
    8931           0 :       test_error = 0.0_dp
    8932           0 :       DO ii = 1, H_size
    8933           0 :          DO jj = 1, H_size
    8934           0 :             test_error = test_error + test(jj, ii)*test(jj, ii)
    8935             :          END DO
    8936             :       END DO
    8937           0 :       WRITE (unit_nr, *) "Hessian inversion error: ", SQRT(test_error)
    8938           0 :       DEALLOCATE (test)
    8939             : 
    8940             :       ! prepare the output vector
    8941           0 :       ALLOCATE (Step_vec(H_size))
    8942           0 :       ALLOCATE (tmp(H_size))
    8943           0 :       tmp(:) = MATMUL(Hinv, Grad_vec)
    8944             :       !tmp(:)=MATMUL(Hinv,test3)
    8945           0 :       Step_vec(:) = -1.0_dp*tmp(:)
    8946             : 
    8947           0 :       ALLOCATE (tmpr(H_size))
    8948           0 :       tmpr(:) = MATMUL(H, Step_vec)
    8949           0 :       tmp(:) = tmpr(:) + Grad_vec(:)
    8950           0 :       DEALLOCATE (tmpr)
    8951           0 :       WRITE (unit_nr, *) "NEWTOV step error: ", MAXVAL(ABS(tmp))
    8952             : 
    8953           0 :       DEALLOCATE (tmp)
    8954             : 
    8955           0 :       DEALLOCATE (H)
    8956           0 :       DEALLOCATE (Hinv)
    8957           0 :       DEALLOCATE (Grad_vec)
    8958             : 
    8959             : !S-1.G DEALLOCATE(test3)
    8960             : 
    8961             :       ! copy the step from the vector into the dbcsr matrix
    8962             : 
    8963             :       ! re-create the step matrix to remove all blocks
    8964             :       CALL dbcsr_create(matrix_step, &
    8965             :                         template=matrix_grad, &
    8966           0 :                         matrix_type=dbcsr_type_no_symmetry)
    8967           0 :       CALL dbcsr_work_create(matrix_step, work_mutable=.TRUE.)
    8968             : 
    8969           0 :       lev1_vert_offset = 0
    8970             :       ! loop over all electron blocks
    8971           0 :       DO col = 1, nblkcols_tot
    8972             : 
    8973             :          ! loop over AO-rows of the dbcsr matrix
    8974           0 :          lev2_vert_offset = 0
    8975           0 :          DO row = 1, nblkrows_tot
    8976             : 
    8977             :             CALL dbcsr_get_block_p(quench_t, &
    8978           0 :                                    row, col, block_p, found_row)
    8979           0 :             IF (found_row) THEN
    8980             : 
    8981           0 :                NULLIFY (p_new_block)
    8982           0 :                CALL dbcsr_reserve_block2d(matrix_step, row, col, p_new_block)
    8983           0 :                CPASSERT(ASSOCIATED(p_new_block))
    8984             :                ! copy the data column by column
    8985           0 :                DO orb_i = 1, mo_block_sizes(col)
    8986             :                   p_new_block(:, orb_i) = &
    8987             :                      Step_vec(lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + 1: &
    8988           0 :                               lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + ao_block_sizes(row))
    8989             :                END DO
    8990             : 
    8991           0 :                lev2_vert_offset = lev2_vert_offset + ao_block_sizes(row)
    8992             : 
    8993             :             END IF
    8994             : 
    8995             :          END DO
    8996             : 
    8997           0 :          lev1_vert_offset = lev1_vert_offset + ao_domain_sizes(col)*mo_block_sizes(col)
    8998             : 
    8999             :       END DO ! loop over electron blocks
    9000             : 
    9001           0 :       DEALLOCATE (Step_vec)
    9002             : 
    9003           0 :       CALL dbcsr_finalize(matrix_step)
    9004             : 
    9005             : !S-1.G CALL dbcsr_create(m_tmp_no_1,&
    9006             : !S-1.G         template=matrix_step,&
    9007             : !S-1.G         matrix_type=dbcsr_type_no_symmetry)
    9008             : !S-1.G CALL dbcsr_multiply("N","N",1.0_dp,&
    9009             : !S-1.G         m_prec_out,&
    9010             : !S-1.G         matrix_step,&
    9011             : !S-1.G         0.0_dp,m_tmp_no_1,&
    9012             : !S-1.G         filter_eps=1.0E-10_dp,&
    9013             : !S-1.G         )
    9014             : !S-1.G CALL dbcsr_copy(matrix_step,m_tmp_no_1)
    9015             : !S-1.G CALL dbcsr_release(m_tmp_no_1)
    9016             : !S-1.G CALL dbcsr_release(m_prec_out)
    9017             : 
    9018           0 :       DEALLOCATE (mo_block_sizes, ao_block_sizes)
    9019           0 :       DEALLOCATE (ao_domain_sizes)
    9020             : 
    9021             :       CALL dbcsr_create(matrix_S_ao_sym, &
    9022             :                         template=quench_t, &
    9023           0 :                         matrix_type=dbcsr_type_no_symmetry)
    9024           0 :       CALL dbcsr_copy(matrix_S_ao_sym, quench_t)
    9025             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9026             :                           matrix_F_ao, &
    9027             :                           matrix_step, &
    9028             :                           0.0_dp, matrix_S_ao_sym, &
    9029           0 :                           retain_sparsity=.TRUE.)
    9030             :       CALL dbcsr_create(matrix_F_ao_sym, &
    9031             :                         template=quench_t, &
    9032           0 :                         matrix_type=dbcsr_type_no_symmetry)
    9033           0 :       CALL dbcsr_copy(matrix_F_ao_sym, quench_t)
    9034             :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9035             :                           matrix_S_ao, &
    9036             :                           matrix_step, &
    9037             :                           0.0_dp, matrix_F_ao_sym, &
    9038           0 :                           retain_sparsity=.TRUE.)
    9039             :       CALL dbcsr_add(matrix_S_ao_sym, matrix_F_ao_sym, &
    9040           0 :                      1.0_dp, 1.0_dp)
    9041           0 :       CALL dbcsr_scale(matrix_S_ao_sym, 2.0_dp*spin_factor)
    9042             :       CALL dbcsr_add(matrix_S_ao_sym, matrix_grad, &
    9043           0 :                      1.0_dp, 1.0_dp)
    9044             :       CALL dbcsr_norm(matrix_S_ao_sym, dbcsr_norm_maxabsnorm, &
    9045           0 :                       norm_scalar=test_error)
    9046           0 :       WRITE (unit_nr, *) "NEWTOL step error: ", test_error
    9047           0 :       CALL dbcsr_release(matrix_S_ao_sym)
    9048           0 :       CALL dbcsr_release(matrix_F_ao_sym)
    9049             : 
    9050           0 :       CALL timestop(handle)
    9051             : 
    9052           0 :    END SUBROUTINE hessian_diag_apply
    9053             : 
    9054             : ! **************************************************************************************************
    9055             : !> \brief Optimization of ALMOs using trust region minimizers
    9056             : !> \param qs_env ...
    9057             : !> \param almo_scf_env ...
    9058             : !> \param optimizer   controls the optimization algorithm
    9059             : !> \param quench_t ...
    9060             : !> \param matrix_t_in ...
    9061             : !> \param matrix_t_out ...
    9062             : !> \param perturbation_only - perturbative (do not update Hamiltonian)
    9063             : !> \param special_case   to reduce the overhead special cases are implemented:
    9064             : !>                       xalmo_case_normal - no special case (i.e. xALMOs)
    9065             : !>                       xalmo_case_block_diag
    9066             : !>                       xalmo_case_fully_deloc
    9067             : !> \par History
    9068             : !>       2020.01 created [Rustam Z Khaliullin]
    9069             : !> \author Rustam Z Khaliullin
    9070             : ! **************************************************************************************************
    9071          18 :    SUBROUTINE almo_scf_xalmo_trustr(qs_env, almo_scf_env, optimizer, quench_t, &
    9072             :                                     matrix_t_in, matrix_t_out, perturbation_only, &
    9073             :                                     special_case)
    9074             : 
    9075             :       TYPE(qs_environment_type), POINTER                 :: qs_env
    9076             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    9077             :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
    9078             :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: quench_t, matrix_t_in, matrix_t_out
    9079             :       LOGICAL, INTENT(IN)                                :: perturbation_only
    9080             :       INTEGER, INTENT(IN), OPTIONAL                      :: special_case
    9081             : 
    9082             :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_xalmo_trustr'
    9083             : 
    9084             :       INTEGER :: handle, ispin, iteration, iteration_type_to_report, my_special_case, ndomains, &
    9085             :          nspins, outer_iteration, prec_type, unit_nr
    9086          18 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
    9087             :       LOGICAL :: assume_t0_q0x, border_reached, inner_loop_success, normalize_orbitals, &
    9088             :          optimize_theta, penalty_occ_vol, reset_conjugator, same_position, scf_converged
    9089             :       REAL(kind=dp) :: beta, energy_start, energy_trial, eta, expected_reduction, &
    9090             :          fake_step_size_to_report, grad_norm_ratio, grad_norm_ref, loss_change_to_report, &
    9091             :          loss_start, loss_trial, model_grad_norm, penalty_amplitude, penalty_start, penalty_trial, &
    9092             :          radius_current, radius_max, real_temp, rho, spin_factor, step_norm, step_size, t1, &
    9093             :          t1outer, t2, t2outer, y_scalar
    9094          18 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: grad_norm_spin, &
    9095          18 :                                                             penalty_occ_vol_g_prefactor, &
    9096          18 :                                                             penalty_occ_vol_h_prefactor
    9097             :       TYPE(cp_logger_type), POINTER                      :: logger
    9098             :       TYPE(dbcsr_type)                                   :: m_s_inv
    9099          18 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: FTsiginv, grad, m_model_Bd, m_model_d, &
    9100          18 :          m_model_hessian, m_model_hessian_inv, m_model_r, m_model_r_prev, m_model_rt, &
    9101          18 :          m_model_rt_prev, m_sig_sqrti_ii, m_theta, m_theta_trial, prev_step, siginvTFTsiginv, ST, &
    9102          18 :          step, STsiginv_0
    9103             :       TYPE(domain_submatrix_type), ALLOCATABLE, &
    9104          18 :          DIMENSION(:, :)                                 :: domain_model_hessian_inv, domain_r_down
    9105             : 
    9106             :       ! RZK-warning: number of temporary storage matrices can be reduced
    9107          18 :       CALL timeset(routineN, handle)
    9108             : 
    9109          18 :       t1outer = m_walltime()
    9110             : 
    9111          18 :       my_special_case = xalmo_case_normal
    9112          18 :       IF (PRESENT(special_case)) my_special_case = special_case
    9113             : 
    9114             :       ! get a useful output_unit
    9115          18 :       logger => cp_get_default_logger()
    9116          18 :       IF (logger%para_env%is_source()) THEN
    9117           9 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    9118             :       ELSE
    9119           9 :          unit_nr = -1
    9120             :       END IF
    9121             : 
    9122             :       ! Trust radius code is written to obviate the need in projected orbitals
    9123          18 :       assume_t0_q0x = .FALSE.
    9124             :       ! Smoothing of the orbitals have not been implemented
    9125          18 :       optimize_theta = .FALSE.
    9126             : 
    9127          18 :       nspins = almo_scf_env%nspins
    9128          18 :       IF (nspins == 1) THEN
    9129          18 :          spin_factor = 2.0_dp
    9130             :       ELSE
    9131           0 :          spin_factor = 1.0_dp
    9132             :       END IF
    9133             : 
    9134          18 :       IF (unit_nr > 0) THEN
    9135           9 :          WRITE (unit_nr, *)
    9136           1 :          SELECT CASE (my_special_case)
    9137             :          CASE (xalmo_case_block_diag)
    9138           1 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
    9139           2 :                " Optimization of block-diagonal ALMOs ", REPEAT("-", 21)
    9140             :          CASE (xalmo_case_fully_deloc)
    9141           0 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
    9142           0 :                " Optimization of fully delocalized MOs ", REPEAT("-", 20)
    9143             :          CASE (xalmo_case_normal)
    9144           8 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 27), &
    9145          17 :                " Optimization of XALMOs ", REPEAT("-", 28)
    9146             :          END SELECT
    9147           9 :          WRITE (unit_nr, *)
    9148             :          CALL trust_r_report(unit_nr, &
    9149             :                              iter_type=0, & ! print header, all values are ignored
    9150             :                              iteration=0, &
    9151             :                              radius=0.0_dp, &
    9152             :                              loss=0.0_dp, &
    9153             :                              delta_loss=0.0_dp, &
    9154             :                              grad_norm=0.0_dp, &
    9155             :                              predicted_reduction=0.0_dp, &
    9156             :                              rho=0.0_dp, &
    9157             :                              new=.TRUE., &
    9158           9 :                              time=0.0_dp)
    9159           9 :          WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
    9160             :       END IF
    9161             : 
    9162             :       ! penalty amplitude adjusts the strength of volume conservation
    9163          18 :       penalty_occ_vol = .FALSE.
    9164             :       !(almo_scf_env%penalty%occ_vol_method .NE. almo_occ_vol_penalty_none .AND. &
    9165             :       !                   my_special_case .EQ. xalmo_case_fully_deloc)
    9166          18 :       normalize_orbitals = penalty_occ_vol
    9167          18 :       penalty_amplitude = 0.0_dp !almo_scf_env%penalty%occ_vol_coeff
    9168          54 :       ALLOCATE (penalty_occ_vol_g_prefactor(nspins))
    9169          54 :       ALLOCATE (penalty_occ_vol_h_prefactor(nspins))
    9170          36 :       penalty_occ_vol_g_prefactor(:) = 0.0_dp
    9171          36 :       penalty_occ_vol_h_prefactor(:) = 0.0_dp
    9172             : 
    9173             :       ! here preconditioner is the Hessian of model function
    9174          18 :       prec_type = optimizer%preconditioner
    9175             : 
    9176          54 :       ALLOCATE (grad_norm_spin(nspins))
    9177          54 :       ALLOCATE (nocc(nspins))
    9178             : 
    9179             :       ! m_theta contains a set of variational parameters
    9180             :       ! that define one-electron orbitals (simple, projected, etc.)
    9181          72 :       ALLOCATE (m_theta(nspins))
    9182          36 :       DO ispin = 1, nspins
    9183             :          CALL dbcsr_create(m_theta(ispin), &
    9184             :                            template=matrix_t_out(ispin), &
    9185          36 :                            matrix_type=dbcsr_type_no_symmetry)
    9186             :       END DO
    9187             : 
    9188             :       ! create initial guess from the initial orbitals
    9189             :       CALL xalmo_initial_guess(m_guess=m_theta, &
    9190             :                                m_t_in=matrix_t_in, &
    9191             :                                m_t0=almo_scf_env%matrix_t_blk, &
    9192             :                                m_quench_t=quench_t, &
    9193             :                                m_overlap=almo_scf_env%matrix_s(1), &
    9194             :                                m_sigma_tmpl=almo_scf_env%matrix_sigma_inv, &
    9195             :                                nspins=nspins, &
    9196             :                                xalmo_history=almo_scf_env%xalmo_history, &
    9197             :                                assume_t0_q0x=assume_t0_q0x, &
    9198             :                                optimize_theta=optimize_theta, &
    9199             :                                envelope_amplitude=almo_scf_env%envelope_amplitude, &
    9200             :                                eps_filter=almo_scf_env%eps_filter, &
    9201             :                                order_lanczos=almo_scf_env%order_lanczos, &
    9202             :                                eps_lanczos=almo_scf_env%eps_lanczos, &
    9203             :                                max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
    9204          18 :                                nocc_of_domain=almo_scf_env%nocc_of_domain)
    9205             : 
    9206          18 :       ndomains = almo_scf_env%ndomains
    9207         218 :       ALLOCATE (domain_r_down(ndomains, nspins))
    9208          18 :       CALL init_submatrices(domain_r_down)
    9209         218 :       ALLOCATE (domain_model_hessian_inv(ndomains, nspins))
    9210          18 :       CALL init_submatrices(domain_model_hessian_inv)
    9211             : 
    9212          72 :       ALLOCATE (m_model_hessian(nspins))
    9213          72 :       ALLOCATE (m_model_hessian_inv(nspins))
    9214          72 :       ALLOCATE (siginvTFTsiginv(nspins))
    9215          72 :       ALLOCATE (STsiginv_0(nspins))
    9216          72 :       ALLOCATE (FTsiginv(nspins))
    9217          72 :       ALLOCATE (ST(nspins))
    9218          72 :       ALLOCATE (grad(nspins))
    9219          72 :       ALLOCATE (prev_step(nspins))
    9220          72 :       ALLOCATE (step(nspins))
    9221          72 :       ALLOCATE (m_sig_sqrti_ii(nspins))
    9222          72 :       ALLOCATE (m_model_r(nspins))
    9223          72 :       ALLOCATE (m_model_rt(nspins))
    9224          72 :       ALLOCATE (m_model_d(nspins))
    9225          72 :       ALLOCATE (m_model_Bd(nspins))
    9226          72 :       ALLOCATE (m_model_r_prev(nspins))
    9227          72 :       ALLOCATE (m_model_rt_prev(nspins))
    9228          72 :       ALLOCATE (m_theta_trial(nspins))
    9229             : 
    9230          36 :       DO ispin = 1, nspins
    9231             : 
    9232             :          ! init temporary storage
    9233             :          CALL dbcsr_create(m_model_hessian_inv(ispin), &
    9234             :                            template=almo_scf_env%matrix_ks(ispin), &
    9235          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9236             :          CALL dbcsr_create(m_model_hessian(ispin), &
    9237             :                            template=almo_scf_env%matrix_ks(ispin), &
    9238          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9239             :          CALL dbcsr_create(siginvTFTsiginv(ispin), &
    9240             :                            template=almo_scf_env%matrix_sigma(ispin), &
    9241          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9242             :          CALL dbcsr_create(STsiginv_0(ispin), &
    9243             :                            template=matrix_t_out(ispin), &
    9244          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9245             :          CALL dbcsr_create(FTsiginv(ispin), &
    9246             :                            template=matrix_t_out(ispin), &
    9247          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9248             :          CALL dbcsr_create(ST(ispin), &
    9249             :                            template=matrix_t_out(ispin), &
    9250          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9251             :          CALL dbcsr_create(grad(ispin), &
    9252             :                            template=matrix_t_out(ispin), &
    9253          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9254             :          CALL dbcsr_create(prev_step(ispin), &
    9255             :                            template=matrix_t_out(ispin), &
    9256          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9257             :          CALL dbcsr_create(step(ispin), &
    9258             :                            template=matrix_t_out(ispin), &
    9259          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9260             :          CALL dbcsr_create(m_sig_sqrti_ii(ispin), &
    9261             :                            template=almo_scf_env%matrix_sigma_inv(ispin), &
    9262          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9263             :          CALL dbcsr_create(m_model_r(ispin), &
    9264             :                            template=matrix_t_out(ispin), &
    9265          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9266             :          CALL dbcsr_create(m_model_rt(ispin), &
    9267             :                            template=matrix_t_out(ispin), &
    9268          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9269             :          CALL dbcsr_create(m_model_d(ispin), &
    9270             :                            template=matrix_t_out(ispin), &
    9271          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9272             :          CALL dbcsr_create(m_model_Bd(ispin), &
    9273             :                            template=matrix_t_out(ispin), &
    9274          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9275             :          CALL dbcsr_create(m_model_r_prev(ispin), &
    9276             :                            template=matrix_t_out(ispin), &
    9277          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9278             :          CALL dbcsr_create(m_model_rt_prev(ispin), &
    9279             :                            template=matrix_t_out(ispin), &
    9280          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9281             :          CALL dbcsr_create(m_theta_trial(ispin), &
    9282             :                            template=matrix_t_out(ispin), &
    9283          18 :                            matrix_type=dbcsr_type_no_symmetry)
    9284             : 
    9285          18 :          CALL dbcsr_set(step(ispin), 0.0_dp)
    9286          18 :          CALL dbcsr_set(prev_step(ispin), 0.0_dp)
    9287             : 
    9288             :          CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), &
    9289          18 :                              nfullrows_total=nocc(ispin))
    9290             : 
    9291             :          ! invert S domains if necessary
    9292             :          ! Note: domains for alpha and beta electrons might be different
    9293             :          ! that is why the inversion of the AO overlap is inside the spin loop
    9294          36 :          IF (my_special_case .EQ. xalmo_case_normal) THEN
    9295             : 
    9296             :             CALL construct_domain_s_inv( &
    9297             :                matrix_s=almo_scf_env%matrix_s(1), &
    9298             :                subm_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9299             :                dpattern=quench_t(ispin), &
    9300             :                map=almo_scf_env%domain_map(ispin), &
    9301          16 :                node_of_domain=almo_scf_env%cpu_of_domain)
    9302             : 
    9303             :          END IF
    9304             : 
    9305             :       END DO ! ispin
    9306             : 
    9307             :       ! invert metric for special case where metric is spin independent
    9308          18 :       IF (my_special_case .EQ. xalmo_case_block_diag) THEN
    9309             : 
    9310             :          CALL dbcsr_create(m_s_inv, &
    9311             :                            template=almo_scf_env%matrix_s(1), &
    9312           2 :                            matrix_type=dbcsr_type_no_symmetry)
    9313             :          CALL invert_Hotelling(m_s_inv, &
    9314             :                                almo_scf_env%matrix_s_blk(1), &
    9315             :                                threshold=almo_scf_env%eps_filter, &
    9316           2 :                                filter_eps=almo_scf_env%eps_filter)
    9317             : 
    9318          16 :       ELSE IF (my_special_case .EQ. xalmo_case_fully_deloc) THEN
    9319             : 
    9320             :          ! invert S using cholesky
    9321             :          CALL dbcsr_create(m_s_inv, &
    9322             :                            template=almo_scf_env%matrix_s(1), &
    9323           0 :                            matrix_type=dbcsr_type_no_symmetry)
    9324           0 :          CALL dbcsr_desymmetrize(almo_scf_env%matrix_s(1), m_s_inv)
    9325             :          CALL cp_dbcsr_cholesky_decompose(m_s_inv, &
    9326             :                                           para_env=almo_scf_env%para_env, &
    9327           0 :                                           blacs_env=almo_scf_env%blacs_env)
    9328             :          CALL cp_dbcsr_cholesky_invert(m_s_inv, &
    9329             :                                        para_env=almo_scf_env%para_env, &
    9330             :                                        blacs_env=almo_scf_env%blacs_env, &
    9331           0 :                                        upper_to_full=.TRUE.)
    9332           0 :          CALL dbcsr_filter(m_s_inv, almo_scf_env%eps_filter)
    9333             : 
    9334             :       END IF ! s_inv
    9335             : 
    9336          18 :       radius_max = optimizer%max_trust_radius
    9337          18 :       radius_current = MIN(optimizer%initial_trust_radius, radius_max)
    9338             :       ! eta must be between 0 and 0.25
    9339          18 :       eta = MIN(MAX(optimizer%rho_do_not_update, 0.0_dp), 0.25_dp)
    9340             :       energy_start = 0.0_dp
    9341          18 :       energy_trial = 0.0_dp
    9342             :       penalty_start = 0.0_dp
    9343          18 :       penalty_trial = 0.0_dp
    9344             :       loss_start = 0.0_dp ! sum of the energy and penalty
    9345          18 :       loss_trial = 0.0_dp
    9346             : 
    9347          18 :       same_position = .FALSE.
    9348             : 
    9349             :       ! compute the energy
    9350             :       CALL main_var_to_xalmos_and_loss_func( &
    9351             :          almo_scf_env=almo_scf_env, &
    9352             :          qs_env=qs_env, &
    9353             :          m_main_var_in=m_theta, &
    9354             :          m_t_out=matrix_t_out, &
    9355             :          m_sig_sqrti_ii_out=m_sig_sqrti_ii, &
    9356             :          energy_out=energy_start, &
    9357             :          penalty_out=penalty_start, &
    9358             :          m_FTsiginv_out=FTsiginv, &
    9359             :          m_siginvTFTsiginv_out=siginvTFTsiginv, &
    9360             :          m_ST_out=ST, &
    9361             :          m_STsiginv0_in=STsiginv_0, &
    9362             :          m_quench_t_in=quench_t, &
    9363             :          domain_r_down_in=domain_r_down, &
    9364             :          assume_t0_q0x=assume_t0_q0x, &
    9365             :          just_started=.TRUE., &
    9366             :          optimize_theta=optimize_theta, &
    9367             :          normalize_orbitals=normalize_orbitals, &
    9368             :          perturbation_only=perturbation_only, &
    9369             :          do_penalty=penalty_occ_vol, &
    9370          18 :          special_case=my_special_case)
    9371          18 :       loss_start = energy_start + penalty_start
    9372          18 :       IF (my_special_case .EQ. xalmo_case_block_diag) THEN
    9373           2 :          almo_scf_env%almo_scf_energy = energy_start
    9374             :       END IF
    9375          36 :       DO ispin = 1, nspins
    9376          36 :          IF (penalty_occ_vol) THEN
    9377             :             penalty_occ_vol_g_prefactor(ispin) = &
    9378           0 :                -2.0_dp*penalty_amplitude*spin_factor*nocc(ispin)
    9379           0 :             penalty_occ_vol_h_prefactor(ispin) = 0.0_dp
    9380             :          END IF
    9381             :       END DO ! ispin
    9382             : 
    9383             :       ! start the outer step-size-adjustment loop
    9384          18 :       scf_converged = .FALSE.
    9385         426 :       adjust_r_loop: DO outer_iteration = 1, optimizer%max_iter_outer_loop
    9386             : 
    9387             :          ! start the inner fixed-radius loop
    9388         426 :          border_reached = .FALSE.
    9389             : 
    9390         852 :          DO ispin = 1, nspins
    9391         426 :             CALL dbcsr_set(step(ispin), 0.0_dp)
    9392         852 :             CALL dbcsr_filter(step(ispin), almo_scf_env%eps_filter)
    9393             :          END DO
    9394             : 
    9395         426 :          IF (.NOT. same_position) THEN
    9396             : 
    9397         852 :             DO ispin = 1, nspins
    9398             : 
    9399             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Compute model gradient"
    9400             :                CALL compute_gradient( &
    9401             :                   m_grad_out=grad(ispin), &
    9402             :                   m_ks=almo_scf_env%matrix_ks(ispin), &
    9403             :                   m_s=almo_scf_env%matrix_s(1), &
    9404             :                   m_t=matrix_t_out(ispin), &
    9405             :                   m_t0=almo_scf_env%matrix_t_blk(ispin), &
    9406             :                   m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    9407             :                   m_quench_t=quench_t(ispin), &
    9408             :                   m_FTsiginv=FTsiginv(ispin), &
    9409             :                   m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    9410             :                   m_ST=ST(ispin), &
    9411             :                   m_STsiginv0=STsiginv_0(ispin), &
    9412             :                   m_theta=m_theta(ispin), &
    9413             :                   m_sig_sqrti_ii=m_sig_sqrti_ii(ispin), &
    9414             :                   domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9415             :                   domain_r_down=domain_r_down(:, ispin), &
    9416             :                   cpu_of_domain=almo_scf_env%cpu_of_domain, &
    9417             :                   domain_map=almo_scf_env%domain_map(ispin), &
    9418             :                   assume_t0_q0x=assume_t0_q0x, &
    9419             :                   optimize_theta=optimize_theta, &
    9420             :                   normalize_orbitals=normalize_orbitals, &
    9421             :                   penalty_occ_vol=penalty_occ_vol, &
    9422             :                   penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    9423             :                   envelope_amplitude=almo_scf_env%envelope_amplitude, &
    9424             :                   eps_filter=almo_scf_env%eps_filter, &
    9425             :                   spin_factor=spin_factor, &
    9426         852 :                   special_case=my_special_case)
    9427             : 
    9428             :             END DO ! ispin
    9429             : 
    9430             :          END IF ! skip_grad
    9431             : 
    9432             :          ! check convergence and other exit criteria
    9433         852 :          DO ispin = 1, nspins
    9434             :             CALL dbcsr_norm(grad(ispin), dbcsr_norm_maxabsnorm, &
    9435         852 :                             norm_scalar=grad_norm_spin(ispin))
    9436             :             !grad_norm_frob = dbcsr_frobenius_norm(grad(ispin)) / &
    9437             :             !                 dbcsr_frobenius_norm(quench_t(ispin))
    9438             :          END DO ! ispin
    9439        1278 :          grad_norm_ref = MAXVAL(grad_norm_spin)
    9440             : 
    9441         426 :          t2outer = m_walltime()
    9442             :          CALL trust_r_report(unit_nr, &
    9443             :                              iter_type=1, & ! only some data is important
    9444             :                              iteration=outer_iteration, &
    9445             :                              loss=loss_start, &
    9446             :                              delta_loss=0.0_dp, &
    9447             :                              grad_norm=grad_norm_ref, &
    9448             :                              predicted_reduction=0.0_dp, &
    9449             :                              rho=0.0_dp, &
    9450             :                              radius=radius_current, &
    9451             :                              new=.NOT. same_position, &
    9452         426 :                              time=t2outer - t1outer)
    9453         426 :          t1outer = m_walltime()
    9454             : 
    9455         426 :          IF (grad_norm_ref .LE. optimizer%eps_error) THEN
    9456          18 :             scf_converged = .TRUE.
    9457          18 :             border_reached = .FALSE.
    9458          18 :             expected_reduction = 0.0_dp
    9459          18 :             IF (.NOT. (optimizer%early_stopping_on .AND. outer_iteration .EQ. 1)) &
    9460             :                EXIT adjust_r_loop
    9461             :          ELSE
    9462             :             scf_converged = .FALSE.
    9463             :          END IF
    9464             : 
    9465         816 :          DO ispin = 1, nspins
    9466             : 
    9467         408 :             CALL dbcsr_copy(m_model_r(ispin), grad(ispin))
    9468         408 :             CALL dbcsr_scale(m_model_r(ispin), -1.0_dp)
    9469             : 
    9470         408 :             IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
    9471             :                 my_special_case .EQ. xalmo_case_fully_deloc) THEN
    9472             : 
    9473             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Multiply Sinv.r"
    9474             :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9475             :                                    m_s_inv, &
    9476             :                                    m_model_r(ispin), &
    9477             :                                    0.0_dp, m_model_rt(ispin), &
    9478          92 :                                    filter_eps=almo_scf_env%eps_filter)
    9479             : 
    9480         316 :             ELSE IF (my_special_case .EQ. xalmo_case_normal) THEN
    9481             : 
    9482             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Multiply Sinv_xx.r"
    9483             :                CALL apply_domain_operators( &
    9484             :                   matrix_in=m_model_r(ispin), &
    9485             :                   matrix_out=m_model_rt(ispin), &
    9486             :                   operator1=almo_scf_env%domain_s_inv(:, ispin), &
    9487             :                   dpattern=quench_t(ispin), &
    9488             :                   map=almo_scf_env%domain_map(ispin), &
    9489             :                   node_of_domain=almo_scf_env%cpu_of_domain, &
    9490             :                   my_action=0, &
    9491         316 :                   filter_eps=almo_scf_env%eps_filter)
    9492             : 
    9493             :             ELSE
    9494           0 :                CPABORT("Unknown XALMO special case")
    9495             :             END IF
    9496             : 
    9497         816 :             CALL dbcsr_copy(m_model_d(ispin), m_model_rt(ispin))
    9498             : 
    9499             :          END DO ! ispin
    9500             : 
    9501             :          ! compute model Hessian
    9502         408 :          IF (.NOT. same_position) THEN
    9503             : 
    9504             :             SELECT CASE (prec_type)
    9505             :             CASE (xalmo_prec_domain)
    9506             : 
    9507             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Compute model Hessian"
    9508         816 :                DO ispin = 1, nspins
    9509             :                   CALL compute_preconditioner( &
    9510             :                      domain_prec_out=almo_scf_env%domain_preconditioner(:, ispin), &
    9511             :                      m_prec_out=m_model_hessian(ispin), &
    9512             :                      m_ks=almo_scf_env%matrix_ks(ispin), &
    9513             :                      m_s=almo_scf_env%matrix_s(1), &
    9514             :                      m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    9515             :                      m_quench_t=quench_t(ispin), &
    9516             :                      m_FTsiginv=FTsiginv(ispin), &
    9517             :                      m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    9518             :                      m_ST=ST(ispin), &
    9519             :                      para_env=almo_scf_env%para_env, &
    9520             :                      blacs_env=almo_scf_env%blacs_env, &
    9521             :                      nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    9522             :                      domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9523             :                      domain_r_down=domain_r_down(:, ispin), &
    9524             :                      cpu_of_domain=almo_scf_env%cpu_of_domain, &
    9525             :                      domain_map=almo_scf_env%domain_map(ispin), &
    9526             :                      assume_t0_q0x=.FALSE., &
    9527             :                      penalty_occ_vol=penalty_occ_vol, &
    9528             :                      penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    9529             :                      eps_filter=almo_scf_env%eps_filter, &
    9530             :                      neg_thr=0.5_dp, &
    9531             :                      spin_factor=spin_factor, &
    9532             :                      skip_inversion=.TRUE., &
    9533         816 :                      special_case=my_special_case)
    9534             :                END DO ! ispin
    9535             : 
    9536             :             CASE DEFAULT
    9537             : 
    9538         408 :                CPABORT("Unknown preconditioner")
    9539             : 
    9540             :             END SELECT ! preconditioner type fork
    9541             : 
    9542             :          END IF  ! not same position
    9543             : 
    9544             :          ! print the header (argument values are ignored)
    9545             :          CALL fixed_r_report(unit_nr, &
    9546             :                              iter_type=0, &
    9547             :                              iteration=0, &
    9548             :                              step_size=0.0_dp, &
    9549             :                              border_reached=.FALSE., &
    9550             :                              curvature=0.0_dp, &
    9551             :                              grad_norm_ratio=0.0_dp, &
    9552         408 :                              time=0.0_dp)
    9553             : 
    9554             :          IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Start inner loop"
    9555             : 
    9556         408 :          t1 = m_walltime()
    9557         408 :          inner_loop_success = .FALSE.
    9558             :          ! trustr_steihaug, trustr_cauchy, trustr_dogleg
    9559         490 :          fixed_r_loop: DO iteration = 1, optimizer%max_iter
    9560             : 
    9561             :             ! Step 2. Get curvature. If negative, step to the border
    9562         490 :             y_scalar = 0.0_dp
    9563         980 :             DO ispin = 1, nspins
    9564             : 
    9565             :                ! Get B.d
    9566         490 :                IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
    9567             :                    my_special_case .EQ. xalmo_case_fully_deloc) THEN
    9568             : 
    9569             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9570             :                                       m_model_hessian(ispin), &
    9571             :                                       m_model_d(ispin), &
    9572             :                                       0.0_dp, m_model_Bd(ispin), &
    9573          92 :                                       filter_eps=almo_scf_env%eps_filter)
    9574             : 
    9575             :                ELSE
    9576             : 
    9577             :                   CALL apply_domain_operators( &
    9578             :                      matrix_in=m_model_d(ispin), &
    9579             :                      matrix_out=m_model_Bd(ispin), &
    9580             :                      operator1=almo_scf_env%domain_preconditioner(:, ispin), &
    9581             :                      dpattern=quench_t(ispin), &
    9582             :                      map=almo_scf_env%domain_map(ispin), &
    9583             :                      node_of_domain=almo_scf_env%cpu_of_domain, &
    9584             :                      my_action=0, &
    9585         398 :                      filter_eps=almo_scf_env%eps_filter)
    9586             : 
    9587             :                END IF ! special case
    9588             : 
    9589             :                ! Get y=d^T.B.d
    9590         490 :                CALL dbcsr_dot(m_model_d(ispin), m_model_Bd(ispin), real_temp)
    9591         980 :                y_scalar = y_scalar + real_temp
    9592             : 
    9593             :             END DO ! ispin
    9594             :             IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Curvature: ", y_scalar
    9595             : 
    9596             :             ! step to the border
    9597         490 :             IF (y_scalar .LT. 0.0_dp) THEN
    9598             : 
    9599             :                CALL step_size_to_border( &
    9600             :                   step_size_out=step_size, &
    9601             :                   metric_in=almo_scf_env%matrix_s, &
    9602             :                   position_in=step, &
    9603             :                   direction_in=m_model_d, &
    9604             :                   trust_radius_in=radius_current, &
    9605             :                   quench_t_in=quench_t, &
    9606             :                   eps_filter_in=almo_scf_env%eps_filter &
    9607           0 :                   )
    9608             : 
    9609           0 :                DO ispin = 1, nspins
    9610           0 :                   CALL dbcsr_add(step(ispin), m_model_d(ispin), 1.0_dp, step_size)
    9611             :                END DO
    9612             : 
    9613           0 :                border_reached = .TRUE.
    9614           0 :                inner_loop_success = .TRUE.
    9615             : 
    9616             :                CALL predicted_reduction( &
    9617             :                   reduction_out=expected_reduction, &
    9618             :                   grad_in=grad, &
    9619             :                   step_in=step, &
    9620             :                   hess_in=m_model_hessian, &
    9621             :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
    9622             :                   quench_t_in=quench_t, &
    9623             :                   special_case=my_special_case, &
    9624             :                   eps_filter=almo_scf_env%eps_filter, &
    9625             :                   domain_map=almo_scf_env%domain_map, &
    9626             :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
    9627           0 :                   )
    9628             : 
    9629           0 :                t2 = m_walltime()
    9630             :                CALL fixed_r_report(unit_nr, &
    9631             :                                    iter_type=2, &
    9632             :                                    iteration=iteration, &
    9633             :                                    step_size=step_size, &
    9634             :                                    border_reached=border_reached, &
    9635             :                                    curvature=y_scalar, &
    9636             :                                    grad_norm_ratio=expected_reduction, &
    9637           0 :                                    time=t2 - t1)
    9638             : 
    9639             :                EXIT fixed_r_loop ! the inner loop
    9640             : 
    9641             :             END IF ! y is negative
    9642             : 
    9643             :             ! Step 3. Compute the step size along the direction
    9644         490 :             step_size = 0.0_dp
    9645         980 :             DO ispin = 1, nspins
    9646         490 :                CALL dbcsr_dot(m_model_r(ispin), m_model_rt(ispin), real_temp)
    9647         980 :                step_size = step_size + real_temp
    9648             :             END DO ! ispin
    9649         490 :             step_size = step_size/y_scalar
    9650             :             IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Proposed step size: ", step_size
    9651             : 
    9652             :             ! Update the step matrix
    9653         980 :             DO ispin = 1, nspins
    9654         490 :                CALL dbcsr_copy(prev_step(ispin), step(ispin))
    9655         980 :                CALL dbcsr_add(step(ispin), m_model_d(ispin), 1.0_dp, step_size)
    9656             :             END DO
    9657             : 
    9658             :             ! Compute step norm
    9659             :             CALL contravariant_matrix_norm( &
    9660             :                norm_out=step_norm, &
    9661             :                matrix_in=step, &
    9662             :                metric_in=almo_scf_env%matrix_s, &
    9663             :                quench_t_in=quench_t, &
    9664             :                eps_filter_in=almo_scf_env%eps_filter &
    9665         490 :                )
    9666             :             IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Step norm: ", step_norm
    9667             : 
    9668             :             ! Do not step beyond the trust radius
    9669         490 :             IF (step_norm .GT. radius_current) THEN
    9670             : 
    9671             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Norm is too large"
    9672             :                CALL step_size_to_border( &
    9673             :                   step_size_out=step_size, &
    9674             :                   metric_in=almo_scf_env%matrix_s, &
    9675             :                   position_in=prev_step, &
    9676             :                   direction_in=m_model_d, &
    9677             :                   trust_radius_in=radius_current, &
    9678             :                   quench_t_in=quench_t, &
    9679             :                   eps_filter_in=almo_scf_env%eps_filter &
    9680          34 :                   )
    9681             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Step size to border: ", step_size
    9682             : 
    9683          68 :                DO ispin = 1, nspins
    9684          34 :                   CALL dbcsr_copy(step(ispin), prev_step(ispin))
    9685          68 :                   CALL dbcsr_add(step(ispin), m_model_d(ispin), 1.0_dp, step_size)
    9686             :                END DO
    9687             : 
    9688             :                IF (debug_mode) THEN
    9689             :                   ! Compute step norm
    9690             :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Extra norm evaluation"
    9691             :                   CALL contravariant_matrix_norm( &
    9692             :                      norm_out=step_norm, &
    9693             :                      matrix_in=step, &
    9694             :                      metric_in=almo_scf_env%matrix_s, &
    9695             :                      quench_t_in=quench_t, &
    9696             :                      eps_filter_in=almo_scf_env%eps_filter &
    9697             :                      )
    9698             :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Step norm: ", step_norm
    9699             :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Current radius: ", radius_current
    9700             :                END IF
    9701             : 
    9702          34 :                border_reached = .TRUE.
    9703          34 :                inner_loop_success = .TRUE.
    9704             : 
    9705             :                CALL predicted_reduction( &
    9706             :                   reduction_out=expected_reduction, &
    9707             :                   grad_in=grad, &
    9708             :                   step_in=step, &
    9709             :                   hess_in=m_model_hessian, &
    9710             :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
    9711             :                   quench_t_in=quench_t, &
    9712             :                   special_case=my_special_case, &
    9713             :                   eps_filter=almo_scf_env%eps_filter, &
    9714             :                   domain_map=almo_scf_env%domain_map, &
    9715             :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
    9716          34 :                   )
    9717             : 
    9718          34 :                t2 = m_walltime()
    9719             :                CALL fixed_r_report(unit_nr, &
    9720             :                                    iter_type=3, &
    9721             :                                    iteration=iteration, &
    9722             :                                    step_size=step_size, &
    9723             :                                    border_reached=border_reached, &
    9724             :                                    curvature=y_scalar, &
    9725             :                                    grad_norm_ratio=expected_reduction, &
    9726          34 :                                    time=t2 - t1)
    9727             : 
    9728             :                EXIT fixed_r_loop ! the inner loop
    9729             : 
    9730             :             END IF
    9731             : 
    9732         456 :             IF (optimizer%trustr_algorithm .EQ. trustr_cauchy) THEN
    9733             :                ! trustr_steihaug, trustr_cauchy, trustr_dogleg
    9734             : 
    9735          80 :                border_reached = .FALSE.
    9736          80 :                inner_loop_success = .TRUE.
    9737             : 
    9738             :                CALL predicted_reduction( &
    9739             :                   reduction_out=expected_reduction, &
    9740             :                   grad_in=grad, &
    9741             :                   step_in=step, &
    9742             :                   hess_in=m_model_hessian, &
    9743             :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
    9744             :                   quench_t_in=quench_t, &
    9745             :                   special_case=my_special_case, &
    9746             :                   eps_filter=almo_scf_env%eps_filter, &
    9747             :                   domain_map=almo_scf_env%domain_map, &
    9748             :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
    9749          80 :                   )
    9750             : 
    9751          80 :                t2 = m_walltime()
    9752             :                CALL fixed_r_report(unit_nr, &
    9753             :                                    iter_type=5, & ! Cauchy point
    9754             :                                    iteration=iteration, &
    9755             :                                    step_size=step_size, &
    9756             :                                    border_reached=border_reached, &
    9757             :                                    curvature=y_scalar, &
    9758             :                                    grad_norm_ratio=expected_reduction, &
    9759          80 :                                    time=t2 - t1)
    9760             : 
    9761             :                EXIT fixed_r_loop ! the inner loop
    9762             : 
    9763         376 :             ELSE IF (optimizer%trustr_algorithm .EQ. trustr_dogleg) THEN
    9764             : 
    9765             :                ! invert or pseudo-invert B
    9766         268 :                SELECT CASE (prec_type)
    9767             :                CASE (xalmo_prec_domain)
    9768             : 
    9769             :                   IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Pseudo-invert model Hessian"
    9770         268 :                   IF (special_case .EQ. xalmo_case_block_diag) THEN ! non-overlapping diagonal blocks
    9771             : 
    9772         156 :                      DO ispin = 1, nspins
    9773             :                         CALL pseudo_invert_diagonal_blk( &
    9774             :                            matrix_in=m_model_hessian(ispin), &
    9775             :                            matrix_out=m_model_hessian_inv(ispin), &
    9776             :                            nocc=almo_scf_env%nocc_of_domain(:, ispin) &
    9777         156 :                            )
    9778             :                      END DO
    9779             : 
    9780         190 :                   ELSE IF (special_case .EQ. xalmo_case_fully_deloc) THEN ! the entire system is a block
    9781             : 
    9782             :                      ! invert using cholesky decomposition
    9783           0 :                      DO ispin = 1, nspins
    9784             :                         CALL dbcsr_copy(m_model_hessian_inv(ispin), &
    9785           0 :                                         m_model_hessian(ispin))
    9786             :                         CALL cp_dbcsr_cholesky_decompose(m_model_hessian_inv(ispin), &
    9787             :                                                          para_env=almo_scf_env%para_env, &
    9788           0 :                                                          blacs_env=almo_scf_env%blacs_env)
    9789             :                         CALL cp_dbcsr_cholesky_invert(m_model_hessian_inv(ispin), &
    9790             :                                                       para_env=almo_scf_env%para_env, &
    9791             :                                                       blacs_env=almo_scf_env%blacs_env, &
    9792           0 :                                                       upper_to_full=.TRUE.)
    9793             :                         CALL dbcsr_filter(m_model_hessian_inv(ispin), &
    9794           0 :                                           almo_scf_env%eps_filter)
    9795             :                      END DO
    9796             : 
    9797             :                   ELSE
    9798             : 
    9799         380 :                      DO ispin = 1, nspins
    9800             :                         CALL construct_domain_preconditioner( &
    9801             :                            matrix_main=m_model_hessian(ispin), &
    9802             :                            subm_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9803             :                            subm_r_down=domain_r_down(:, ispin), &
    9804             :                            matrix_trimmer=quench_t(ispin), &
    9805             :                            dpattern=quench_t(ispin), &
    9806             :                            map=almo_scf_env%domain_map(ispin), &
    9807             :                            node_of_domain=almo_scf_env%cpu_of_domain, &
    9808             :                            preconditioner=domain_model_hessian_inv(:, ispin), &
    9809             :                            use_trimmer=.FALSE., &
    9810             :                            my_action=0, & ! do not do domain (1-r0) projection
    9811             :                            skip_inversion=.FALSE. &
    9812         380 :                            )
    9813             :                      END DO
    9814             : 
    9815             :                   END IF ! special_case
    9816             : 
    9817             :                   ! slower but more reliable way to get inverted hessian
    9818             :                   !DO ispin = 1, nspins
    9819             :                   !   CALL compute_preconditioner( &
    9820             :                   !      domain_prec_out=domain_model_hessian_inv(:, ispin), &
    9821             :                   !      m_prec_out=m_model_hessian_inv(ispin), & ! RZK-warning: this one is not inverted if DOMAINs
    9822             :                   !      m_ks=almo_scf_env%matrix_ks(ispin), &
    9823             :                   !      m_s=almo_scf_env%matrix_s(1), &
    9824             :                   !      m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    9825             :                   !      m_quench_t=quench_t(ispin), &
    9826             :                   !      m_FTsiginv=FTsiginv(ispin), &
    9827             :                   !      m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    9828             :                   !      m_ST=ST(ispin), &
    9829             :                   !      para_env=almo_scf_env%para_env, &
    9830             :                   !      blacs_env=almo_scf_env%blacs_env, &
    9831             :                   !      nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    9832             :                   !      domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9833             :                   !      domain_r_down=domain_r_down(:, ispin), &
    9834             :                   !      cpu_of_domain=almo_scf_env%cpu_of_domain, &
    9835             :                   !      domain_map=almo_scf_env%domain_map(ispin), &
    9836             :                   !      assume_t0_q0x=.FALSE., &
    9837             :                   !      penalty_occ_vol=penalty_occ_vol, &
    9838             :                   !      penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    9839             :                   !      eps_filter=almo_scf_env%eps_filter, &
    9840             :                   !      neg_thr=1.0E10_dp, &
    9841             :                   !      spin_factor=spin_factor, &
    9842             :                   !      skip_inversion=.FALSE., &
    9843             :                   !      special_case=my_special_case)
    9844             :                   !ENDDO ! ispin
    9845             : 
    9846             :                CASE DEFAULT
    9847             : 
    9848         268 :                   CPABORT("Unknown preconditioner")
    9849             : 
    9850             :                END SELECT ! preconditioner type fork
    9851             : 
    9852             :                ! get pB = Binv.m_model_r = -Binv.grad
    9853         536 :                DO ispin = 1, nspins
    9854             : 
    9855             :                   ! Get B.d
    9856         268 :                   IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
    9857         268 :                       my_special_case .EQ. xalmo_case_fully_deloc) THEN
    9858             : 
    9859             :                      CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9860             :                                          m_model_hessian_inv(ispin), &
    9861             :                                          m_model_r(ispin), &
    9862             :                                          0.0_dp, m_model_Bd(ispin), &
    9863          78 :                                          filter_eps=almo_scf_env%eps_filter)
    9864             : 
    9865             :                   ELSE
    9866             : 
    9867             :                      CALL apply_domain_operators( &
    9868             :                         matrix_in=m_model_r(ispin), &
    9869             :                         matrix_out=m_model_Bd(ispin), &
    9870             :                         operator1=domain_model_hessian_inv(:, ispin), &
    9871             :                         dpattern=quench_t(ispin), &
    9872             :                         map=almo_scf_env%domain_map(ispin), &
    9873             :                         node_of_domain=almo_scf_env%cpu_of_domain, &
    9874             :                         my_action=0, &
    9875         190 :                         filter_eps=almo_scf_env%eps_filter)
    9876             : 
    9877             :                   END IF ! special case
    9878             : 
    9879             :                END DO ! ispin
    9880             : 
    9881             :                ! Compute norm of pB
    9882             :                CALL contravariant_matrix_norm( &
    9883             :                   norm_out=step_norm, &
    9884             :                   matrix_in=m_model_Bd, &
    9885             :                   metric_in=almo_scf_env%matrix_s, &
    9886             :                   quench_t_in=quench_t, &
    9887             :                   eps_filter_in=almo_scf_env%eps_filter &
    9888         268 :                   )
    9889             :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...pB norm: ", step_norm
    9890             : 
    9891             :                ! Do not step beyond the trust radius
    9892         268 :                IF (step_norm .LE. radius_current) THEN
    9893             : 
    9894             :                   IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Full dogleg"
    9895             : 
    9896         266 :                   border_reached = .FALSE.
    9897             : 
    9898         532 :                   DO ispin = 1, nspins
    9899         532 :                      CALL dbcsr_copy(step(ispin), m_model_Bd(ispin))
    9900             :                   END DO
    9901             : 
    9902         266 :                   fake_step_size_to_report = 2.0_dp
    9903         266 :                   iteration_type_to_report = 6
    9904             : 
    9905             :                ELSE ! take a shorter dogleg step
    9906             : 
    9907             :                   IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...pB norm is too large"
    9908             : 
    9909           2 :                   border_reached = .TRUE.
    9910             : 
    9911             :                   ! compute the dogleg vector = pB - pU
    9912             :                   ! this destroys -Binv.grad content
    9913           4 :                   DO ispin = 1, nspins
    9914           4 :                      CALL dbcsr_add(m_model_Bd(ispin), step(ispin), 1.0_dp, -1.0_dp)
    9915             :                   END DO
    9916             : 
    9917             :                   CALL step_size_to_border( &
    9918             :                      step_size_out=step_size, &
    9919             :                      metric_in=almo_scf_env%matrix_s, &
    9920             :                      position_in=step, &
    9921             :                      direction_in=m_model_Bd, &
    9922             :                      trust_radius_in=radius_current, &
    9923             :                      quench_t_in=quench_t, &
    9924             :                      eps_filter_in=almo_scf_env%eps_filter &
    9925           2 :                      )
    9926             :                   IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Step size to border: ", step_size
    9927           2 :                   IF (step_size .GT. 1.0_dp .OR. step_size .LT. 0.0_dp) THEN
    9928           0 :                      IF (unit_nr > 0) &
    9929           0 :                         WRITE (unit_nr, *) "Step size (", step_size, ") must lie inside (0,1)"
    9930           0 :                      CPABORT("Wrong dog leg step. We should never end up here.")
    9931             :                   END IF
    9932             : 
    9933           4 :                   DO ispin = 1, nspins
    9934           4 :                      CALL dbcsr_add(step(ispin), m_model_Bd(ispin), 1.0_dp, step_size)
    9935             :                   END DO
    9936             : 
    9937           2 :                   fake_step_size_to_report = 1.0_dp + step_size
    9938           2 :                   iteration_type_to_report = 7
    9939             : 
    9940             :                END IF ! full or partial dogleg?
    9941             : 
    9942             :                IF (debug_mode) THEN
    9943             :                   ! Compute step norm
    9944             :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Extra norm evaluation"
    9945             :                   CALL contravariant_matrix_norm( &
    9946             :                      norm_out=step_norm, &
    9947             :                      matrix_in=step, &
    9948             :                      metric_in=almo_scf_env%matrix_s, &
    9949             :                      quench_t_in=quench_t, &
    9950             :                      eps_filter_in=almo_scf_env%eps_filter &
    9951             :                      )
    9952             :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Step norm: ", step_norm
    9953             :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Current radius: ", radius_current
    9954             :                END IF
    9955             : 
    9956             :                CALL predicted_reduction( &
    9957             :                   reduction_out=expected_reduction, &
    9958             :                   grad_in=grad, &
    9959             :                   step_in=step, &
    9960             :                   hess_in=m_model_hessian, &
    9961             :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
    9962             :                   quench_t_in=quench_t, &
    9963             :                   special_case=my_special_case, &
    9964             :                   eps_filter=almo_scf_env%eps_filter, &
    9965             :                   domain_map=almo_scf_env%domain_map, &
    9966             :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
    9967         268 :                   )
    9968             : 
    9969         268 :                inner_loop_success = .TRUE.
    9970             : 
    9971         268 :                t2 = m_walltime()
    9972             :                CALL fixed_r_report(unit_nr, &
    9973             :                                    iter_type=iteration_type_to_report, &
    9974             :                                    iteration=iteration, &
    9975             :                                    step_size=fake_step_size_to_report, &
    9976             :                                    border_reached=border_reached, &
    9977             :                                    curvature=y_scalar, &
    9978             :                                    grad_norm_ratio=expected_reduction, &
    9979         268 :                                    time=t2 - t1)
    9980             : 
    9981             :                EXIT fixed_r_loop ! the inner loop
    9982             : 
    9983             :             END IF ! Non-iterative subproblem methods exit here
    9984             : 
    9985             :             ! Step 4: update model gradient
    9986         216 :             DO ispin = 1, nspins
    9987             :                ! save previous data
    9988         108 :                CALL dbcsr_copy(m_model_r_prev(ispin), m_model_r(ispin))
    9989             :                CALL dbcsr_add(m_model_r(ispin), m_model_Bd(ispin), &
    9990         216 :                               1.0_dp, -step_size)
    9991             :             END DO ! ispin
    9992             : 
    9993             :             ! Model grad norm
    9994         216 :             DO ispin = 1, nspins
    9995             :                CALL dbcsr_norm(m_model_r(ispin), dbcsr_norm_maxabsnorm, &
    9996         216 :                                norm_scalar=grad_norm_spin(ispin))
    9997             :                !grad_norm_frob = dbcsr_frobenius_norm(grad(ispin)) / &
    9998             :                !                 dbcsr_frobenius_norm(quench_t(ispin))
    9999             :             END DO ! ispin
   10000         324 :             model_grad_norm = MAXVAL(grad_norm_spin)
   10001             : 
   10002             :             ! Check norm reduction
   10003         108 :             grad_norm_ratio = model_grad_norm/grad_norm_ref
   10004         108 :             IF (grad_norm_ratio .LT. optimizer%model_grad_norm_ratio) THEN
   10005             : 
   10006          26 :                border_reached = .FALSE.
   10007          26 :                inner_loop_success = .TRUE.
   10008             : 
   10009             :                CALL predicted_reduction( &
   10010             :                   reduction_out=expected_reduction, &
   10011             :                   grad_in=grad, &
   10012             :                   step_in=step, &
   10013             :                   hess_in=m_model_hessian, &
   10014             :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
   10015             :                   quench_t_in=quench_t, &
   10016             :                   special_case=my_special_case, &
   10017             :                   eps_filter=almo_scf_env%eps_filter, &
   10018             :                   domain_map=almo_scf_env%domain_map, &
   10019             :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
   10020          26 :                   )
   10021             : 
   10022          26 :                t2 = m_walltime()
   10023             :                CALL fixed_r_report(unit_nr, &
   10024             :                                    iter_type=4, &
   10025             :                                    iteration=iteration, &
   10026             :                                    step_size=step_size, &
   10027             :                                    border_reached=border_reached, &
   10028             :                                    curvature=y_scalar, &
   10029             :                                    grad_norm_ratio=expected_reduction, &
   10030          26 :                                    time=t2 - t1)
   10031             : 
   10032             :                EXIT fixed_r_loop ! the inner loop
   10033             : 
   10034             :             END IF
   10035             : 
   10036             :             ! Step 5: update model direction
   10037         164 :             DO ispin = 1, nspins
   10038             :                ! save previous data
   10039         164 :                CALL dbcsr_copy(m_model_rt_prev(ispin), m_model_rt(ispin))
   10040             :             END DO ! ispin
   10041             : 
   10042         164 :             DO ispin = 1, nspins
   10043             : 
   10044          82 :                IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
   10045          82 :                    my_special_case .EQ. xalmo_case_fully_deloc) THEN
   10046             : 
   10047             :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10048             :                                       m_s_inv, &
   10049             :                                       m_model_r(ispin), &
   10050             :                                       0.0_dp, m_model_rt(ispin), &
   10051           0 :                                       filter_eps=almo_scf_env%eps_filter)
   10052             : 
   10053          82 :                ELSE IF (my_special_case .EQ. xalmo_case_normal) THEN
   10054             : 
   10055             :                   CALL apply_domain_operators( &
   10056             :                      matrix_in=m_model_r(ispin), &
   10057             :                      matrix_out=m_model_rt(ispin), &
   10058             :                      operator1=almo_scf_env%domain_s_inv(:, ispin), &
   10059             :                      dpattern=quench_t(ispin), &
   10060             :                      map=almo_scf_env%domain_map(ispin), &
   10061             :                      node_of_domain=almo_scf_env%cpu_of_domain, &
   10062             :                      my_action=0, &
   10063          82 :                      filter_eps=almo_scf_env%eps_filter)
   10064             : 
   10065             :                END IF
   10066             : 
   10067             :             END DO ! ispin
   10068             : 
   10069             :             CALL compute_cg_beta( &
   10070             :                beta=beta, &
   10071             :                reset_conjugator=reset_conjugator, &
   10072             :                conjugator=optimizer%conjugator, &
   10073             :                grad=m_model_r(:), &
   10074             :                prev_grad=m_model_r_prev(:), &
   10075             :                step=m_model_rt(:), &
   10076             :                prev_step=m_model_rt_prev(:) &
   10077          82 :                )
   10078             : 
   10079         164 :             DO ispin = 1, nspins
   10080             :                ! update direction
   10081         164 :                CALL dbcsr_add(m_model_d(ispin), m_model_rt(ispin), beta, 1.0_dp)
   10082             :             END DO ! ispin
   10083             : 
   10084          82 :             t2 = m_walltime()
   10085             :             CALL fixed_r_report(unit_nr, &
   10086             :                                 iter_type=1, &
   10087             :                                 iteration=iteration, &
   10088             :                                 step_size=step_size, &
   10089             :                                 border_reached=border_reached, &
   10090             :                                 curvature=y_scalar, &
   10091             :                                 grad_norm_ratio=grad_norm_ratio, &
   10092          82 :                                 time=t2 - t1)
   10093          82 :             t1 = m_walltime()
   10094             : 
   10095             :          END DO fixed_r_loop
   10096             :          !!!! done with the inner loop
   10097             :          ! the inner loop must return: step, predicted reduction,
   10098             :          ! whether it reached the border and completed successfully
   10099             : 
   10100             :          IF (.NOT. inner_loop_success) THEN
   10101           0 :             CPABORT("Inner loop did not produce solution")
   10102             :          END IF
   10103             : 
   10104         816 :          DO ispin = 1, nspins
   10105             : 
   10106         408 :             CALL dbcsr_copy(m_theta_trial(ispin), m_theta(ispin))
   10107         816 :             CALL dbcsr_add(m_theta_trial(ispin), step(ispin), 1.0_dp, 1.0_dp)
   10108             : 
   10109             :          END DO ! ispin
   10110             : 
   10111             :          ! compute the energy
   10112             :          !IF (.NOT. same_position) THEN
   10113             :          CALL main_var_to_xalmos_and_loss_func( &
   10114             :             almo_scf_env=almo_scf_env, &
   10115             :             qs_env=qs_env, &
   10116             :             m_main_var_in=m_theta_trial, &
   10117             :             m_t_out=matrix_t_out, &
   10118             :             m_sig_sqrti_ii_out=m_sig_sqrti_ii, &
   10119             :             energy_out=energy_trial, &
   10120             :             penalty_out=penalty_trial, &
   10121             :             m_FTsiginv_out=FTsiginv, &
   10122             :             m_siginvTFTsiginv_out=siginvTFTsiginv, &
   10123             :             m_ST_out=ST, &
   10124             :             m_STsiginv0_in=STsiginv_0, &
   10125             :             m_quench_t_in=quench_t, &
   10126             :             domain_r_down_in=domain_r_down, &
   10127             :             assume_t0_q0x=assume_t0_q0x, &
   10128             :             just_started=.FALSE., &
   10129             :             optimize_theta=optimize_theta, &
   10130             :             normalize_orbitals=normalize_orbitals, &
   10131             :             perturbation_only=perturbation_only, &
   10132             :             do_penalty=penalty_occ_vol, &
   10133         408 :             special_case=my_special_case)
   10134         408 :          loss_trial = energy_trial + penalty_trial
   10135             :          !ENDIF ! not same_position
   10136             : 
   10137         408 :          rho = (loss_trial - loss_start)/expected_reduction
   10138         408 :          loss_change_to_report = loss_trial - loss_start
   10139             : 
   10140         408 :          IF (rho < 0.25_dp) THEN
   10141           0 :             radius_current = 0.25_dp*radius_current
   10142             :          ELSE
   10143         408 :             IF (rho > 0.75_dp .AND. border_reached) THEN
   10144           2 :                radius_current = MIN(2.0_dp*radius_current, radius_max)
   10145             :             END IF
   10146             :          END IF ! radius adjustment
   10147             : 
   10148         408 :          IF (rho > eta) THEN
   10149         816 :             DO ispin = 1, nspins
   10150         816 :                CALL dbcsr_copy(m_theta(ispin), m_theta_trial(ispin))
   10151             :             END DO ! ispin
   10152         408 :             loss_start = loss_trial
   10153         408 :             energy_start = energy_trial
   10154         408 :             penalty_start = penalty_trial
   10155         408 :             same_position = .FALSE.
   10156         408 :             IF (my_special_case .EQ. xalmo_case_block_diag) THEN
   10157          92 :                almo_scf_env%almo_scf_energy = energy_trial
   10158             :             END IF
   10159             :          ELSE
   10160           0 :             same_position = .TRUE.
   10161           0 :             IF (my_special_case .EQ. xalmo_case_block_diag) THEN
   10162           0 :                almo_scf_env%almo_scf_energy = energy_start
   10163             :             END IF
   10164             :          END IF ! finalize step
   10165             : 
   10166         408 :          t2outer = m_walltime()
   10167             :          CALL trust_r_report(unit_nr, &
   10168             :                              iter_type=2, &
   10169             :                              iteration=outer_iteration, &
   10170             :                              loss=loss_trial, &
   10171             :                              delta_loss=loss_change_to_report, &
   10172             :                              grad_norm=0.0_dp, &
   10173             :                              predicted_reduction=expected_reduction, &
   10174             :                              rho=rho, &
   10175             :                              radius=radius_current, &
   10176             :                              new=.NOT. same_position, &
   10177         408 :                              time=t2outer - t1outer)
   10178         426 :          t1outer = m_walltime()
   10179             : 
   10180             :       END DO adjust_r_loop
   10181             : 
   10182             :       ! post SCF-loop calculations
   10183          18 :       IF (scf_converged) THEN
   10184             : 
   10185             :          CALL wrap_up_xalmo_scf( &
   10186             :             qs_env=qs_env, &
   10187             :             almo_scf_env=almo_scf_env, &
   10188             :             perturbation_in=perturbation_only, &
   10189             :             m_xalmo_in=matrix_t_out, &
   10190             :             m_quench_in=quench_t, &
   10191          18 :             energy_inout=energy_start)
   10192             : 
   10193             :       END IF ! if converged
   10194             : 
   10195          36 :       DO ispin = 1, nspins
   10196          18 :          CALL dbcsr_release(m_model_hessian_inv(ispin))
   10197          18 :          CALL dbcsr_release(m_model_hessian(ispin))
   10198          18 :          CALL dbcsr_release(STsiginv_0(ispin))
   10199          18 :          CALL dbcsr_release(ST(ispin))
   10200          18 :          CALL dbcsr_release(FTsiginv(ispin))
   10201          18 :          CALL dbcsr_release(siginvTFTsiginv(ispin))
   10202          18 :          CALL dbcsr_release(prev_step(ispin))
   10203          18 :          CALL dbcsr_release(grad(ispin))
   10204          18 :          CALL dbcsr_release(step(ispin))
   10205          18 :          CALL dbcsr_release(m_theta(ispin))
   10206          18 :          CALL dbcsr_release(m_sig_sqrti_ii(ispin))
   10207          18 :          CALL dbcsr_release(m_model_r(ispin))
   10208          18 :          CALL dbcsr_release(m_model_rt(ispin))
   10209          18 :          CALL dbcsr_release(m_model_d(ispin))
   10210          18 :          CALL dbcsr_release(m_model_Bd(ispin))
   10211          18 :          CALL dbcsr_release(m_model_r_prev(ispin))
   10212          18 :          CALL dbcsr_release(m_model_rt_prev(ispin))
   10213          18 :          CALL dbcsr_release(m_theta_trial(ispin))
   10214          18 :          CALL release_submatrices(domain_r_down(:, ispin))
   10215          36 :          CALL release_submatrices(domain_model_hessian_inv(:, ispin))
   10216             :       END DO ! ispin
   10217             : 
   10218          18 :       IF (my_special_case .EQ. xalmo_case_block_diag .OR. &
   10219             :           my_special_case .EQ. xalmo_case_fully_deloc) THEN
   10220           2 :          CALL dbcsr_release(m_s_inv)
   10221             :       END IF
   10222             : 
   10223          18 :       DEALLOCATE (m_model_hessian)
   10224          18 :       DEALLOCATE (m_model_hessian_inv)
   10225          18 :       DEALLOCATE (siginvTFTsiginv)
   10226          18 :       DEALLOCATE (STsiginv_0)
   10227          18 :       DEALLOCATE (FTsiginv)
   10228          18 :       DEALLOCATE (ST)
   10229          18 :       DEALLOCATE (grad)
   10230          18 :       DEALLOCATE (prev_step)
   10231          18 :       DEALLOCATE (step)
   10232          18 :       DEALLOCATE (m_sig_sqrti_ii)
   10233          18 :       DEALLOCATE (m_model_r)
   10234          18 :       DEALLOCATE (m_model_rt)
   10235          18 :       DEALLOCATE (m_model_d)
   10236          18 :       DEALLOCATE (m_model_Bd)
   10237          18 :       DEALLOCATE (m_model_r_prev)
   10238          18 :       DEALLOCATE (m_model_rt_prev)
   10239          18 :       DEALLOCATE (m_theta_trial)
   10240             : 
   10241         146 :       DEALLOCATE (domain_r_down)
   10242         146 :       DEALLOCATE (domain_model_hessian_inv)
   10243             : 
   10244          18 :       DEALLOCATE (penalty_occ_vol_g_prefactor)
   10245          18 :       DEALLOCATE (penalty_occ_vol_h_prefactor)
   10246          18 :       DEALLOCATE (grad_norm_spin)
   10247          18 :       DEALLOCATE (nocc)
   10248             : 
   10249          18 :       DEALLOCATE (m_theta)
   10250             : 
   10251          18 :       IF (.NOT. scf_converged .AND. .NOT. optimizer%early_stopping_on) THEN
   10252           0 :          CPABORT("Optimization not converged! ")
   10253             :       END IF
   10254             : 
   10255          18 :       CALL timestop(handle)
   10256             : 
   10257          36 :    END SUBROUTINE almo_scf_xalmo_trustr
   10258             : 
   10259             : ! **************************************************************************************************
   10260             : !> \brief Computes molecular orbitals and the objective (loss) function from the main variables
   10261             : !>        Most important input and output variables are given as arguments explicitly.
   10262             : !>        Some variables inside almo_scf_env (KS, DM) and qs_env are also updated but are not
   10263             : !>        listed as arguments for brevity
   10264             : !> \param almo_scf_env ...
   10265             : !> \param qs_env ...
   10266             : !> \param m_main_var_in ...
   10267             : !> \param m_t_out ...
   10268             : !> \param energy_out ...
   10269             : !> \param penalty_out ...
   10270             : !> \param m_sig_sqrti_ii_out ...
   10271             : !> \param m_FTsiginv_out ...
   10272             : !> \param m_siginvTFTsiginv_out ...
   10273             : !> \param m_ST_out ...
   10274             : !> \param m_STsiginv0_in ...
   10275             : !> \param m_quench_t_in ...
   10276             : !> \param domain_r_down_in ...
   10277             : !> \param assume_t0_q0x ...
   10278             : !> \param just_started ...
   10279             : !> \param optimize_theta ...
   10280             : !> \param normalize_orbitals ...
   10281             : !> \param perturbation_only ...
   10282             : !> \param do_penalty ...
   10283             : !> \param special_case ...
   10284             : !> \par History
   10285             : !>       2019.12 created [Rustam Z Khaliullin]
   10286             : !> \author Rustam Z Khaliullin
   10287             : ! **************************************************************************************************
   10288        1474 :    SUBROUTINE main_var_to_xalmos_and_loss_func(almo_scf_env, qs_env, m_main_var_in, &
   10289        1474 :                                                m_t_out, energy_out, penalty_out, m_sig_sqrti_ii_out, m_FTsiginv_out, &
   10290        1474 :                                                m_siginvTFTsiginv_out, m_ST_out, m_STsiginv0_in, m_quench_t_in, domain_r_down_in, &
   10291             :                                                assume_t0_q0x, just_started, optimize_theta, normalize_orbitals, perturbation_only, &
   10292             :                                                do_penalty, special_case)
   10293             : 
   10294             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
   10295             :       TYPE(qs_environment_type), POINTER                 :: qs_env
   10296             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_main_var_in
   10297             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_t_out
   10298             :       REAL(KIND=dp), INTENT(OUT)                         :: energy_out, penalty_out
   10299             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_sig_sqrti_ii_out, m_FTsiginv_out, &
   10300             :                                                             m_siginvTFTsiginv_out, m_ST_out
   10301             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_STsiginv0_in, m_quench_t_in
   10302             :       TYPE(domain_submatrix_type), DIMENSION(:, :), &
   10303             :          INTENT(IN)                                      :: domain_r_down_in
   10304             :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, just_started, &
   10305             :                                                             optimize_theta, normalize_orbitals, &
   10306             :                                                             perturbation_only, do_penalty
   10307             :       INTEGER, INTENT(IN)                                :: special_case
   10308             : 
   10309             :       CHARACTER(len=*), PARAMETER :: routineN = 'main_var_to_xalmos_and_loss_func'
   10310             : 
   10311             :       INTEGER                                            :: handle, ispin, nspins
   10312        1474 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
   10313             :       REAL(KIND=dp)                                      :: det1, energy_ispin, penalty_amplitude, &
   10314             :                                                             spin_factor
   10315             : 
   10316        1474 :       CALL timeset(routineN, handle)
   10317             : 
   10318        1474 :       energy_out = 0.0_dp
   10319        1474 :       penalty_out = 0.0_dp
   10320             : 
   10321        1474 :       nspins = SIZE(m_main_var_in)
   10322        1474 :       IF (nspins == 1) THEN
   10323        1474 :          spin_factor = 2.0_dp
   10324             :       ELSE
   10325           0 :          spin_factor = 1.0_dp
   10326             :       END IF
   10327             : 
   10328        1474 :       penalty_amplitude = 0.0_dp !almo_scf_env%penalty%occ_vol_coeff
   10329             : 
   10330        4422 :       ALLOCATE (nocc(nspins))
   10331        2948 :       DO ispin = 1, nspins
   10332             :          CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), &
   10333        2948 :                              nfullrows_total=nocc(ispin))
   10334             :       END DO
   10335             : 
   10336        2948 :       DO ispin = 1, nspins
   10337             : 
   10338             :          ! compute MO coefficients from the main variable
   10339             :          CALL compute_xalmos_from_main_var( &
   10340             :             m_var_in=m_main_var_in(ispin), &
   10341             :             m_t_out=m_t_out(ispin), &
   10342             :             m_quench_t=m_quench_t_in(ispin), &
   10343             :             m_t0=almo_scf_env%matrix_t_blk(ispin), &
   10344             :             m_oo_template=almo_scf_env%matrix_sigma_inv(ispin), &
   10345             :             m_STsiginv0=m_STsiginv0_in(ispin), &
   10346             :             m_s=almo_scf_env%matrix_s(1), &
   10347             :             m_sig_sqrti_ii_out=m_sig_sqrti_ii_out(ispin), &
   10348             :             domain_r_down=domain_r_down_in(:, ispin), &
   10349             :             domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
   10350             :             domain_map=almo_scf_env%domain_map(ispin), &
   10351             :             cpu_of_domain=almo_scf_env%cpu_of_domain, &
   10352             :             assume_t0_q0x=assume_t0_q0x, &
   10353             :             just_started=just_started, &
   10354             :             optimize_theta=optimize_theta, &
   10355             :             normalize_orbitals=normalize_orbitals, &
   10356             :             envelope_amplitude=almo_scf_env%envelope_amplitude, &
   10357             :             eps_filter=almo_scf_env%eps_filter, &
   10358             :             special_case=special_case, &
   10359             :             nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
   10360             :             order_lanczos=almo_scf_env%order_lanczos, &
   10361             :             eps_lanczos=almo_scf_env%eps_lanczos, &
   10362        1474 :             max_iter_lanczos=almo_scf_env%max_iter_lanczos)
   10363             : 
   10364             :          ! compute the global projectors (for the density matrix)
   10365             :          CALL almo_scf_t_to_proj( &
   10366             :             t=m_t_out(ispin), &
   10367             :             p=almo_scf_env%matrix_p(ispin), &
   10368             :             eps_filter=almo_scf_env%eps_filter, &
   10369             :             orthog_orbs=.FALSE., &
   10370             :             nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
   10371             :             s=almo_scf_env%matrix_s(1), &
   10372             :             sigma=almo_scf_env%matrix_sigma(ispin), &
   10373             :             sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
   10374             :             use_guess=.FALSE., &
   10375             :             algorithm=almo_scf_env%sigma_inv_algorithm, &
   10376             :             inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
   10377             :             inverse_accelerator=almo_scf_env%order_lanczos, &
   10378             :             eps_lanczos=almo_scf_env%eps_lanczos, &
   10379             :             max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
   10380             :             para_env=almo_scf_env%para_env, &
   10381        1474 :             blacs_env=almo_scf_env%blacs_env)
   10382             : 
   10383             :          ! compute dm from the projector(s)
   10384             :          CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), &
   10385        2948 :                           spin_factor)
   10386             : 
   10387             :       END DO ! ispin
   10388             : 
   10389             :       ! update the KS matrix and energy if necessary
   10390        1474 :       IF (perturbation_only) THEN
   10391             :          ! note: do not combine the two IF statements
   10392         212 :          IF (just_started) THEN
   10393          48 :             DO ispin = 1, nspins
   10394             :                CALL dbcsr_copy(almo_scf_env%matrix_ks(ispin), &
   10395          48 :                                almo_scf_env%matrix_ks_0deloc(ispin))
   10396             :             END DO
   10397             :          END IF
   10398             :       ELSE
   10399             :          ! the KS matrix is updated outside the spin loop
   10400             :          CALL almo_dm_to_almo_ks(qs_env, &
   10401             :                                  almo_scf_env%matrix_p, &
   10402             :                                  almo_scf_env%matrix_ks, &
   10403             :                                  energy_out, &
   10404             :                                  almo_scf_env%eps_filter, &
   10405        1262 :                                  almo_scf_env%mat_distr_aos)
   10406             :       END IF
   10407             : 
   10408        1474 :       penalty_out = 0.0_dp
   10409        2948 :       DO ispin = 1, nspins
   10410             : 
   10411             :          CALL compute_frequently_used_matrices( &
   10412             :             filter_eps=almo_scf_env%eps_filter, &
   10413             :             m_T_in=m_t_out(ispin), &
   10414             :             m_siginv_in=almo_scf_env%matrix_sigma_inv(ispin), &
   10415             :             m_S_in=almo_scf_env%matrix_s(1), &
   10416             :             m_F_in=almo_scf_env%matrix_ks(ispin), &
   10417             :             m_FTsiginv_out=m_FTsiginv_out(ispin), &
   10418             :             m_siginvTFTsiginv_out=m_siginvTFTsiginv_out(ispin), &
   10419        1474 :             m_ST_out=m_ST_out(ispin))
   10420             : 
   10421        1474 :          IF (perturbation_only) THEN
   10422             :             ! calculate objective function Tr(F_0 R)
   10423         212 :             IF (ispin .EQ. 1) energy_out = 0.0_dp
   10424         212 :             CALL dbcsr_dot(m_t_out(ispin), m_FTsiginv_out(ispin), energy_ispin)
   10425         212 :             energy_out = energy_out + energy_ispin*spin_factor
   10426             :          END IF
   10427             : 
   10428        2948 :          IF (do_penalty) THEN
   10429             : 
   10430             :             CALL determinant(almo_scf_env%matrix_sigma(ispin), det1, &
   10431           0 :                              almo_scf_env%eps_filter)
   10432             :             penalty_out = penalty_out - &
   10433           0 :                           penalty_amplitude*spin_factor*nocc(ispin)*LOG(det1)
   10434             : 
   10435             :          END IF
   10436             : 
   10437             :       END DO ! ispin
   10438             : 
   10439        1474 :       DEALLOCATE (nocc)
   10440             : 
   10441        1474 :       CALL timestop(handle)
   10442             : 
   10443        1474 :    END SUBROUTINE main_var_to_xalmos_and_loss_func
   10444             : 
   10445             : ! **************************************************************************************************
   10446             : !> \brief Computes the step size required to reach the trust-radius border,
   10447             : !>        measured from the origin,
   10448             : !>        given the current position (position) in the direction (direction)
   10449             : !> \param step_size_out ...
   10450             : !> \param metric_in ...
   10451             : !> \param position_in ...
   10452             : !> \param direction_in ...
   10453             : !> \param trust_radius_in ...
   10454             : !> \param quench_t_in ...
   10455             : !> \param eps_filter_in ...
   10456             : !> \par History
   10457             : !>       2019.12 created [Rustam Z Khaliullin]
   10458             : !> \author Rustam Z Khaliullin
   10459             : ! **************************************************************************************************
   10460          36 :    SUBROUTINE step_size_to_border(step_size_out, metric_in, position_in, &
   10461          36 :                                   direction_in, trust_radius_in, quench_t_in, eps_filter_in)
   10462             : 
   10463             :       REAL(KIND=dp), INTENT(INOUT)                       :: step_size_out
   10464             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: metric_in, position_in, direction_in
   10465             :       REAL(KIND=dp), INTENT(IN)                          :: trust_radius_in
   10466             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: quench_t_in
   10467             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter_in
   10468             : 
   10469             :       INTEGER                                            :: isol, ispin, nsolutions, &
   10470             :                                                             nsolutions_found, nspins
   10471          36 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
   10472             :       REAL(KIND=dp)                                      :: discrim_sign, discriminant, solution, &
   10473             :                                                             spin_factor, temp_real
   10474             :       REAL(KIND=dp), DIMENSION(3)                        :: coef
   10475          36 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no
   10476             : 
   10477          36 :       step_size_out = 0.0_dp
   10478             : 
   10479          36 :       nspins = SIZE(position_in)
   10480          36 :       IF (nspins == 1) THEN
   10481             :          spin_factor = 2.0_dp
   10482             :       ELSE
   10483           0 :          spin_factor = 1.0_dp
   10484             :       END IF
   10485             : 
   10486         108 :       ALLOCATE (nocc(nspins))
   10487         144 :       ALLOCATE (m_temp_no(nspins))
   10488             : 
   10489          36 :       coef(:) = 0.0_dp
   10490          72 :       DO ispin = 1, nspins
   10491             : 
   10492             :          CALL dbcsr_create(m_temp_no(ispin), &
   10493          36 :                            template=direction_in(ispin))
   10494             : 
   10495             :          CALL dbcsr_get_info(direction_in(ispin), &
   10496          36 :                              nfullcols_total=nocc(ispin))
   10497             : 
   10498          36 :          CALL dbcsr_copy(m_temp_no(ispin), quench_t_in(ispin))
   10499             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10500             :                              metric_in(1), &
   10501             :                              position_in(ispin), &
   10502             :                              0.0_dp, m_temp_no(ispin), &
   10503          36 :                              retain_sparsity=.TRUE.)
   10504          36 :          CALL dbcsr_filter(m_temp_no(ispin), eps_filter_in)
   10505          36 :          CALL dbcsr_dot(position_in(ispin), m_temp_no(ispin), temp_real)
   10506          36 :          coef(3) = coef(3) + temp_real/nocc(ispin)
   10507          36 :          CALL dbcsr_dot(direction_in(ispin), m_temp_no(ispin), temp_real)
   10508          36 :          coef(2) = coef(2) + 2.0_dp*temp_real/nocc(ispin)
   10509          36 :          CALL dbcsr_copy(m_temp_no(ispin), quench_t_in(ispin))
   10510             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10511             :                              metric_in(1), &
   10512             :                              direction_in(ispin), &
   10513             :                              0.0_dp, m_temp_no(ispin), &
   10514          36 :                              retain_sparsity=.TRUE.)
   10515          36 :          CALL dbcsr_filter(m_temp_no(ispin), eps_filter_in)
   10516          36 :          CALL dbcsr_dot(direction_in(ispin), m_temp_no(ispin), temp_real)
   10517          36 :          coef(1) = coef(1) + temp_real/nocc(ispin)
   10518             : 
   10519          72 :          CALL dbcsr_release(m_temp_no(ispin))
   10520             : 
   10521             :       END DO !ispin
   10522             : 
   10523          36 :       DEALLOCATE (nocc)
   10524          36 :       DEALLOCATE (m_temp_no)
   10525             : 
   10526         144 :       coef(:) = coef(:)*spin_factor
   10527          36 :       coef(3) = coef(3) - trust_radius_in*trust_radius_in
   10528             : 
   10529             :       ! solve the quadratic equation
   10530          36 :       discriminant = coef(2)*coef(2) - 4.0_dp*coef(1)*coef(3)
   10531          36 :       IF (discriminant .GT. TINY(discriminant)) THEN
   10532             :          nsolutions = 2
   10533           0 :       ELSE IF (discriminant .LT. 0.0_dp) THEN
   10534           0 :          nsolutions = 0
   10535           0 :          CPABORT("Step to border: no solutions")
   10536             :       ELSE
   10537             :          nsolutions = 1
   10538             :       END IF
   10539             : 
   10540          36 :       discrim_sign = 1.0_dp
   10541          36 :       nsolutions_found = 0
   10542         108 :       DO isol = 1, nsolutions
   10543          72 :          solution = (-coef(2) + discrim_sign*SQRT(discriminant))/(2.0_dp*coef(1))
   10544          72 :          IF (solution .GT. 0.0_dp) THEN
   10545          36 :             nsolutions_found = nsolutions_found + 1
   10546          36 :             step_size_out = solution
   10547             :          END IF
   10548         108 :          discrim_sign = -discrim_sign
   10549             :       END DO
   10550             : 
   10551          36 :       IF (nsolutions_found == 0) THEN
   10552           0 :          CPABORT("Step to border: no positive solutions")
   10553          36 :       ELSE IF (nsolutions_found == 2) THEN
   10554           0 :          CPABORT("Two positive border steps possible!")
   10555             :       END IF
   10556             : 
   10557          36 :    END SUBROUTINE step_size_to_border
   10558             : 
   10559             : ! **************************************************************************************************
   10560             : !> \brief Computes a norm of a contravariant NBasis x Occ matrix using proper metric
   10561             : !> \param norm_out ...
   10562             : !> \param matrix_in ...
   10563             : !> \param metric_in ...
   10564             : !> \param quench_t_in ...
   10565             : !> \param eps_filter_in ...
   10566             : !> \par History
   10567             : !>       2019.12 created [Rustam Z Khaliullin]
   10568             : !> \author Rustam Z Khaliullin
   10569             : ! **************************************************************************************************
   10570         758 :    SUBROUTINE contravariant_matrix_norm(norm_out, matrix_in, metric_in, &
   10571         758 :                                         quench_t_in, eps_filter_in)
   10572             : 
   10573             :       REAL(KIND=dp), INTENT(OUT)                         :: norm_out
   10574             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: matrix_in, metric_in, quench_t_in
   10575             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter_in
   10576             : 
   10577             :       INTEGER                                            :: ispin, nspins
   10578         758 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
   10579             :       REAL(KIND=dp)                                      :: my_norm, spin_factor, temp_real
   10580         758 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no
   10581             : 
   10582             :       ! Frist thing: assign the output value to avoid norms being undefined
   10583         758 :       norm_out = 0.0_dp
   10584             : 
   10585         758 :       nspins = SIZE(matrix_in)
   10586         758 :       IF (nspins == 1) THEN
   10587             :          spin_factor = 2.0_dp
   10588             :       ELSE
   10589           0 :          spin_factor = 1.0_dp
   10590             :       END IF
   10591             : 
   10592        2274 :       ALLOCATE (nocc(nspins))
   10593        3032 :       ALLOCATE (m_temp_no(nspins))
   10594             : 
   10595         758 :       my_norm = 0.0_dp
   10596        1516 :       DO ispin = 1, nspins
   10597             : 
   10598         758 :          CALL dbcsr_create(m_temp_no(ispin), template=matrix_in(ispin))
   10599             : 
   10600             :          CALL dbcsr_get_info(matrix_in(ispin), &
   10601         758 :                              nfullcols_total=nocc(ispin))
   10602             : 
   10603         758 :          CALL dbcsr_copy(m_temp_no(ispin), quench_t_in(ispin))
   10604             :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10605             :                              metric_in(1), &
   10606             :                              matrix_in(ispin), &
   10607             :                              0.0_dp, m_temp_no(ispin), &
   10608         758 :                              retain_sparsity=.TRUE.)
   10609         758 :          CALL dbcsr_filter(m_temp_no(ispin), eps_filter_in)
   10610         758 :          CALL dbcsr_dot(matrix_in(ispin), m_temp_no(ispin), temp_real)
   10611             : 
   10612         758 :          my_norm = my_norm + temp_real/nocc(ispin)
   10613             : 
   10614        1516 :          CALL dbcsr_release(m_temp_no(ispin))
   10615             : 
   10616             :       END DO !ispin
   10617             : 
   10618         758 :       DEALLOCATE (nocc)
   10619         758 :       DEALLOCATE (m_temp_no)
   10620             : 
   10621         758 :       my_norm = my_norm*spin_factor
   10622         758 :       norm_out = SQRT(my_norm)
   10623             : 
   10624         758 :    END SUBROUTINE contravariant_matrix_norm
   10625             : 
   10626             : ! **************************************************************************************************
   10627             : !> \brief Loss reduction for a given step is estimated using
   10628             : !>        gradient and hessian
   10629             : !> \param reduction_out ...
   10630             : !> \param grad_in ...
   10631             : !> \param step_in ...
   10632             : !> \param hess_in ...
   10633             : !> \param hess_submatrix_in ...
   10634             : !> \param quench_t_in ...
   10635             : !> \param special_case ...
   10636             : !> \param eps_filter ...
   10637             : !> \param domain_map ...
   10638             : !> \param cpu_of_domain ...
   10639             : !> \par History
   10640             : !>       2019.12 created [Rustam Z Khaliullin]
   10641             : !> \author Rustam Z Khaliullin
   10642             : ! **************************************************************************************************
   10643         408 :    SUBROUTINE predicted_reduction(reduction_out, grad_in, step_in, hess_in, &
   10644         408 :                                   hess_submatrix_in, quench_t_in, special_case, eps_filter, domain_map, &
   10645         408 :                                   cpu_of_domain)
   10646             : 
   10647             :       !RZK-noncritical: can be formulated without submatrices
   10648             :       REAL(KIND=dp), INTENT(INOUT)                       :: reduction_out
   10649             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: grad_in, step_in, hess_in
   10650             :       TYPE(domain_submatrix_type), DIMENSION(:, :), &
   10651             :          INTENT(IN)                                      :: hess_submatrix_in
   10652             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: quench_t_in
   10653             :       INTEGER, INTENT(IN)                                :: special_case
   10654             :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
   10655             :       TYPE(domain_map_type), DIMENSION(:), INTENT(IN)    :: domain_map
   10656             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
   10657             : 
   10658             :       INTEGER                                            :: ispin, nspins
   10659             :       REAL(KIND=dp)                                      :: my_reduction, spin_factor, temp_real
   10660         408 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no
   10661             : 
   10662         408 :       reduction_out = 0.0_dp
   10663             : 
   10664         408 :       nspins = SIZE(grad_in)
   10665         408 :       IF (nspins == 1) THEN
   10666             :          spin_factor = 2.0_dp
   10667             :       ELSE
   10668           0 :          spin_factor = 1.0_dp
   10669             :       END IF
   10670             : 
   10671        1632 :       ALLOCATE (m_temp_no(nspins))
   10672             : 
   10673         408 :       my_reduction = 0.0_dp
   10674         816 :       DO ispin = 1, nspins
   10675             : 
   10676         408 :          CALL dbcsr_create(m_temp_no(ispin), template=grad_in(ispin))
   10677             : 
   10678         408 :          CALL dbcsr_dot(step_in(ispin), grad_in(ispin), temp_real)
   10679         408 :          my_reduction = my_reduction + temp_real
   10680             : 
   10681             :          ! Get Hess.step
   10682         408 :          IF (special_case .EQ. xalmo_case_block_diag .OR. &
   10683             :              special_case .EQ. xalmo_case_fully_deloc) THEN
   10684             : 
   10685             :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10686             :                                 hess_in(ispin), &
   10687             :                                 step_in(ispin), &
   10688             :                                 0.0_dp, m_temp_no(ispin), &
   10689          92 :                                 filter_eps=eps_filter)
   10690             : 
   10691             :          ELSE
   10692             : 
   10693             :             CALL apply_domain_operators( &
   10694             :                matrix_in=step_in(ispin), &
   10695             :                matrix_out=m_temp_no(ispin), &
   10696             :                operator1=hess_submatrix_in(:, ispin), &
   10697             :                dpattern=quench_t_in(ispin), &
   10698             :                map=domain_map(ispin), &
   10699             :                node_of_domain=cpu_of_domain, &
   10700             :                my_action=0, &
   10701         316 :                filter_eps=eps_filter)
   10702             : 
   10703             :          END IF ! special case
   10704             : 
   10705             :          ! Get y=step^T.Hess.step
   10706         408 :          CALL dbcsr_dot(step_in(ispin), m_temp_no(ispin), temp_real)
   10707         408 :          my_reduction = my_reduction + 0.5_dp*temp_real
   10708             : 
   10709         816 :          CALL dbcsr_release(m_temp_no(ispin))
   10710             : 
   10711             :       END DO ! ispin
   10712             : 
   10713             :       !RZK-critical: do we need to multiply by the spin factor?
   10714         408 :       my_reduction = spin_factor*my_reduction
   10715             : 
   10716         408 :       reduction_out = my_reduction
   10717             : 
   10718         408 :       DEALLOCATE (m_temp_no)
   10719             : 
   10720         408 :    END SUBROUTINE predicted_reduction
   10721             : 
   10722             : ! **************************************************************************************************
   10723             : !> \brief Prints key quantities from the fixed-radius minimizer
   10724             : !> \param unit_nr ...
   10725             : !> \param iter_type ...
   10726             : !> \param iteration ...
   10727             : !> \param step_size ...
   10728             : !> \param border_reached ...
   10729             : !> \param curvature ...
   10730             : !> \param grad_norm_ratio ...
   10731             : !> \param predicted_reduction ...
   10732             : !> \param time ...
   10733             : !> \par History
   10734             : !>       2019.12 created [Rustam Z Khaliullin]
   10735             : !> \author Rustam Z Khaliullin
   10736             : ! **************************************************************************************************
   10737         898 :    SUBROUTINE fixed_r_report(unit_nr, iter_type, iteration, step_size, &
   10738             :                              border_reached, curvature, grad_norm_ratio, predicted_reduction, time)
   10739             : 
   10740             :       INTEGER, INTENT(IN)                                :: unit_nr, iter_type, iteration
   10741             :       REAL(KIND=dp), INTENT(IN)                          :: step_size
   10742             :       LOGICAL, INTENT(IN)                                :: border_reached
   10743             :       REAL(KIND=dp), INTENT(IN)                          :: curvature
   10744             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: grad_norm_ratio, predicted_reduction
   10745             :       REAL(KIND=dp), INTENT(IN)                          :: time
   10746             : 
   10747             :       CHARACTER(LEN=20)                                  :: iter_type_str
   10748             :       REAL(KIND=dp)                                      :: loss_or_grad_change
   10749             : 
   10750         898 :       loss_or_grad_change = 0.0_dp
   10751         898 :       IF (PRESENT(grad_norm_ratio)) THEN
   10752         898 :          loss_or_grad_change = grad_norm_ratio
   10753           0 :       ELSE IF (PRESENT(predicted_reduction)) THEN
   10754           0 :          loss_or_grad_change = predicted_reduction
   10755             :       ELSE
   10756           0 :          CPABORT("one argument is missing")
   10757             :       END IF
   10758             : 
   10759        1306 :       SELECT CASE (iter_type)
   10760             :       CASE (0)
   10761         408 :          iter_type_str = TRIM("Ignored")
   10762             :       CASE (1)
   10763          82 :          iter_type_str = TRIM("PCG")
   10764             :       CASE (2)
   10765           0 :          iter_type_str = TRIM("Neg. curvatr.")
   10766             :       CASE (3)
   10767          34 :          iter_type_str = TRIM("Step too long")
   10768             :       CASE (4)
   10769          26 :          iter_type_str = TRIM("Grad. reduced")
   10770             :       CASE (5)
   10771          80 :          iter_type_str = TRIM("Cauchy point")
   10772             :       CASE (6)
   10773         266 :          iter_type_str = TRIM("Full dogleg")
   10774             :       CASE (7)
   10775           2 :          iter_type_str = TRIM("Part. dogleg")
   10776             :       CASE DEFAULT
   10777         898 :          CPABORT("unknown report type")
   10778             :       END SELECT
   10779             : 
   10780         898 :       IF (unit_nr > 0) THEN
   10781             : 
   10782         204 :          SELECT CASE (iter_type)
   10783             :          CASE (0)
   10784             : 
   10785         204 :             WRITE (unit_nr, *)
   10786             :             WRITE (unit_nr, '(T4,A15,A6,A10,A10,A7,A20,A8)') &
   10787         204 :                "Action", &
   10788         204 :                "Iter", &
   10789         204 :                "Curv", &
   10790         204 :                "Step", &
   10791         204 :                "Edge?", &
   10792         204 :                "Grad/o.f. reduc", &
   10793         408 :                "Time"
   10794             : 
   10795             :          CASE DEFAULT
   10796             : 
   10797             :             WRITE (unit_nr, '(T4,A15,I6,F10.5,F10.5,L7,F20.10,F8.2)') &
   10798         245 :                iter_type_str, &
   10799         245 :                iteration, &
   10800         245 :                curvature, step_size, border_reached, &
   10801         245 :                loss_or_grad_change, &
   10802         694 :                time
   10803             : 
   10804             :          END SELECT
   10805             : 
   10806             :          ! epilogue
   10807         204 :          SELECT CASE (iter_type)
   10808             :          CASE (2, 3, 4, 5, 6, 7)
   10809             : 
   10810         449 :             WRITE (unit_nr, *)
   10811             : 
   10812             :          END SELECT
   10813             : 
   10814             :       END IF
   10815             : 
   10816         898 :    END SUBROUTINE fixed_r_report
   10817             : 
   10818             : ! **************************************************************************************************
   10819             : !> \brief Prints key quantities from the loop that tunes trust radius
   10820             : !> \param unit_nr ...
   10821             : !> \param iter_type ...
   10822             : !> \param iteration ...
   10823             : !> \param radius ...
   10824             : !> \param loss ...
   10825             : !> \param delta_loss ...
   10826             : !> \param grad_norm ...
   10827             : !> \param predicted_reduction ...
   10828             : !> \param rho ...
   10829             : !> \param new ...
   10830             : !> \param time ...
   10831             : !> \par History
   10832             : !>       2019.12 created [Rustam Z Khaliullin]
   10833             : !> \author Rustam Z Khaliullin
   10834             : ! **************************************************************************************************
   10835         843 :    SUBROUTINE trust_r_report(unit_nr, iter_type, iteration, radius, &
   10836             :                              loss, delta_loss, grad_norm, predicted_reduction, rho, new, time)
   10837             : 
   10838             :       INTEGER, INTENT(IN)                                :: unit_nr, iter_type, iteration
   10839             :       REAL(KIND=dp), INTENT(IN)                          :: radius, loss, delta_loss, grad_norm, &
   10840             :                                                             predicted_reduction, rho
   10841             :       LOGICAL, INTENT(IN)                                :: new
   10842             :       REAL(KIND=dp), INTENT(IN)                          :: time
   10843             : 
   10844             :       CHARACTER(LEN=20)                                  :: iter_status, iter_type_str
   10845             : 
   10846         852 :       SELECT CASE (iter_type)
   10847             :       CASE (0) ! header
   10848           9 :          iter_type_str = TRIM("Iter")
   10849           9 :          iter_status = TRIM("Stat")
   10850             :       CASE (1) ! first iteration, not all data is available yet
   10851         426 :          iter_type_str = TRIM("TR INI")
   10852         426 :          IF (new) THEN
   10853         426 :             iter_status = "  New" ! new point
   10854             :          ELSE
   10855           0 :             iter_status = " Redo" ! restarted
   10856             :          END IF
   10857             :       CASE (2) ! typical
   10858         408 :          iter_type_str = TRIM("TR FIN")
   10859         408 :          IF (new) THEN
   10860         408 :             iter_status = "  Acc" ! accepted
   10861             :          ELSE
   10862           0 :             iter_status = "  Rej" ! rejected
   10863             :          END IF
   10864             :       CASE DEFAULT
   10865         843 :          CPABORT("unknown report type")
   10866             :       END SELECT
   10867             : 
   10868         843 :       IF (unit_nr > 0) THEN
   10869             : 
   10870           9 :          SELECT CASE (iter_type)
   10871             :          CASE (0)
   10872             : 
   10873             :             WRITE (unit_nr, '(T2,A6,A5,A6,A22,A10,T67,A7,A6)') &
   10874           9 :                "Method", &
   10875           9 :                "Stat", &
   10876           9 :                "Iter", &
   10877           9 :                "Objective Function", &
   10878           9 :                "Conver", &!"Model Change", "Rho", &
   10879           9 :                "Radius", &
   10880          18 :                "Time"
   10881             :             WRITE (unit_nr, '(T41,A10,A10,A6)') &
   10882             :                !"Method", &
   10883             :                !"Iter", &
   10884             :                !"Objective Function", &
   10885           9 :                "Change", "Expct.", "Rho"
   10886             :             !"Radius", &
   10887             :             !"Time"
   10888             : 
   10889             :          CASE (1)
   10890             : 
   10891             :             WRITE (unit_nr, '(T2,A6,A5,I6,F22.10,ES10.2,T67,ES7.0,F6.1)') &
   10892         213 :                iter_type_str, &
   10893         213 :                iter_status, &
   10894         213 :                iteration, &
   10895         213 :                loss, &
   10896         213 :                grad_norm, & ! distinct
   10897         213 :                radius, &
   10898         426 :                time
   10899             : 
   10900             :          CASE (2)
   10901             : 
   10902             :             WRITE (unit_nr, '(T2,A6,A5,I6,F22.10,ES10.2,ES10.2,F6.1,ES7.0,F6.1)') &
   10903         204 :                iter_type_str, &
   10904         204 :                iter_status, &
   10905         204 :                iteration, &
   10906         204 :                loss, &
   10907         204 :                delta_loss, predicted_reduction, rho, & ! distinct
   10908         204 :                radius, &
   10909         630 :                time
   10910             : 
   10911             :          END SELECT
   10912             :       END IF
   10913             : 
   10914         843 :    END SUBROUTINE trust_r_report
   10915             : 
   10916             : ! **************************************************************************************************
   10917             : !> \brief ...
   10918             : !> \param unit_nr ...
   10919             : !> \param ref_energy ...
   10920             : !> \param energy_lowering ...
   10921             : ! **************************************************************************************************
   10922          26 :    SUBROUTINE energy_lowering_report(unit_nr, ref_energy, energy_lowering)
   10923             : 
   10924             :       INTEGER, INTENT(IN)                                :: unit_nr
   10925             :       REAL(KIND=dp), INTENT(IN)                          :: ref_energy, energy_lowering
   10926             : 
   10927             :       ! print out the energy lowering
   10928          26 :       IF (unit_nr > 0) THEN
   10929          13 :          WRITE (unit_nr, *)
   10930          13 :          WRITE (unit_nr, '(T2,A35,F25.10)') "ENERGY OF BLOCK-DIAGONAL ALMOs:", &
   10931          26 :             ref_energy
   10932          13 :          WRITE (unit_nr, '(T2,A35,F25.10)') "ENERGY LOWERING:", &
   10933          26 :             energy_lowering
   10934          13 :          WRITE (unit_nr, '(T2,A35,F25.10)') "CORRECTED ENERGY:", &
   10935          26 :             ref_energy + energy_lowering
   10936          13 :          WRITE (unit_nr, *)
   10937             :       END IF
   10938             : 
   10939          26 :    END SUBROUTINE energy_lowering_report
   10940             : 
   10941             :    ! post SCF-loop calculations
   10942             : ! **************************************************************************************************
   10943             : !> \brief ...
   10944             : !> \param qs_env ...
   10945             : !> \param almo_scf_env ...
   10946             : !> \param perturbation_in ...
   10947             : !> \param m_xalmo_in ...
   10948             : !> \param m_quench_in ...
   10949             : !> \param energy_inout ...
   10950             : ! **************************************************************************************************
   10951         104 :    SUBROUTINE wrap_up_xalmo_scf(qs_env, almo_scf_env, perturbation_in, &
   10952         104 :                                 m_xalmo_in, m_quench_in, energy_inout)
   10953             : 
   10954             :       TYPE(qs_environment_type), POINTER                 :: qs_env
   10955             :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
   10956             :       LOGICAL, INTENT(IN)                                :: perturbation_in
   10957             :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_xalmo_in, m_quench_in
   10958             :       REAL(KIND=dp), INTENT(INOUT)                       :: energy_inout
   10959             : 
   10960             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'wrap_up_xalmo_scf'
   10961             : 
   10962             :       INTEGER                                            :: eda_unit, handle, ispin, nspins, unit_nr
   10963             :       TYPE(cp_logger_type), POINTER                      :: logger
   10964         104 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no1, m_temp_no2
   10965             :       TYPE(section_vals_type), POINTER                   :: almo_print_section, input
   10966             : 
   10967         104 :       CALL timeset(routineN, handle)
   10968             : 
   10969             :       ! get a useful output_unit
   10970         104 :       logger => cp_get_default_logger()
   10971         104 :       IF (logger%para_env%is_source()) THEN
   10972          52 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
   10973             :       ELSE
   10974          52 :          unit_nr = -1
   10975             :       END IF
   10976             : 
   10977         104 :       nspins = almo_scf_env%nspins
   10978             : 
   10979             :       ! RZK-warning: must obtain MO coefficients from final theta
   10980             : 
   10981         104 :       IF (perturbation_in) THEN
   10982             : 
   10983          96 :          ALLOCATE (m_temp_no1(nspins))
   10984          96 :          ALLOCATE (m_temp_no2(nspins))
   10985             : 
   10986          48 :          DO ispin = 1, nspins
   10987          24 :             CALL dbcsr_create(m_temp_no1(ispin), template=m_xalmo_in(ispin))
   10988          48 :             CALL dbcsr_create(m_temp_no2(ispin), template=m_xalmo_in(ispin))
   10989             :          END DO
   10990             : 
   10991             :          ! return perturbed density to qs_env
   10992             :          CALL almo_dm_to_qs_env(qs_env, almo_scf_env%matrix_p, &
   10993          24 :                                 almo_scf_env%mat_distr_aos)
   10994             : 
   10995             :          ! compute energy correction and perform
   10996             :          ! detailed decomposition analysis (if requested)
   10997             :          ! reuse step and grad matrices to store decomposition results
   10998             :          CALL xalmo_analysis( &
   10999             :             detailed_analysis=almo_scf_env%almo_analysis%do_analysis, &
   11000             :             eps_filter=almo_scf_env%eps_filter, &
   11001             :             m_T_in=m_xalmo_in, &
   11002             :             m_T0_in=almo_scf_env%matrix_t_blk, &
   11003             :             m_siginv_in=almo_scf_env%matrix_sigma_inv, &
   11004             :             m_siginv0_in=almo_scf_env%matrix_sigma_inv_0deloc, &
   11005             :             m_S_in=almo_scf_env%matrix_s, &
   11006             :             m_KS0_in=almo_scf_env%matrix_ks_0deloc, &
   11007             :             m_quench_t_in=m_quench_in, &
   11008             :             energy_out=energy_inout, & ! get energy loewring
   11009             :             m_eda_out=m_temp_no1, &
   11010             :             m_cta_out=m_temp_no2 &
   11011          24 :             )
   11012             : 
   11013          24 :          IF (almo_scf_env%almo_analysis%do_analysis) THEN
   11014             : 
   11015           4 :             DO ispin = 1, nspins
   11016             : 
   11017             :                ! energy decomposition analysis (EDA)
   11018           2 :                IF (unit_nr > 0) THEN
   11019           1 :                   WRITE (unit_nr, '(T2,A)') "DECOMPOSITION OF THE DELOCALIZATION ENERGY"
   11020             :                END IF
   11021             : 
   11022             :                ! open the output file, print and close
   11023           2 :                CALL get_qs_env(qs_env, input=input)
   11024           2 :                almo_print_section => section_vals_get_subs_vals(input, "DFT%ALMO_SCF%ANALYSIS%PRINT")
   11025             :                eda_unit = cp_print_key_unit_nr(logger, almo_print_section, &
   11026           2 :                                                "ALMO_EDA_CT", extension=".dat", local=.TRUE.)
   11027           2 :                CALL dbcsr_print_block_sum(m_temp_no1(ispin), eda_unit)
   11028             :                CALL cp_print_key_finished_output(eda_unit, logger, almo_print_section, &
   11029           2 :                                                  "ALMO_EDA_CT", local=.TRUE.)
   11030             : 
   11031             :                ! charge transfer analysis (CTA)
   11032           2 :                IF (unit_nr > 0) THEN
   11033           1 :                   WRITE (unit_nr, '(T2,A)') "DECOMPOSITION OF CHARGE TRANSFER TERMS"
   11034             :                END IF
   11035             : 
   11036             :                eda_unit = cp_print_key_unit_nr(logger, almo_print_section, &
   11037           2 :                                                "ALMO_CTA", extension=".dat", local=.TRUE.)
   11038           2 :                CALL dbcsr_print_block_sum(m_temp_no2(ispin), eda_unit)
   11039             :                CALL cp_print_key_finished_output(eda_unit, logger, almo_print_section, &
   11040           4 :                                                  "ALMO_CTA", local=.TRUE.)
   11041             : 
   11042             :             END DO ! ispin
   11043             : 
   11044             :          END IF ! do ALMO EDA/CTA
   11045             : 
   11046             :          CALL energy_lowering_report( &
   11047             :             unit_nr=unit_nr, &
   11048             :             ref_energy=almo_scf_env%almo_scf_energy, &
   11049          24 :             energy_lowering=energy_inout)
   11050             :          CALL almo_scf_update_ks_energy(qs_env, &
   11051             :                                         energy=almo_scf_env%almo_scf_energy, &
   11052          24 :                                         energy_singles_corr=energy_inout)
   11053             : 
   11054          48 :          DO ispin = 1, nspins
   11055          24 :             CALL dbcsr_release(m_temp_no1(ispin))
   11056          48 :             CALL dbcsr_release(m_temp_no2(ispin))
   11057             :          END DO
   11058             : 
   11059          24 :          DEALLOCATE (m_temp_no1)
   11060          24 :          DEALLOCATE (m_temp_no2)
   11061             : 
   11062             :       ELSE ! non-perturbative
   11063             : 
   11064             :          CALL almo_scf_update_ks_energy(qs_env, &
   11065          80 :                                         energy=energy_inout)
   11066             : 
   11067             :       END IF ! if perturbation only
   11068             : 
   11069         104 :       CALL timestop(handle)
   11070             : 
   11071         104 :    END SUBROUTINE wrap_up_xalmo_scf
   11072             : 
   11073             : END MODULE almo_scf_optimizer
   11074             : 

Generated by: LCOV version 1.15