LCOV - code coverage report
Current view: top level - src - almo_scf_optimizer.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 54.5 % 3155 1720
Test Date: 2025-12-04 06:27:48 Functions: 62.9 % 35 22

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Optimization routines for all ALMO-based SCF methods
      10              : !> \par History
      11              : !>       2011.05 created [Rustam Z Khaliullin]
      12              : !>       2014.10 as a separate file [Rustam Z Khaliullin]
      13              : !> \author Rustam Z Khaliullin
      14              : ! **************************************************************************************************
      15              : MODULE almo_scf_optimizer
      16              :    USE almo_scf_diis_types,             ONLY: almo_scf_diis_extrapolate,&
      17              :                                               almo_scf_diis_init,&
      18              :                                               almo_scf_diis_push,&
      19              :                                               almo_scf_diis_release,&
      20              :                                               almo_scf_diis_type
      21              :    USE almo_scf_lbfgs_types,            ONLY: lbfgs_create,&
      22              :                                               lbfgs_get_direction,&
      23              :                                               lbfgs_history_type,&
      24              :                                               lbfgs_release,&
      25              :                                               lbfgs_seed
      26              :    USE almo_scf_methods,                ONLY: &
      27              :         almo_scf_ks_blk_to_tv_blk, almo_scf_ks_to_ks_blk, almo_scf_ks_to_ks_xx, &
      28              :         almo_scf_ks_xx_to_tv_xx, almo_scf_p_blk_to_t_blk, almo_scf_t_rescaling, &
      29              :         almo_scf_t_to_proj, apply_domain_operators, apply_projector, &
      30              :         construct_domain_preconditioner, construct_domain_r_down, construct_domain_s_inv, &
      31              :         construct_domain_s_sqrt, fill_matrix_with_ones, get_overlap, orthogonalize_mos, &
      32              :         pseudo_invert_diagonal_blk, xalmo_initial_guess
      33              :    USE almo_scf_qs,                     ONLY: almo_dm_to_almo_ks,&
      34              :                                               almo_dm_to_qs_env,&
      35              :                                               almo_scf_update_ks_energy,&
      36              :                                               matrix_qs_to_almo
      37              :    USE almo_scf_types,                  ONLY: almo_scf_env_type,&
      38              :                                               optimizer_options_type
      39              :    USE cell_types,                      ONLY: cell_type
      40              :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      41              :    USE cp_dbcsr_api,                    ONLY: &
      42              :         dbcsr_add, dbcsr_copy, dbcsr_create, dbcsr_desymmetrize, dbcsr_distribution_get, &
      43              :         dbcsr_distribution_type, dbcsr_filter, dbcsr_finalize, dbcsr_get_block_p, dbcsr_get_info, &
      44              :         dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, dbcsr_iterator_readonly_start, &
      45              :         dbcsr_iterator_start, dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, &
      46              :         dbcsr_p_type, dbcsr_put_block, dbcsr_release, dbcsr_scale, dbcsr_set, dbcsr_type, &
      47              :         dbcsr_type_no_symmetry, dbcsr_work_create
      48              :    USE cp_dbcsr_cholesky,               ONLY: cp_dbcsr_cholesky_decompose,&
      49              :                                               cp_dbcsr_cholesky_invert,&
      50              :                                               cp_dbcsr_cholesky_restore
      51              :    USE cp_dbcsr_contrib,                ONLY: dbcsr_add_on_diag,&
      52              :                                               dbcsr_dot,&
      53              :                                               dbcsr_frobenius_norm,&
      54              :                                               dbcsr_get_diag,&
      55              :                                               dbcsr_hadamard_product,&
      56              :                                               dbcsr_maxabs,&
      57              :                                               dbcsr_set_diag
      58              :    USE cp_external_control,             ONLY: external_control
      59              :    USE cp_files,                        ONLY: close_file,&
      60              :                                               open_file
      61              :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      62              :                                               cp_logger_get_default_unit_nr,&
      63              :                                               cp_logger_type,&
      64              :                                               cp_to_string
      65              :    USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
      66              :                                               cp_print_key_unit_nr
      67              :    USE ct_methods,                      ONLY: analytic_line_search,&
      68              :                                               ct_step_execute,&
      69              :                                               diagonalize_diagonal_blocks
      70              :    USE ct_types,                        ONLY: ct_step_env_clean,&
      71              :                                               ct_step_env_get,&
      72              :                                               ct_step_env_init,&
      73              :                                               ct_step_env_set,&
      74              :                                               ct_step_env_type
      75              :    USE domain_submatrix_methods,        ONLY: add_submatrices,&
      76              :                                               construct_submatrices,&
      77              :                                               copy_submatrices,&
      78              :                                               init_submatrices,&
      79              :                                               maxnorm_submatrices,&
      80              :                                               release_submatrices
      81              :    USE domain_submatrix_types,          ONLY: domain_map_type,&
      82              :                                               domain_submatrix_type,&
      83              :                                               select_row
      84              :    USE input_constants,                 ONLY: &
      85              :         almo_scf_diag, almo_scf_dm_sign, cg_dai_yuan, cg_fletcher, cg_fletcher_reeves, &
      86              :         cg_hager_zhang, cg_hestenes_stiefel, cg_liu_storey, cg_polak_ribiere, cg_zero, &
      87              :         op_loc_berry, op_loc_pipek, trustr_cauchy, trustr_dogleg, virt_full, &
      88              :         xalmo_case_block_diag, xalmo_case_fully_deloc, xalmo_case_normal, xalmo_prec_domain, &
      89              :         xalmo_prec_full, xalmo_prec_zero
      90              :    USE input_section_types,             ONLY: section_vals_get_subs_vals,&
      91              :                                               section_vals_type
      92              :    USE iterate_matrix,                  ONLY: determinant,&
      93              :                                               invert_Hotelling,&
      94              :                                               matrix_sqrt_Newton_Schulz
      95              :    USE kinds,                           ONLY: dp
      96              :    USE machine,                         ONLY: m_flush,&
      97              :                                               m_walltime
      98              :    USE message_passing,                 ONLY: mp_comm_type,&
      99              :                                               mp_para_env_type
     100              :    USE particle_methods,                ONLY: get_particle_set
     101              :    USE particle_types,                  ONLY: particle_type
     102              :    USE qs_energy_types,                 ONLY: qs_energy_type
     103              :    USE qs_environment_types,            ONLY: get_qs_env,&
     104              :                                               qs_environment_type
     105              :    USE qs_kind_types,                   ONLY: qs_kind_type
     106              :    USE qs_loc_utils,                    ONLY: compute_berry_operator
     107              :    USE qs_localization_methods,         ONLY: initialize_weights
     108              : #include "./base/base_uses.f90"
     109              : 
     110              :    IMPLICIT NONE
     111              : 
     112              :    PRIVATE
     113              : 
     114              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'almo_scf_optimizer'
     115              : 
     116              :    PUBLIC :: almo_scf_block_diagonal, &
     117              :              almo_scf_xalmo_eigensolver, &
     118              :              almo_scf_xalmo_trustr, &
     119              :              almo_scf_xalmo_pcg, &
     120              :              almo_scf_construct_nlmos
     121              : 
     122              :    LOGICAL, PARAMETER :: debug_mode = .FALSE.
     123              :    LOGICAL, PARAMETER :: safe_mode = .FALSE.
     124              :    LOGICAL, PARAMETER :: almo_mathematica = .FALSE.
     125              :    INTEGER, PARAMETER :: hessian_path_reuse = 1, &
     126              :                          hessian_path_assemble = 2
     127              : 
     128              : CONTAINS
     129              : 
     130              : ! **************************************************************************************************
     131              : !> \brief An SCF procedure that optimizes block-diagonal ALMOs using DIIS
     132              : !> \param qs_env ...
     133              : !> \param almo_scf_env ...
     134              : !> \param optimizer ...
     135              : !> \par History
     136              : !>       2011.06 created [Rustam Z Khaliullin]
     137              : !>       2018.09 smearing support [Ruben Staub]
     138              : !> \author Rustam Z Khaliullin
     139              : ! **************************************************************************************************
     140           76 :    SUBROUTINE almo_scf_block_diagonal(qs_env, almo_scf_env, optimizer)
     141              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     142              :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
     143              :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
     144              : 
     145              :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_block_diagonal'
     146              : 
     147              :       INTEGER                                            :: handle, iscf, ispin, nspin, unit_nr
     148           76 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: local_nocc_of_domain
     149              :       LOGICAL                                            :: converged, prepare_to_exit, should_stop, &
     150              :                                                             use_diis, use_prev_as_guess
     151              :       REAL(KIND=dp) :: density_rec, energy_diff, energy_new, energy_old, error_norm, &
     152              :          error_norm_ispin, kTS_sum, prev_error_norm, t1, t2, true_mixing_fraction
     153           76 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: local_mu
     154              :       TYPE(almo_scf_diis_type), ALLOCATABLE, &
     155           76 :          DIMENSION(:)                                    :: almo_diis
     156              :       TYPE(cp_logger_type), POINTER                      :: logger
     157           76 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: matrix_mixing_old_blk
     158              :       TYPE(qs_energy_type), POINTER                      :: qs_energy
     159              : 
     160           76 :       CALL timeset(routineN, handle)
     161              : 
     162              :       ! get a useful output_unit
     163           76 :       logger => cp_get_default_logger()
     164           76 :       IF (logger%para_env%is_source()) THEN
     165           38 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
     166              :       ELSE
     167              :          unit_nr = -1
     168              :       END IF
     169              : 
     170              :       ! use DIIS, it's superior to simple mixing
     171           76 :       use_diis = .TRUE.
     172           76 :       use_prev_as_guess = .FALSE.
     173              : 
     174           76 :       nspin = almo_scf_env%nspins
     175          228 :       ALLOCATE (local_mu(almo_scf_env%ndomains))
     176          228 :       ALLOCATE (local_nocc_of_domain(almo_scf_env%ndomains))
     177              : 
     178              :       ! init mixing matrices
     179          304 :       ALLOCATE (matrix_mixing_old_blk(nspin))
     180          304 :       ALLOCATE (almo_diis(nspin))
     181          152 :       DO ispin = 1, nspin
     182              :          CALL dbcsr_create(matrix_mixing_old_blk(ispin), &
     183           76 :                            template=almo_scf_env%matrix_ks_blk(ispin))
     184              :          CALL almo_scf_diis_init(diis_env=almo_diis(ispin), &
     185              :                                  sample_err=almo_scf_env%matrix_ks_blk(ispin), &
     186              :                                  sample_var=almo_scf_env%matrix_s_blk(1), &
     187              :                                  error_type=1, &
     188          152 :                                  max_length=optimizer%ndiis)
     189              :       END DO
     190              : 
     191           76 :       CALL get_qs_env(qs_env, energy=qs_energy)
     192           76 :       energy_old = qs_energy%total
     193              : 
     194           76 :       iscf = 0
     195           76 :       prepare_to_exit = .FALSE.
     196           76 :       true_mixing_fraction = 0.0_dp
     197           76 :       error_norm = 1.0E+10_dp ! arbitrary big step
     198              : 
     199           76 :       IF (unit_nr > 0) THEN
     200           38 :          WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
     201           76 :             " Optimization of block-diagonal ALMOs ", REPEAT("-", 21)
     202           38 :          WRITE (unit_nr, *)
     203           38 :          WRITE (unit_nr, '(T2,A13,A6,A23,A14,A14,A9)') "Method", "Iter", &
     204           76 :             "Total Energy", "Change", "Convergence", "Time"
     205           38 :          WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
     206              :       END IF
     207              : 
     208              :       ! the real SCF loop
     209           76 :       t1 = m_walltime()
     210          424 :       DO
     211              : 
     212          424 :          iscf = iscf + 1
     213              : 
     214              :          ! obtain projected KS matrix and the DIIS-error vector
     215          424 :          CALL almo_scf_ks_to_ks_blk(almo_scf_env)
     216              : 
     217              :          ! inform the DIIS handler about the new KS matrix and its error vector
     218              :          IF (use_diis) THEN
     219          848 :             DO ispin = 1, nspin
     220              :                CALL almo_scf_diis_push(diis_env=almo_diis(ispin), &
     221              :                                        var=almo_scf_env%matrix_ks_blk(ispin), &
     222          848 :                                        err=almo_scf_env%matrix_err_blk(ispin))
     223              :             END DO
     224              :          END IF
     225              : 
     226              :          ! get error_norm: choose the largest of the two spins
     227          848 :          prev_error_norm = error_norm
     228          848 :          DO ispin = 1, nspin
     229              :             !error_norm=dbcsr_frobenius_norm(almo_scf_env%matrix_err_blk(ispin))
     230          424 :             error_norm_ispin = dbcsr_maxabs(almo_scf_env%matrix_err_blk(ispin))
     231          424 :             IF (ispin == 1) error_norm = error_norm_ispin
     232            0 :             IF (ispin > 1 .AND. error_norm_ispin > error_norm) &
     233          424 :                error_norm = error_norm_ispin
     234              :          END DO
     235              : 
     236          424 :          IF (error_norm < almo_scf_env%eps_prev_guess) THEN
     237            0 :             use_prev_as_guess = .TRUE.
     238              :          ELSE
     239          424 :             use_prev_as_guess = .FALSE.
     240              :          END IF
     241              : 
     242              :          ! check convergence
     243          424 :          converged = .TRUE.
     244          424 :          IF (error_norm > optimizer%eps_error) converged = .FALSE.
     245              : 
     246              :          ! check other exit criteria: max SCF steps and timing
     247              :          CALL external_control(should_stop, "SCF", &
     248              :                                start_time=qs_env%start_time, &
     249          424 :                                target_time=qs_env%target_time)
     250          424 :          IF (should_stop .OR. iscf >= optimizer%max_iter .OR. converged) THEN
     251           76 :             prepare_to_exit = .TRUE.
     252           76 :             IF (iscf == 1) energy_new = energy_old
     253              :          END IF
     254              : 
     255              :          ! if early stopping is on do at least one iteration
     256          424 :          IF (optimizer%early_stopping_on .AND. iscf == 1) &
     257              :             prepare_to_exit = .FALSE.
     258              : 
     259          424 :          IF (.NOT. prepare_to_exit) THEN ! update the ALMOs and density matrix
     260              : 
     261              :             ! perform mixing of KS matrices
     262          348 :             IF (iscf /= 1) THEN
     263              :                IF (use_diis) THEN ! use diis instead of mixing
     264          544 :                   DO ispin = 1, nspin
     265              :                      CALL almo_scf_diis_extrapolate(diis_env=almo_diis(ispin), &
     266          544 :                                                     extr_var=almo_scf_env%matrix_ks_blk(ispin))
     267              :                   END DO
     268              :                ELSE ! use mixing
     269              :                   true_mixing_fraction = almo_scf_env%mixing_fraction
     270              :                   DO ispin = 1, nspin
     271              :                      CALL dbcsr_add(almo_scf_env%matrix_ks_blk(ispin), &
     272              :                                     matrix_mixing_old_blk(ispin), &
     273              :                                     true_mixing_fraction, &
     274              :                                     1.0_dp - true_mixing_fraction)
     275              :                   END DO
     276              :                END IF
     277              :             END IF
     278              :             ! save the new matrix for the future mixing
     279          696 :             DO ispin = 1, nspin
     280              :                CALL dbcsr_copy(matrix_mixing_old_blk(ispin), &
     281          696 :                                almo_scf_env%matrix_ks_blk(ispin))
     282              :             END DO
     283              : 
     284              :             ! obtain ALMOs from the new KS matrix
     285          696 :             SELECT CASE (almo_scf_env%almo_update_algorithm)
     286              :             CASE (almo_scf_diag)
     287              : 
     288          348 :                CALL almo_scf_ks_blk_to_tv_blk(almo_scf_env)
     289              : 
     290              :             CASE (almo_scf_dm_sign)
     291              : 
     292              :                ! update the density matrix
     293            0 :                DO ispin = 1, nspin
     294              : 
     295            0 :                   local_nocc_of_domain(:) = almo_scf_env%nocc_of_domain(:, ispin)
     296            0 :                   local_mu(:) = almo_scf_env%mu_of_domain(:, ispin)
     297              :                   ! RZK UPDATE! the update algorithm is removed because
     298              :                   ! RZK UPDATE! it requires updating core LS_SCF routines
     299              :                   ! RZK UPDATE! (the code exists in the CVS version)
     300            0 :                   CPABORT("Density_matrix_sign has not been tested yet")
     301              :                   ! RZK UPDATE!  CALL density_matrix_sign(almo_scf_env%matrix_p_blk(ispin),&
     302              :                   ! RZK UPDATE!          local_mu,&
     303              :                   ! RZK UPDATE!          almo_scf_env%fixed_mu,&
     304              :                   ! RZK UPDATE!          almo_scf_env%matrix_ks_blk(ispin),&
     305              :                   ! RZK UPDATE!          !matrix_mixing_old_blk(ispin),&
     306              :                   ! RZK UPDATE!          almo_scf_env%matrix_s_blk(1), &
     307              :                   ! RZK UPDATE!          almo_scf_env%matrix_s_blk_inv(1), &
     308              :                   ! RZK UPDATE!          local_nocc_of_domain,&
     309              :                   ! RZK UPDATE!          almo_scf_env%eps_filter,&
     310              :                   ! RZK UPDATE!          almo_scf_env%domain_index_of_ao)
     311              :                   ! RZK UPDATE!
     312            0 :                   almo_scf_env%mu_of_domain(:, ispin) = local_mu(:)
     313              : 
     314              :                END DO
     315              : 
     316              :                ! obtain ALMOs from matrix_p_blk: T_new = P_blk S_blk T_old
     317            0 :                CALL almo_scf_p_blk_to_t_blk(almo_scf_env, ionic=.FALSE.)
     318              : 
     319          348 :                DO ispin = 1, almo_scf_env%nspins
     320              : 
     321              :                   CALL orthogonalize_mos(ket=almo_scf_env%matrix_t_blk(ispin), &
     322              :                                          overlap=almo_scf_env%matrix_sigma_blk(ispin), &
     323              :                                          metric=almo_scf_env%matrix_s_blk(1), &
     324              :                                          retain_locality=.TRUE., &
     325              :                                          only_normalize=.FALSE., &
     326              :                                          nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
     327              :                                          eps_filter=almo_scf_env%eps_filter, &
     328              :                                          order_lanczos=almo_scf_env%order_lanczos, &
     329              :                                          eps_lanczos=almo_scf_env%eps_lanczos, &
     330            0 :                                          max_iter_lanczos=almo_scf_env%max_iter_lanczos)
     331              : 
     332              :                END DO
     333              : 
     334              :             END SELECT
     335              : 
     336              :             ! obtain density matrix from ALMOs
     337          696 :             DO ispin = 1, almo_scf_env%nspins
     338              : 
     339              :                !! Application of an occupation-rescaling trick for smearing, if requested
     340          348 :                IF (almo_scf_env%smear) THEN
     341              :                   CALL almo_scf_t_rescaling(matrix_t=almo_scf_env%matrix_t_blk(ispin), &
     342              :                                             mo_energies=almo_scf_env%mo_energies(:, ispin), &
     343              :                                             mu_of_domain=almo_scf_env%mu_of_domain(:, ispin), &
     344              :                                             real_ne_of_domain=almo_scf_env%real_ne_of_domain(:, ispin), &
     345              :                                             spin_kTS=almo_scf_env%kTS(ispin), &
     346              :                                             smear_e_temp=almo_scf_env%smear_e_temp, &
     347              :                                             ndomains=almo_scf_env%ndomains, &
     348           16 :                                             nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin))
     349              :                END IF
     350              : 
     351              :                CALL almo_scf_t_to_proj(t=almo_scf_env%matrix_t_blk(ispin), &
     352              :                                        p=almo_scf_env%matrix_p(ispin), &
     353              :                                        eps_filter=almo_scf_env%eps_filter, &
     354              :                                        orthog_orbs=.FALSE., &
     355              :                                        nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
     356              :                                        s=almo_scf_env%matrix_s(1), &
     357              :                                        sigma=almo_scf_env%matrix_sigma(ispin), &
     358              :                                        sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
     359              :                                        use_guess=use_prev_as_guess, &
     360              :                                        smear=almo_scf_env%smear, &
     361              :                                        algorithm=almo_scf_env%sigma_inv_algorithm, &
     362              :                                        inverse_accelerator=almo_scf_env%order_lanczos, &
     363              :                                        inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
     364              :                                        eps_lanczos=almo_scf_env%eps_lanczos, &
     365              :                                        max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
     366              :                                        para_env=almo_scf_env%para_env, &
     367          696 :                                        blacs_env=almo_scf_env%blacs_env)
     368              : 
     369              :             END DO
     370              : 
     371          348 :             IF (almo_scf_env%nspins == 1) THEN
     372          348 :                CALL dbcsr_scale(almo_scf_env%matrix_p(1), 2.0_dp)
     373              :                !! Rescaling electronic entropy contribution by spin_factor
     374          348 :                IF (almo_scf_env%smear) THEN
     375           16 :                   almo_scf_env%kTS(1) = almo_scf_env%kTS(1)*2.0_dp
     376              :                END IF
     377              :             END IF
     378              : 
     379          348 :             IF (almo_scf_env%smear) THEN
     380           32 :                kTS_sum = SUM(almo_scf_env%kTS)
     381              :             ELSE
     382          332 :                kTS_sum = 0.0_dp
     383              :             END IF
     384              : 
     385              :             ! compute the new KS matrix and new energy
     386              :             CALL almo_dm_to_almo_ks(qs_env, &
     387              :                                     almo_scf_env%matrix_p, &
     388              :                                     almo_scf_env%matrix_ks, &
     389              :                                     energy_new, &
     390              :                                     almo_scf_env%eps_filter, &
     391              :                                     almo_scf_env%mat_distr_aos, &
     392              :                                     smear=almo_scf_env%smear, &
     393          348 :                                     kTS_sum=kTS_sum)
     394              : 
     395              :          END IF ! prepare_to_exit
     396              : 
     397          424 :          energy_diff = energy_new - energy_old
     398          424 :          energy_old = energy_new
     399          424 :          almo_scf_env%almo_scf_energy = energy_new
     400              : 
     401          424 :          t2 = m_walltime()
     402              :          ! brief report on the current SCF loop
     403          424 :          IF (unit_nr > 0) THEN
     404          212 :             WRITE (unit_nr, '(T2,A13,I6,F23.10,E14.5,F14.9,F9.2)') "ALMO SCF DIIS", &
     405          212 :                iscf, &
     406          424 :                energy_new, energy_diff, error_norm, t2 - t1
     407              :          END IF
     408          424 :          t1 = m_walltime()
     409              : 
     410          424 :          IF (prepare_to_exit) EXIT
     411              : 
     412              :       END DO ! end scf cycle
     413              : 
     414              :       !! Print number of electrons recovered if smearing was requested
     415           76 :       IF (almo_scf_env%smear) THEN
     416            8 :          DO ispin = 1, nspin
     417            4 :             CALL dbcsr_dot(almo_scf_env%matrix_p(ispin), almo_scf_env%matrix_s(1), density_rec)
     418            8 :             IF (unit_nr > 0) THEN
     419            2 :                WRITE (unit_nr, '(T2,A20,F23.10)') "Electrons recovered:", density_rec
     420              :             END IF
     421              :          END DO
     422              :       END IF
     423              : 
     424           76 :       IF (.NOT. converged .AND. (.NOT. optimizer%early_stopping_on)) THEN
     425            0 :          IF (unit_nr > 0) THEN
     426            0 :             CPABORT("SCF for block-diagonal ALMOs not converged!")
     427              :          END IF
     428              :       END IF
     429              : 
     430          152 :       DO ispin = 1, nspin
     431           76 :          CALL dbcsr_release(matrix_mixing_old_blk(ispin))
     432          152 :          CALL almo_scf_diis_release(diis_env=almo_diis(ispin))
     433              :       END DO
     434          152 :       DEALLOCATE (almo_diis)
     435           76 :       DEALLOCATE (matrix_mixing_old_blk)
     436           76 :       DEALLOCATE (local_mu)
     437           76 :       DEALLOCATE (local_nocc_of_domain)
     438              : 
     439           76 :       CALL timestop(handle)
     440              : 
     441           76 :    END SUBROUTINE almo_scf_block_diagonal
     442              : 
     443              : ! **************************************************************************************************
     444              : !> \brief An eigensolver-based SCF to optimize extended ALMOs (i.e. ALMOs on
     445              : !>        overlapping domains)
     446              : !> \param qs_env ...
     447              : !> \param almo_scf_env ...
     448              : !> \param optimizer ...
     449              : !> \par History
     450              : !>       2013.03 created [Rustam Z Khaliullin]
     451              : !>       2018.09 smearing support [Ruben Staub]
     452              : !> \author Rustam Z Khaliullin
     453              : ! **************************************************************************************************
     454            2 :    SUBROUTINE almo_scf_xalmo_eigensolver(qs_env, almo_scf_env, optimizer)
     455              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     456              :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
     457              :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
     458              : 
     459              :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_xalmo_eigensolver'
     460              : 
     461              :       INTEGER                                            :: handle, iscf, ispin, nspin, unit_nr
     462              :       LOGICAL                                            :: converged, prepare_to_exit, should_stop
     463              :       REAL(KIND=dp) :: denergy_tot, density_rec, energy_diff, energy_new, energy_old, error_norm, &
     464              :          error_norm_0, kTS_sum, spin_factor, t1, t2
     465              :       REAL(KIND=dp), DIMENSION(2)                        :: denergy_spin
     466              :       TYPE(almo_scf_diis_type), ALLOCATABLE, &
     467            2 :          DIMENSION(:)                                    :: almo_diis
     468              :       TYPE(cp_logger_type), POINTER                      :: logger
     469              :       TYPE(dbcsr_type)                                   :: matrix_p_almo_scf_converged
     470              :       TYPE(domain_submatrix_type), ALLOCATABLE, &
     471            2 :          DIMENSION(:, :)                                 :: submatrix_mixing_old_blk
     472              : 
     473            2 :       CALL timeset(routineN, handle)
     474              : 
     475              :       ! get a useful output_unit
     476            2 :       logger => cp_get_default_logger()
     477            2 :       IF (logger%para_env%is_source()) THEN
     478            1 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
     479              :       ELSE
     480            1 :          unit_nr = -1
     481              :       END IF
     482              : 
     483            2 :       nspin = almo_scf_env%nspins
     484            2 :       IF (nspin == 1) THEN
     485            2 :          spin_factor = 2.0_dp
     486              :       ELSE
     487            0 :          spin_factor = 1.0_dp
     488              :       END IF
     489              : 
     490              :       ! RZK-warning domain_s_sqrt and domain_s_sqrt_inv do not have spin
     491              :       ! components yet (may be used later)
     492            2 :       ispin = 1
     493              :       CALL construct_domain_s_sqrt( &
     494              :          matrix_s=almo_scf_env%matrix_s(1), &
     495              :          subm_s_sqrt=almo_scf_env%domain_s_sqrt(:, ispin), &
     496              :          subm_s_sqrt_inv=almo_scf_env%domain_s_sqrt_inv(:, ispin), &
     497              :          dpattern=almo_scf_env%quench_t(ispin), &
     498              :          map=almo_scf_env%domain_map(ispin), &
     499            2 :          node_of_domain=almo_scf_env%cpu_of_domain)
     500              :       ! TRY: construct s_inv
     501              :       !CALL construct_domain_s_inv(&
     502              :       !       matrix_s=almo_scf_env%matrix_s(1),&
     503              :       !       subm_s_inv=almo_scf_env%domain_s_inv(:,ispin),&
     504              :       !       dpattern=almo_scf_env%quench_t(ispin),&
     505              :       !       map=almo_scf_env%domain_map(ispin),&
     506              :       !       node_of_domain=almo_scf_env%cpu_of_domain)
     507              : 
     508              :       ! construct the domain template for the occupied orbitals
     509            4 :       DO ispin = 1, nspin
     510              :          ! RZK-warning we need only the matrix structure, not data
     511              :          ! replace construct_submatrices with lighter procedure with
     512              :          ! no heavy communications
     513              :          CALL construct_submatrices( &
     514              :             matrix=almo_scf_env%quench_t(ispin), &
     515              :             submatrix=almo_scf_env%domain_t(:, ispin), &
     516              :             distr_pattern=almo_scf_env%quench_t(ispin), &
     517              :             domain_map=almo_scf_env%domain_map(ispin), &
     518              :             node_of_domain=almo_scf_env%cpu_of_domain, &
     519            4 :             job_type=select_row)
     520              :       END DO
     521              : 
     522              :       ! init mixing matrices
     523           20 :       ALLOCATE (submatrix_mixing_old_blk(almo_scf_env%ndomains, nspin))
     524            2 :       CALL init_submatrices(submatrix_mixing_old_blk)
     525            8 :       ALLOCATE (almo_diis(nspin))
     526              : 
     527              :       ! TRY: construct block-projector
     528              :       !ALLOCATE(submatrix_tmp(almo_scf_env%ndomains))
     529              :       !DO ispin=1,nspin
     530              :       !   CALL init_submatrices(submatrix_tmp)
     531              :       !   CALL construct_domain_r_down(&
     532              :       !           matrix_t=almo_scf_env%matrix_t_blk(ispin),&
     533              :       !           matrix_sigma_inv=almo_scf_env%matrix_sigma_inv(ispin),&
     534              :       !           matrix_s=almo_scf_env%matrix_s(1),&
     535              :       !           subm_r_down=submatrix_tmp(:),&
     536              :       !           dpattern=almo_scf_env%quench_t(ispin),&
     537              :       !           map=almo_scf_env%domain_map(ispin),&
     538              :       !           node_of_domain=almo_scf_env%cpu_of_domain,&
     539              :       !           filter_eps=almo_scf_env%eps_filter)
     540              :       !   CALL multiply_submatrices('N','N',1.0_dp,&
     541              :       !           submatrix_tmp(:),&
     542              :       !           almo_scf_env%domain_s_inv(:,1),0.0_dp,&
     543              :       !           almo_scf_env%domain_r_down_up(:,ispin))
     544              :       !   CALL release_submatrices(submatrix_tmp)
     545              :       !ENDDO
     546              :       !DEALLOCATE(submatrix_tmp)
     547              : 
     548            4 :       DO ispin = 1, nspin
     549              :          ! use s_sqrt since they are already properly constructed
     550              :          ! and have the same distributions as domain_err and domain_ks_xx
     551              :          CALL almo_scf_diis_init(diis_env=almo_diis(ispin), &
     552              :                                  sample_err=almo_scf_env%domain_s_sqrt(:, ispin), &
     553              :                                  error_type=1, &
     554            4 :                                  max_length=optimizer%ndiis)
     555              :       END DO
     556              : 
     557            2 :       denergy_tot = 0.0_dp
     558            2 :       energy_old = 0.0_dp
     559            2 :       iscf = 0
     560            2 :       prepare_to_exit = .FALSE.
     561              : 
     562              :       ! the SCF loop
     563            2 :       t1 = m_walltime()
     564            2 :       DO
     565              : 
     566            2 :          iscf = iscf + 1
     567              : 
     568              :          ! obtain projected KS matrix and the DIIS-error vector
     569            2 :          CALL almo_scf_ks_to_ks_xx(almo_scf_env)
     570              : 
     571              :          ! inform the DIIS handler about the new KS matrix and its error vector
     572            4 :          DO ispin = 1, nspin
     573              :             CALL almo_scf_diis_push(diis_env=almo_diis(ispin), &
     574              :                                     d_var=almo_scf_env%domain_ks_xx(:, ispin), &
     575            4 :                                     d_err=almo_scf_env%domain_err(:, ispin))
     576              :          END DO
     577              : 
     578              :          ! check convergence
     579            2 :          converged = .TRUE.
     580            2 :          DO ispin = 1, nspin
     581              :             !error_norm=dbcsr_frobenius_norm(almo_scf_env%matrix_err_blk(ispin))
     582            2 :             error_norm = dbcsr_maxabs(almo_scf_env%matrix_err_xx(ispin))
     583              :             CALL maxnorm_submatrices(almo_scf_env%domain_err(:, ispin), &
     584            2 :                                      norm=error_norm_0)
     585            2 :             IF (error_norm > optimizer%eps_error) THEN
     586              :                converged = .FALSE.
     587              :                EXIT ! no need to check the other spin
     588              :             END IF
     589              :          END DO
     590              :          ! check other exit criteria: max SCF steps and timing
     591              :          CALL external_control(should_stop, "SCF", &
     592              :                                start_time=qs_env%start_time, &
     593            2 :                                target_time=qs_env%target_time)
     594            2 :          IF (should_stop .OR. iscf >= optimizer%max_iter .OR. converged) THEN
     595            0 :             prepare_to_exit = .TRUE.
     596              :          END IF
     597              : 
     598              :          ! if early stopping is on do at least one iteration
     599            2 :          IF (optimizer%early_stopping_on .AND. iscf == 1) &
     600              :             prepare_to_exit = .FALSE.
     601              : 
     602            2 :          IF (.NOT. prepare_to_exit) THEN ! update the ALMOs and density matrix
     603              : 
     604              :             ! perform mixing of KS matrices
     605            2 :             IF (iscf /= 1) THEN
     606              :                IF (.FALSE.) THEN ! use diis instead of mixing
     607              :                   DO ispin = 1, nspin
     608              :                      CALL add_submatrices( &
     609              :                         almo_scf_env%mixing_fraction, &
     610              :                         almo_scf_env%domain_ks_xx(:, ispin), &
     611              :                         1.0_dp - almo_scf_env%mixing_fraction, &
     612              :                         submatrix_mixing_old_blk(:, ispin), &
     613              :                         'N')
     614              :                   END DO
     615              :                ELSE
     616            0 :                   DO ispin = 1, nspin
     617              :                      CALL almo_scf_diis_extrapolate(diis_env=almo_diis(ispin), &
     618            0 :                                                     d_extr_var=almo_scf_env%domain_ks_xx(:, ispin))
     619              :                   END DO
     620              :                END IF
     621              :             END IF
     622              :             ! save the new matrix for the future mixing
     623            4 :             DO ispin = 1, nspin
     624              :                CALL copy_submatrices( &
     625              :                   almo_scf_env%domain_ks_xx(:, ispin), &
     626              :                   submatrix_mixing_old_blk(:, ispin), &
     627            4 :                   copy_data=.TRUE.)
     628              :             END DO
     629              : 
     630              :             ! obtain a new set of ALMOs from the updated KS matrix
     631            2 :             CALL almo_scf_ks_xx_to_tv_xx(almo_scf_env)
     632              : 
     633              :             ! update the density matrix
     634            4 :             DO ispin = 1, nspin
     635              : 
     636              :                ! save the initial density matrix (to get the perturbative energy lowering)
     637            2 :                IF (iscf == 1) THEN
     638              :                   CALL dbcsr_create(matrix_p_almo_scf_converged, &
     639            2 :                                     template=almo_scf_env%matrix_p(ispin))
     640              :                   CALL dbcsr_copy(matrix_p_almo_scf_converged, &
     641            2 :                                   almo_scf_env%matrix_p(ispin))
     642              :                END IF
     643              : 
     644              :                !! Application of an occupation-rescaling trick for smearing, if requested
     645            2 :                IF (almo_scf_env%smear) THEN
     646              :                   CALL almo_scf_t_rescaling(matrix_t=almo_scf_env%matrix_t_blk(ispin), &
     647              :                                             mo_energies=almo_scf_env%mo_energies(:, ispin), &
     648              :                                             mu_of_domain=almo_scf_env%mu_of_domain(:, ispin), &
     649              :                                             real_ne_of_domain=almo_scf_env%real_ne_of_domain(:, ispin), &
     650              :                                             spin_kTS=almo_scf_env%kTS(ispin), &
     651              :                                             smear_e_temp=almo_scf_env%smear_e_temp, &
     652              :                                             ndomains=almo_scf_env%ndomains, &
     653            0 :                                             nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin))
     654              :                END IF
     655              : 
     656              :                ! update now
     657              :                CALL almo_scf_t_to_proj( &
     658              :                   t=almo_scf_env%matrix_t(ispin), &
     659              :                   p=almo_scf_env%matrix_p(ispin), &
     660              :                   eps_filter=almo_scf_env%eps_filter, &
     661              :                   orthog_orbs=.FALSE., &
     662              :                   nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
     663              :                   s=almo_scf_env%matrix_s(1), &
     664              :                   sigma=almo_scf_env%matrix_sigma(ispin), &
     665              :                   sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
     666              :                   use_guess=.TRUE., &
     667              :                   smear=almo_scf_env%smear, &
     668              :                   algorithm=almo_scf_env%sigma_inv_algorithm, &
     669              :                   inverse_accelerator=almo_scf_env%order_lanczos, &
     670              :                   inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
     671              :                   eps_lanczos=almo_scf_env%eps_lanczos, &
     672              :                   max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
     673              :                   para_env=almo_scf_env%para_env, &
     674            2 :                   blacs_env=almo_scf_env%blacs_env)
     675            2 :                CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), spin_factor)
     676              :                !! Rescaling electronic entropy contribution by spin_factor
     677            2 :                IF (almo_scf_env%smear) THEN
     678            0 :                   almo_scf_env%kTS(ispin) = almo_scf_env%kTS(ispin)*spin_factor
     679              :                END IF
     680              : 
     681              :                ! obtain perturbative estimate (at no additional cost)
     682              :                ! of the energy lowering relative to the block-diagonal ALMOs
     683            4 :                IF (iscf == 1) THEN
     684              : 
     685              :                   CALL dbcsr_add(matrix_p_almo_scf_converged, &
     686            2 :                                  almo_scf_env%matrix_p(ispin), -1.0_dp, 1.0_dp)
     687              :                   CALL dbcsr_dot(almo_scf_env%matrix_ks_0deloc(ispin), &
     688              :                                  matrix_p_almo_scf_converged, &
     689            2 :                                  denergy_spin(ispin))
     690              : 
     691            2 :                   CALL dbcsr_release(matrix_p_almo_scf_converged)
     692              : 
     693              :                   !! RS-WARNING: If smearing ALMO is requested, electronic entropy contribution should probably be included here
     694              : 
     695            2 :                   denergy_tot = denergy_tot + denergy_spin(ispin)
     696              : 
     697              :                   ! RZK-warning Energy correction can be evaluated using matrix_x
     698              :                   ! as shown in the attempt below and in the PCG procedure.
     699              :                   ! Using matrix_x allows immediate decomposition of the energy
     700              :                   ! lowering into 2-body components for EDA. However, it does not
     701              :                   ! work here because the diagonalization routine does not necessarily
     702              :                   ! produce orbitals with the same sign as the block-diagonal ALMOs
     703              :                   ! Any fixes?!
     704              : 
     705              :                   !CALL dbcsr_init(matrix_x)
     706              :                   !CALL dbcsr_create(matrix_x,&
     707              :                   !        template=almo_scf_env%matrix_t(ispin))
     708              :                   !
     709              :                   !CALL dbcsr_init(matrix_tmp_no)
     710              :                   !CALL dbcsr_create(matrix_tmp_no,&
     711              :                   !        template=almo_scf_env%matrix_t(ispin))
     712              :                   !
     713              :                   !CALL dbcsr_copy(matrix_x,&
     714              :                   !        almo_scf_env%matrix_t_blk(ispin))
     715              :                   !CALL dbcsr_add(matrix_x,almo_scf_env%matrix_t(ispin),&
     716              :                   !        -1.0_dp,1.0_dp)
     717              : 
     718              :                   !CALL dbcsr_dot(matrix_x, almo_scf_env%matrix_err_xx(ispin),denergy)
     719              : 
     720              :                   !denergy=denergy*spin_factor
     721              : 
     722              :                   !IF (unit_nr>0) THEN
     723              :                   !   WRITE(unit_nr,*) "_ENERGY-0: ", almo_scf_env%almo_scf_energy
     724              :                   !   WRITE(unit_nr,*) "_ENERGY-D: ", denergy
     725              :                   !   WRITE(unit_nr,*) "_ENERGY-F: ", almo_scf_env%almo_scf_energy+denergy
     726              :                   !ENDIF
     727              :                   !! RZK-warning update will not work since the energy is overwritten almost immediately
     728              :                   !!CALL almo_scf_update_ks_energy(qs_env,&
     729              :                   !!        almo_scf_env%almo_scf_energy+denergy)
     730              :                   !!
     731              : 
     732              :                   !! print out the results of the decomposition analysis
     733              :                   !CALL dbcsr_hadamard_product(matrix_x,&
     734              :                   !        almo_scf_env%matrix_err_xx(ispin),&
     735              :                   !        matrix_tmp_no)
     736              :                   !CALL dbcsr_scale(matrix_tmp_no,spin_factor)
     737              :                   !CALL dbcsr_filter(matrix_tmp_no,almo_scf_env%eps_filter)
     738              :                   !
     739              :                   !IF (unit_nr>0) THEN
     740              :                   !   WRITE(unit_nr,*)
     741              :                   !   WRITE(unit_nr,'(T2,A)') "DECOMPOSITION OF THE DELOCALIZATION ENERGY"
     742              :                   !ENDIF
     743              : 
     744              :                   !mynode=dbcsr_mp_mynode(dbcsr_distribution_mp(&
     745              :                   !   dbcsr_distribution(matrix_tmp_no)))
     746              :                   !WRITE(mynodestr,'(I6.6)') mynode
     747              :                   !mylogfile='EDA.'//TRIM(ADJUSTL(mynodestr))
     748              :                   !OPEN (iunit,file=mylogfile,status='REPLACE')
     749              :                   !CALL print_block_sum(matrix_tmp_no,iunit)
     750              :                   !CLOSE(iunit)
     751              :                   !
     752              :                   !CALL dbcsr_release(matrix_tmp_no)
     753              :                   !CALL dbcsr_release(matrix_x)
     754              : 
     755              :                END IF ! iscf.eq.1
     756              : 
     757              :             END DO
     758              : 
     759              :             ! print out the energy lowering
     760            2 :             IF (iscf == 1) THEN
     761              :                CALL energy_lowering_report( &
     762              :                   unit_nr=unit_nr, &
     763              :                   ref_energy=almo_scf_env%almo_scf_energy, &
     764            2 :                   energy_lowering=denergy_tot)
     765              :                CALL almo_scf_update_ks_energy(qs_env, &
     766              :                                               energy=almo_scf_env%almo_scf_energy, &
     767            2 :                                               energy_singles_corr=denergy_tot)
     768              :             END IF
     769              : 
     770              :             ! compute the new KS matrix and new energy
     771            2 :             IF (.NOT. almo_scf_env%perturbative_delocalization) THEN
     772              : 
     773            0 :                IF (almo_scf_env%smear) THEN
     774            0 :                   kTS_sum = SUM(almo_scf_env%kTS)
     775              :                ELSE
     776            0 :                   kTS_sum = 0.0_dp
     777              :                END IF
     778              : 
     779              :                CALL almo_dm_to_almo_ks(qs_env, &
     780              :                                        almo_scf_env%matrix_p, &
     781              :                                        almo_scf_env%matrix_ks, &
     782              :                                        energy_new, &
     783              :                                        almo_scf_env%eps_filter, &
     784              :                                        almo_scf_env%mat_distr_aos, &
     785              :                                        smear=almo_scf_env%smear, &
     786            0 :                                        kTS_sum=kTS_sum)
     787              :             END IF
     788              : 
     789              :          END IF ! prepare_to_exit
     790              : 
     791            2 :          IF (almo_scf_env%perturbative_delocalization) THEN
     792              : 
     793              :             ! exit after the first step if we do not need the SCF procedure
     794            2 :             CALL almo_dm_to_qs_env(qs_env, almo_scf_env%matrix_p, almo_scf_env%mat_distr_aos)
     795            2 :             converged = .TRUE.
     796            2 :             prepare_to_exit = .TRUE.
     797              : 
     798              :          ELSE ! not a perturbative treatment
     799              : 
     800            0 :             energy_diff = energy_new - energy_old
     801            0 :             energy_old = energy_new
     802            0 :             almo_scf_env%almo_scf_energy = energy_new
     803              : 
     804            0 :             t2 = m_walltime()
     805              :             ! brief report on the current SCF loop
     806            0 :             IF (unit_nr > 0) THEN
     807            0 :                WRITE (unit_nr, '(T2,A,I6,F20.9,E11.3,E11.3,E11.3,F8.2)') "ALMO SCF", &
     808            0 :                   iscf, &
     809            0 :                   energy_new, energy_diff, error_norm, error_norm_0, t2 - t1
     810              :             END IF
     811            0 :             t1 = m_walltime()
     812              : 
     813              :          END IF
     814              : 
     815            2 :          IF (prepare_to_exit) EXIT
     816              : 
     817              :       END DO ! end scf cycle
     818              : 
     819              :       !! Print number of electrons recovered if smearing was requested
     820            2 :       IF (almo_scf_env%smear) THEN
     821            0 :          DO ispin = 1, nspin
     822            0 :             CALL dbcsr_dot(almo_scf_env%matrix_p(ispin), almo_scf_env%matrix_s(1), density_rec)
     823            0 :             IF (unit_nr > 0) THEN
     824            0 :                WRITE (unit_nr, '(T2,A20,F23.10)') "Electrons recovered:", density_rec
     825              :             END IF
     826              :          END DO
     827              :       END IF
     828              : 
     829            2 :       IF (.NOT. converged .AND. .NOT. optimizer%early_stopping_on) THEN
     830            0 :          CPABORT("SCF for ALMOs on overlapping domains not converged!")
     831              :       END IF
     832              : 
     833            4 :       DO ispin = 1, nspin
     834            2 :          CALL release_submatrices(submatrix_mixing_old_blk(:, ispin))
     835            4 :          CALL almo_scf_diis_release(diis_env=almo_diis(ispin))
     836              :       END DO
     837            4 :       DEALLOCATE (almo_diis)
     838           12 :       DEALLOCATE (submatrix_mixing_old_blk)
     839              : 
     840            2 :       CALL timestop(handle)
     841              : 
     842            2 :    END SUBROUTINE almo_scf_xalmo_eigensolver
     843              : 
     844              : ! **************************************************************************************************
     845              : !> \brief Optimization of ALMOs using PCG-like minimizers
     846              : !> \param qs_env ...
     847              : !> \param almo_scf_env ...
     848              : !> \param optimizer   controls the optimization algorithm
     849              : !> \param quench_t ...
     850              : !> \param matrix_t_in ...
     851              : !> \param matrix_t_out ...
     852              : !> \param assume_t0_q0x - since it is extremely difficult to converge the iterative
     853              : !>                        procedure using T as an optimized variable, assume
     854              : !>                        T = T_0 + (1-R_0)*X and optimize X
     855              : !>                        T_0 is assumed to be the zero-delocalization reference
     856              : !> \param perturbation_only - perturbative (do not update Hamiltonian)
     857              : !> \param special_case   to reduce the overhead special cases are implemented:
     858              : !>                       xalmo_case_normal - no special case (i.e. xALMOs)
     859              : !>                       xalmo_case_block_diag
     860              : !>                       xalmo_case_fully_deloc
     861              : !> \par History
     862              : !>       2011.11 created [Rustam Z Khaliullin]
     863              : !> \author Rustam Z Khaliullin
     864              : ! **************************************************************************************************
     865           86 :    SUBROUTINE almo_scf_xalmo_pcg(qs_env, almo_scf_env, optimizer, quench_t, &
     866              :                                  matrix_t_in, matrix_t_out, assume_t0_q0x, perturbation_only, &
     867              :                                  special_case)
     868              : 
     869              :       TYPE(qs_environment_type), POINTER                 :: qs_env
     870              :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
     871              :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
     872              :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:), &
     873              :          INTENT(INOUT)                                   :: quench_t, matrix_t_in, matrix_t_out
     874              :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, perturbation_only
     875              :       INTEGER, INTENT(IN), OPTIONAL                      :: special_case
     876              : 
     877              :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_xalmo_pcg'
     878              : 
     879              :       CHARACTER(LEN=20)                                  :: iter_type
     880              :       INTEGER :: cg_iteration, dim_op, fixed_line_search_niter, handle, idim0, ielem, ispin, &
     881              :          iteration, line_search_iteration, max_iter, my_special_case, ndomains, nmo, nspins, &
     882              :          outer_iteration, outer_max_iter, prec_type, reim, unit_nr
     883           86 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
     884              :       LOGICAL :: blissful_neglect, converged, just_started, line_search, normalize_orbitals, &
     885              :          optimize_theta, outer_prepare_to_exit, penalty_occ_local, penalty_occ_vol, &
     886              :          prepare_to_exit, reset_conjugator, skip_grad, use_guess
     887           86 :       REAL(dp), ALLOCATABLE, DIMENSION(:)                :: reim_diag, weights, z2
     888              :       REAL(kind=dp) :: appr_sec_der, beta, denom, denom2, e0, e1, energy_coeff, energy_diff, &
     889              :          energy_new, energy_old, eps_skip_gradients, fval, g0, g1, grad_norm, grad_norm_frob, &
     890              :          line_search_error, localiz_coeff, localization_obj_function, next_step_size_guess, &
     891              :          penalty_amplitude, penalty_func_new, spin_factor, step_size, t1, t2, tempreal
     892           86 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: grad_norm_spin, &
     893           86 :                                                             penalty_occ_vol_g_prefactor, &
     894           86 :                                                             penalty_occ_vol_h_prefactor
     895              :       TYPE(cell_type), POINTER                           :: cell
     896              :       TYPE(cp_logger_type), POINTER                      :: logger
     897           86 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: qs_matrix_s
     898           86 :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: op_sm_set_almo, op_sm_set_qs
     899           86 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: FTsiginv, grad, m_sig_sqrti_ii, m_t_in_local, &
     900           86 :          m_theta, prec_vv, prev_grad, prev_minus_prec_grad, prev_step, siginvTFTsiginv, ST, step, &
     901           86 :          STsiginv_0, tempNOcc, tempNOcc_1, tempOccOcc
     902              :       TYPE(domain_submatrix_type), ALLOCATABLE, &
     903           86 :          DIMENSION(:, :)                                 :: bad_modes_projector_down, domain_r_down
     904              :       TYPE(mp_comm_type)                                 :: group
     905              : 
     906           86 :       CALL timeset(routineN, handle)
     907              : 
     908           86 :       my_special_case = xalmo_case_normal
     909           86 :       IF (PRESENT(special_case)) my_special_case = special_case
     910              : 
     911              :       ! get a useful output_unit
     912           86 :       logger => cp_get_default_logger()
     913           86 :       IF (logger%para_env%is_source()) THEN
     914           43 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
     915              :       ELSE
     916              :          unit_nr = -1
     917              :       END IF
     918              : 
     919           86 :       nspins = almo_scf_env%nspins
     920              : 
     921              :       ! if unprojected XALMOs are optimized
     922              :       ! then we must use the "blissful_neglect" procedure
     923           86 :       blissful_neglect = .FALSE.
     924           86 :       IF (my_special_case == xalmo_case_normal .AND. .NOT. assume_t0_q0x) THEN
     925           14 :          blissful_neglect = .TRUE.
     926              :       END IF
     927              : 
     928           86 :       IF (unit_nr > 0) THEN
     929           43 :          WRITE (unit_nr, *)
     930            2 :          SELECT CASE (my_special_case)
     931              :          CASE (xalmo_case_block_diag)
     932            2 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
     933            4 :                " Optimization of block-diagonal ALMOs ", REPEAT("-", 21)
     934              :          CASE (xalmo_case_fully_deloc)
     935           22 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
     936           44 :                " Optimization of fully delocalized MOs ", REPEAT("-", 20)
     937              :          CASE (xalmo_case_normal)
     938           43 :             IF (blissful_neglect) THEN
     939            7 :                WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 25), &
     940           14 :                   " LCP optimization of XALMOs ", REPEAT("-", 26)
     941              :             ELSE
     942           12 :                WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 27), &
     943           24 :                   " Optimization of XALMOs ", REPEAT("-", 28)
     944              :             END IF
     945              :          END SELECT
     946           43 :          WRITE (unit_nr, *)
     947           43 :          WRITE (unit_nr, '(T2,A13,A6,A23,A14,A14,A9)') "Method", "Iter", &
     948           86 :             "Objective Function", "Change", "Convergence", "Time"
     949           43 :          WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
     950              :       END IF
     951              : 
     952              :       ! set local parameters using developer's keywords
     953              :       ! RZK-warning: change to normal keywords later
     954           86 :       optimize_theta = almo_scf_env%logical05
     955           86 :       eps_skip_gradients = almo_scf_env%real01
     956              : 
     957              :       ! penalty amplitude adjusts the strength of volume conservation
     958           86 :       energy_coeff = 1.0_dp !optimizer%opt_penalty%energy_coeff
     959           86 :       localiz_coeff = 0.0_dp !optimizer%opt_penalty%occ_loc_coeff
     960           86 :       penalty_amplitude = 0.0_dp !optimizer%opt_penalty%occ_vol_coeff
     961           86 :       penalty_occ_vol = .FALSE. !( optimizer%opt_penalty%occ_vol_method &
     962              :       !/= penalty_type_none .AND. my_special_case == xalmo_case_fully_deloc )
     963           86 :       penalty_occ_local = .FALSE. !( optimizer%opt_penalty%occ_loc_method &
     964              :       !/= penalty_type_none .AND. my_special_case == xalmo_case_fully_deloc )
     965           86 :       normalize_orbitals = penalty_occ_vol .OR. penalty_occ_local
     966          258 :       ALLOCATE (penalty_occ_vol_g_prefactor(nspins))
     967          172 :       ALLOCATE (penalty_occ_vol_h_prefactor(nspins))
     968          172 :       penalty_occ_vol_g_prefactor(:) = 0.0_dp
     969          172 :       penalty_occ_vol_h_prefactor(:) = 0.0_dp
     970           86 :       penalty_func_new = 0.0_dp
     971              : 
     972              :       ! preconditioner control
     973           86 :       prec_type = optimizer%preconditioner
     974              : 
     975              :       ! control of the line search
     976           86 :       fixed_line_search_niter = 0 ! init to zero, change when eps is small enough
     977              : 
     978           86 :       IF (nspins == 1) THEN
     979           86 :          spin_factor = 2.0_dp
     980              :       ELSE
     981            0 :          spin_factor = 1.0_dp
     982              :       END IF
     983              : 
     984          172 :       ALLOCATE (grad_norm_spin(nspins))
     985          258 :       ALLOCATE (nocc(nspins))
     986              : 
     987              :       ! create a local copy of matrix_t_in because
     988              :       ! matrix_t_in and matrix_t_out can be the same matrix
     989              :       ! we need to make sure data in matrix_t_in is intact
     990              :       ! after we start writing to matrix_t_out
     991          344 :       ALLOCATE (m_t_in_local(nspins))
     992          172 :       DO ispin = 1, nspins
     993              :          CALL dbcsr_create(m_t_in_local(ispin), &
     994              :                            template=matrix_t_in(ispin), &
     995           86 :                            matrix_type=dbcsr_type_no_symmetry)
     996          172 :          CALL dbcsr_copy(m_t_in_local(ispin), matrix_t_in(ispin))
     997              :       END DO
     998              : 
     999              :       ! m_theta contains a set of variational parameters
    1000              :       ! that define one-electron orbitals (simple, projected, etc.)
    1001          258 :       ALLOCATE (m_theta(nspins))
    1002          172 :       DO ispin = 1, nspins
    1003              :          CALL dbcsr_create(m_theta(ispin), &
    1004              :                            template=matrix_t_out(ispin), &
    1005          172 :                            matrix_type=dbcsr_type_no_symmetry)
    1006              :       END DO
    1007              : 
    1008              :       ! Compute localization matrices
    1009              :       IF (penalty_occ_local) THEN
    1010              : 
    1011              :          CALL get_qs_env(qs_env=qs_env, &
    1012              :                          matrix_s=qs_matrix_s, &
    1013              :                          cell=cell)
    1014              : 
    1015              :          IF (cell%orthorhombic) THEN
    1016              :             dim_op = 3
    1017              :          ELSE
    1018              :             dim_op = 6
    1019              :          END IF
    1020              :          ALLOCATE (weights(6))
    1021              :          weights = 0.0_dp
    1022              : 
    1023              :          CALL initialize_weights(cell, weights)
    1024              : 
    1025              :          ALLOCATE (op_sm_set_qs(2, dim_op))
    1026              :          ALLOCATE (op_sm_set_almo(2, dim_op))
    1027              : 
    1028              :          DO idim0 = 1, dim_op
    1029              :             DO reim = 1, SIZE(op_sm_set_qs, 1)
    1030              :                NULLIFY (op_sm_set_qs(reim, idim0)%matrix)
    1031              :                ALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    1032              :                CALL dbcsr_copy(op_sm_set_qs(reim, idim0)%matrix, qs_matrix_s(1)%matrix, &
    1033              :                              name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
    1034              :                CALL dbcsr_set(op_sm_set_qs(reim, idim0)%matrix, 0.0_dp)
    1035              :                NULLIFY (op_sm_set_almo(reim, idim0)%matrix)
    1036              :                ALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    1037              :                CALL dbcsr_copy(op_sm_set_almo(reim, idim0)%matrix, almo_scf_env%matrix_s(1), &
    1038              :                              name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
    1039              :                CALL dbcsr_set(op_sm_set_almo(reim, idim0)%matrix, 0.0_dp)
    1040              :             END DO
    1041              :          END DO
    1042              : 
    1043              :          CALL compute_berry_operator(qs_env, cell, op_sm_set_qs, dim_op)
    1044              : 
    1045              :          !CALL matrix_qs_to_almo(op_sm_set_qs, op_sm_set_almo, almo_scf_env%mat_distr_aos)
    1046              : 
    1047              :       END IF
    1048              : 
    1049              :       ! create initial guess from the initial orbitals
    1050              :       CALL xalmo_initial_guess(m_guess=m_theta, &
    1051              :                                m_t_in=m_t_in_local, &
    1052              :                                m_t0=almo_scf_env%matrix_t_blk, &
    1053              :                                m_quench_t=quench_t, &
    1054              :                                m_overlap=almo_scf_env%matrix_s(1), &
    1055              :                                m_sigma_tmpl=almo_scf_env%matrix_sigma_inv, &
    1056              :                                nspins=nspins, &
    1057              :                                xalmo_history=almo_scf_env%xalmo_history, &
    1058              :                                assume_t0_q0x=assume_t0_q0x, &
    1059              :                                optimize_theta=optimize_theta, &
    1060              :                                envelope_amplitude=almo_scf_env%envelope_amplitude, &
    1061              :                                eps_filter=almo_scf_env%eps_filter, &
    1062              :                                order_lanczos=almo_scf_env%order_lanczos, &
    1063              :                                eps_lanczos=almo_scf_env%eps_lanczos, &
    1064              :                                max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
    1065           86 :                                nocc_of_domain=almo_scf_env%nocc_of_domain)
    1066              : 
    1067           86 :       ndomains = almo_scf_env%ndomains
    1068         1028 :       ALLOCATE (domain_r_down(ndomains, nspins))
    1069           86 :       CALL init_submatrices(domain_r_down)
    1070          942 :       ALLOCATE (bad_modes_projector_down(ndomains, nspins))
    1071           86 :       CALL init_submatrices(bad_modes_projector_down)
    1072              : 
    1073          258 :       ALLOCATE (prec_vv(nspins))
    1074          258 :       ALLOCATE (siginvTFTsiginv(nspins))
    1075          258 :       ALLOCATE (STsiginv_0(nspins))
    1076          258 :       ALLOCATE (FTsiginv(nspins))
    1077          258 :       ALLOCATE (ST(nspins))
    1078          258 :       ALLOCATE (prev_grad(nspins))
    1079          344 :       ALLOCATE (grad(nspins))
    1080          258 :       ALLOCATE (prev_step(nspins))
    1081          258 :       ALLOCATE (step(nspins))
    1082          258 :       ALLOCATE (prev_minus_prec_grad(nspins))
    1083          258 :       ALLOCATE (m_sig_sqrti_ii(nspins))
    1084          258 :       ALLOCATE (tempNOcc(nspins))
    1085          258 :       ALLOCATE (tempNOcc_1(nspins))
    1086          258 :       ALLOCATE (tempOccOcc(nspins))
    1087          172 :       DO ispin = 1, nspins
    1088              : 
    1089              :          ! init temporary storage
    1090              :          CALL dbcsr_create(prec_vv(ispin), &
    1091              :                            template=almo_scf_env%matrix_ks(ispin), &
    1092           86 :                            matrix_type=dbcsr_type_no_symmetry)
    1093              :          CALL dbcsr_create(siginvTFTsiginv(ispin), &
    1094              :                            template=almo_scf_env%matrix_sigma(ispin), &
    1095           86 :                            matrix_type=dbcsr_type_no_symmetry)
    1096              :          CALL dbcsr_create(STsiginv_0(ispin), &
    1097              :                            template=matrix_t_out(ispin), &
    1098           86 :                            matrix_type=dbcsr_type_no_symmetry)
    1099              :          CALL dbcsr_create(FTsiginv(ispin), &
    1100              :                            template=matrix_t_out(ispin), &
    1101           86 :                            matrix_type=dbcsr_type_no_symmetry)
    1102              :          CALL dbcsr_create(ST(ispin), &
    1103              :                            template=matrix_t_out(ispin), &
    1104           86 :                            matrix_type=dbcsr_type_no_symmetry)
    1105              :          CALL dbcsr_create(prev_grad(ispin), &
    1106              :                            template=matrix_t_out(ispin), &
    1107           86 :                            matrix_type=dbcsr_type_no_symmetry)
    1108              :          CALL dbcsr_create(grad(ispin), &
    1109              :                            template=matrix_t_out(ispin), &
    1110           86 :                            matrix_type=dbcsr_type_no_symmetry)
    1111              :          CALL dbcsr_create(prev_step(ispin), &
    1112              :                            template=matrix_t_out(ispin), &
    1113           86 :                            matrix_type=dbcsr_type_no_symmetry)
    1114              :          CALL dbcsr_create(step(ispin), &
    1115              :                            template=matrix_t_out(ispin), &
    1116           86 :                            matrix_type=dbcsr_type_no_symmetry)
    1117              :          CALL dbcsr_create(prev_minus_prec_grad(ispin), &
    1118              :                            template=matrix_t_out(ispin), &
    1119           86 :                            matrix_type=dbcsr_type_no_symmetry)
    1120              :          CALL dbcsr_create(m_sig_sqrti_ii(ispin), &
    1121              :                            template=almo_scf_env%matrix_sigma_inv(ispin), &
    1122           86 :                            matrix_type=dbcsr_type_no_symmetry)
    1123              :          CALL dbcsr_create(tempNOcc(ispin), &
    1124              :                            template=matrix_t_out(ispin), &
    1125           86 :                            matrix_type=dbcsr_type_no_symmetry)
    1126              :          CALL dbcsr_create(tempNOcc_1(ispin), &
    1127              :                            template=matrix_t_out(ispin), &
    1128           86 :                            matrix_type=dbcsr_type_no_symmetry)
    1129              :          CALL dbcsr_create(tempOccOcc(ispin), &
    1130              :                            template=almo_scf_env%matrix_sigma_inv(ispin), &
    1131           86 :                            matrix_type=dbcsr_type_no_symmetry)
    1132              : 
    1133           86 :          CALL dbcsr_set(step(ispin), 0.0_dp)
    1134           86 :          CALL dbcsr_set(prev_step(ispin), 0.0_dp)
    1135              : 
    1136              :          CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), &
    1137           86 :                              nfullrows_total=nocc(ispin))
    1138              : 
    1139              :          ! invert S domains if necessary
    1140              :          ! Note: domains for alpha and beta electrons might be different
    1141              :          ! that is why the inversion of the AO overlap is inside the spin loop
    1142           86 :          IF (my_special_case == xalmo_case_normal) THEN
    1143              :             CALL construct_domain_s_inv( &
    1144              :                matrix_s=almo_scf_env%matrix_s(1), &
    1145              :                subm_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    1146              :                dpattern=quench_t(ispin), &
    1147              :                map=almo_scf_env%domain_map(ispin), &
    1148           38 :                node_of_domain=almo_scf_env%cpu_of_domain)
    1149              : 
    1150              :             CALL construct_domain_s_sqrt( &
    1151              :                matrix_s=almo_scf_env%matrix_s(1), &
    1152              :                subm_s_sqrt=almo_scf_env%domain_s_sqrt(:, ispin), &
    1153              :                subm_s_sqrt_inv=almo_scf_env%domain_s_sqrt_inv(:, ispin), &
    1154              :                dpattern=almo_scf_env%quench_t(ispin), &
    1155              :                map=almo_scf_env%domain_map(ispin), &
    1156           38 :                node_of_domain=almo_scf_env%cpu_of_domain)
    1157              : 
    1158              :          END IF
    1159              : 
    1160           86 :          IF (assume_t0_q0x) THEN
    1161              : 
    1162              :             ! save S.T_0.siginv_0
    1163           42 :             IF (my_special_case == xalmo_case_fully_deloc) THEN
    1164              :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1165              :                                    almo_scf_env%matrix_s(1), &
    1166              :                                    almo_scf_env%matrix_t_blk(ispin), &
    1167              :                                    0.0_dp, ST(ispin), &
    1168           18 :                                    filter_eps=almo_scf_env%eps_filter)
    1169              :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1170              :                                    ST(ispin), &
    1171              :                                    almo_scf_env%matrix_sigma_inv_0deloc(ispin), &
    1172              :                                    0.0_dp, STsiginv_0(ispin), &
    1173           18 :                                    filter_eps=almo_scf_env%eps_filter)
    1174              :             END IF
    1175              : 
    1176              :             ! construct domain-projector
    1177           42 :             IF (my_special_case == xalmo_case_normal) THEN
    1178              :                CALL construct_domain_r_down( &
    1179              :                   matrix_t=almo_scf_env%matrix_t_blk(ispin), &
    1180              :                   matrix_sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
    1181              :                   matrix_s=almo_scf_env%matrix_s(1), &
    1182              :                   subm_r_down=domain_r_down(:, ispin), &
    1183              :                   dpattern=quench_t(ispin), &
    1184              :                   map=almo_scf_env%domain_map(ispin), &
    1185              :                   node_of_domain=almo_scf_env%cpu_of_domain, &
    1186           24 :                   filter_eps=almo_scf_env%eps_filter)
    1187              :             END IF
    1188              : 
    1189              :          END IF ! assume_t0_q0x
    1190              : 
    1191              :          ! localization functional
    1192          172 :          IF (penalty_occ_local) THEN
    1193              : 
    1194              :             ! compute S.R0.B.R0.S
    1195              :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1196              :                                 almo_scf_env%matrix_s(1), &
    1197              :                                 matrix_t_in(ispin), &
    1198              :                                 0.0_dp, tempNOcc(ispin), &
    1199            0 :                                 filter_eps=almo_scf_env%eps_filter)
    1200              :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1201              :                                 tempNOcc(ispin), &
    1202              :                                 almo_scf_env%matrix_sigma_inv(ispin), &
    1203              :                                 0.0_dp, tempNOCC_1(ispin), &
    1204            0 :                                 filter_eps=almo_scf_env%eps_filter)
    1205              : 
    1206            0 :             DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
    1207            0 :                DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
    1208              : 
    1209              :                   CALL matrix_qs_to_almo(op_sm_set_qs(reim, idim0)%matrix, &
    1210            0 :                                          op_sm_set_almo(reim, idim0)%matrix, almo_scf_env%mat_distr_aos)
    1211              : 
    1212              :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1213              :                                       op_sm_set_almo(reim, idim0)%matrix, &
    1214              :                                       matrix_t_in(ispin), &
    1215              :                                       0.0_dp, tempNOcc(ispin), &
    1216            0 :                                       filter_eps=almo_scf_env%eps_filter)
    1217              : 
    1218              :                   CALL dbcsr_multiply("T", "N", 1.0_dp, &
    1219              :                                       matrix_t_in(ispin), &
    1220              :                                       tempNOcc(ispin), &
    1221              :                                       0.0_dp, tempOccOcc(ispin), &
    1222            0 :                                       filter_eps=almo_scf_env%eps_filter)
    1223              : 
    1224              :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1225              :                                       tempNOCC_1(ispin), &
    1226              :                                       tempOccOcc(ispin), &
    1227              :                                       0.0_dp, tempNOcc(ispin), &
    1228            0 :                                       filter_eps=almo_scf_env%eps_filter)
    1229              : 
    1230              :                   CALL dbcsr_multiply("N", "T", 1.0_dp, &
    1231              :                                       tempNOcc(ispin), &
    1232              :                                       tempNOcc_1(ispin), &
    1233              :                                       0.0_dp, op_sm_set_almo(reim, idim0)%matrix, &
    1234            0 :                                       filter_eps=almo_scf_env%eps_filter)
    1235              : 
    1236              :                END DO
    1237              :             END DO ! end loop over idim0
    1238              : 
    1239              :          END IF !penalty_occ_local
    1240              : 
    1241              :       END DO ! ispin
    1242              : 
    1243              :       ! start the outer SCF loop
    1244           86 :       outer_max_iter = optimizer%max_iter_outer_loop
    1245           86 :       outer_prepare_to_exit = .FALSE.
    1246           86 :       outer_iteration = 0
    1247           86 :       grad_norm = 0.0_dp
    1248           86 :       grad_norm_frob = 0.0_dp
    1249           86 :       use_guess = .FALSE.
    1250              : 
    1251              :       DO
    1252              : 
    1253              :          ! start the inner SCF loop
    1254           92 :          max_iter = optimizer%max_iter
    1255           92 :          prepare_to_exit = .FALSE.
    1256           92 :          line_search = .FALSE.
    1257           92 :          converged = .FALSE.
    1258           92 :          iteration = 0
    1259           92 :          cg_iteration = 0
    1260           92 :          line_search_iteration = 0
    1261              :          energy_new = 0.0_dp
    1262           92 :          energy_old = 0.0_dp
    1263           92 :          energy_diff = 0.0_dp
    1264              :          localization_obj_function = 0.0_dp
    1265           92 :          line_search_error = 0.0_dp
    1266              : 
    1267           92 :          t1 = m_walltime()
    1268              : 
    1269         1048 :          DO
    1270              : 
    1271         1048 :             just_started = (iteration == 0) .AND. (outer_iteration == 0)
    1272              : 
    1273              :             CALL main_var_to_xalmos_and_loss_func( &
    1274              :                almo_scf_env=almo_scf_env, &
    1275              :                qs_env=qs_env, &
    1276              :                m_main_var_in=m_theta, &
    1277              :                m_t_out=matrix_t_out, &
    1278              :                m_sig_sqrti_ii_out=m_sig_sqrti_ii, &
    1279              :                energy_out=energy_new, &
    1280              :                penalty_out=penalty_func_new, &
    1281              :                m_FTsiginv_out=FTsiginv, &
    1282              :                m_siginvTFTsiginv_out=siginvTFTsiginv, &
    1283              :                m_ST_out=ST, &
    1284              :                m_STsiginv0_in=STsiginv_0, &
    1285              :                m_quench_t_in=quench_t, &
    1286              :                domain_r_down_in=domain_r_down, &
    1287              :                assume_t0_q0x=assume_t0_q0x, &
    1288              :                just_started=just_started, &
    1289              :                optimize_theta=optimize_theta, &
    1290              :                normalize_orbitals=normalize_orbitals, &
    1291              :                perturbation_only=perturbation_only, &
    1292              :                do_penalty=penalty_occ_vol, &
    1293         1048 :                special_case=my_special_case)
    1294         1048 :             IF (penalty_occ_vol) THEN
    1295              :                ! this is not pure energy anymore
    1296            0 :                energy_new = energy_new + penalty_func_new
    1297              :             END IF
    1298         2096 :             DO ispin = 1, nspins
    1299         2096 :                IF (penalty_occ_vol) THEN
    1300              :                   penalty_occ_vol_g_prefactor(ispin) = &
    1301            0 :                      -2.0_dp*penalty_amplitude*spin_factor*nocc(ispin)
    1302            0 :                   penalty_occ_vol_h_prefactor(ispin) = 0.0_dp
    1303              :                END IF
    1304              :             END DO
    1305              : 
    1306         1048 :             localization_obj_function = 0.0_dp
    1307              :             ! RZK-warning: This block must be combined with the loss function
    1308         1048 :             IF (penalty_occ_local) THEN
    1309            0 :                DO ispin = 1, nspins
    1310              : 
    1311              :                   ! LzL insert localization penalty
    1312            0 :                   localization_obj_function = 0.0_dp
    1313            0 :                   CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), nfullrows_total=nmo)
    1314            0 :                   ALLOCATE (z2(nmo))
    1315            0 :                   ALLOCATE (reim_diag(nmo))
    1316              : 
    1317            0 :                   CALL dbcsr_get_info(tempOccOcc(ispin), group=group)
    1318              : 
    1319            0 :                   DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
    1320              : 
    1321            0 :                      z2(:) = 0.0_dp
    1322              : 
    1323            0 :                      DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
    1324              : 
    1325              :                         !CALL matrix_qs_to_almo(op_sm_set_qs(reim, idim0)%matrix,
    1326              :                         !                       op_sm_set_almo(reim, idim0)%matrix, &
    1327              :                         !                       almo_scf_env%mat_distr_aos)
    1328              :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    1329              :                                             op_sm_set_almo(reim, idim0)%matrix, &
    1330              :                                             matrix_t_out(ispin), &
    1331              :                                             0.0_dp, tempNOcc(ispin), &
    1332            0 :                                             filter_eps=almo_scf_env%eps_filter)
    1333              :                         !warning - save time by computing only the diagonal elements
    1334              :                         CALL dbcsr_multiply("T", "N", 1.0_dp, &
    1335              :                                             matrix_t_out(ispin), &
    1336              :                                             tempNOcc(ispin), &
    1337              :                                             0.0_dp, tempOccOcc(ispin), &
    1338            0 :                                             filter_eps=almo_scf_env%eps_filter)
    1339              : 
    1340            0 :                         reim_diag = 0.0_dp
    1341            0 :                         CALL dbcsr_get_diag(tempOccOcc(ispin), reim_diag)
    1342            0 :                         CALL group%sum(reim_diag)
    1343            0 :                         z2(:) = z2(:) + reim_diag(:)*reim_diag(:)
    1344              : 
    1345              :                      END DO
    1346              : 
    1347            0 :                      DO ielem = 1, nmo
    1348              :                         SELECT CASE (2) ! allows for selection of different spread functionals
    1349              :                         CASE (1) ! functional =  -W_I * log( |z_I|^2 )
    1350            0 :                            fval = -weights(idim0)*LOG(ABS(z2(ielem)))
    1351              :                         CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
    1352            0 :                            fval = weights(idim0) - weights(idim0)*ABS(z2(ielem))
    1353              :                         CASE (3) ! functional =  W_I * ( 1 - |z_I| )
    1354              :                            fval = weights(idim0) - weights(idim0)*SQRT(ABS(z2(ielem)))
    1355              :                         END SELECT
    1356            0 :                         localization_obj_function = localization_obj_function + fval
    1357              :                      END DO
    1358              : 
    1359              :                   END DO ! end loop over idim0
    1360              : 
    1361            0 :                   DEALLOCATE (z2)
    1362            0 :                   DEALLOCATE (reim_diag)
    1363              : 
    1364            0 :                   energy_new = energy_new + localiz_coeff*localization_obj_function
    1365              : 
    1366              :                END DO ! ispin
    1367              :             END IF ! penalty_occ_local
    1368              : 
    1369         2096 :             DO ispin = 1, nspins
    1370              : 
    1371              :                IF (just_started .AND. almo_mathematica) THEN
    1372              :                   CPWARN_IF(ispin > 1, "Mathematica files will be overwritten")
    1373              :                   CALL print_mathematica_matrix(almo_scf_env%matrix_s(1), "matrixS.dat")
    1374              :                   CALL print_mathematica_matrix(almo_scf_env%matrix_ks(ispin), "matrixF.dat")
    1375              :                   CALL print_mathematica_matrix(matrix_t_out(ispin), "matrixT.dat")
    1376              :                   CALL print_mathematica_matrix(quench_t(ispin), "matrixQ.dat")
    1377              :                END IF
    1378              : 
    1379              :                ! save the previous gradient to compute beta
    1380              :                ! do it only if the previous grad was computed
    1381              :                ! for .NOT.line_search
    1382         1048 :                IF (line_search_iteration == 0 .AND. iteration /= 0) &
    1383         1542 :                   CALL dbcsr_copy(prev_grad(ispin), grad(ispin))
    1384              : 
    1385              :             END DO ! ispin
    1386              : 
    1387              :             ! compute the energy gradient if necessary
    1388              :             skip_grad = (iteration > 0 .AND. &
    1389              :                          fixed_line_search_niter /= 0 .AND. &
    1390         1048 :                          line_search_iteration /= fixed_line_search_niter)
    1391              : 
    1392              :             IF (.NOT. skip_grad) THEN
    1393              : 
    1394         2096 :                DO ispin = 1, nspins
    1395              : 
    1396              :                   CALL compute_gradient( &
    1397              :                      m_grad_out=grad(ispin), &
    1398              :                      m_ks=almo_scf_env%matrix_ks(ispin), &
    1399              :                      m_s=almo_scf_env%matrix_s(1), &
    1400              :                      m_t=matrix_t_out(ispin), &
    1401              :                      m_t0=almo_scf_env%matrix_t_blk(ispin), &
    1402              :                      m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    1403              :                      m_quench_t=quench_t(ispin), &
    1404              :                      m_FTsiginv=FTsiginv(ispin), &
    1405              :                      m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    1406              :                      m_ST=ST(ispin), &
    1407              :                      m_STsiginv0=STsiginv_0(ispin), &
    1408              :                      m_theta=m_theta(ispin), &
    1409              :                      m_sig_sqrti_ii=m_sig_sqrti_ii(ispin), &
    1410              :                      domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    1411              :                      domain_r_down=domain_r_down(:, ispin), &
    1412              :                      cpu_of_domain=almo_scf_env%cpu_of_domain, &
    1413              :                      domain_map=almo_scf_env%domain_map(ispin), &
    1414              :                      assume_t0_q0x=assume_t0_q0x, &
    1415              :                      optimize_theta=optimize_theta, &
    1416              :                      normalize_orbitals=normalize_orbitals, &
    1417              :                      penalty_occ_vol=penalty_occ_vol, &
    1418              :                      penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    1419              :                      envelope_amplitude=almo_scf_env%envelope_amplitude, &
    1420              :                      eps_filter=almo_scf_env%eps_filter, &
    1421              :                      spin_factor=spin_factor, &
    1422              :                      special_case=my_special_case, &
    1423              :                      penalty_occ_local=penalty_occ_local, &
    1424              :                      op_sm_set=op_sm_set_almo, &
    1425              :                      weights=weights, &
    1426              :                      energy_coeff=energy_coeff, &
    1427         2096 :                      localiz_coeff=localiz_coeff)
    1428              : 
    1429              :                END DO ! ispin
    1430              : 
    1431              :             END IF ! skip_grad
    1432              : 
    1433              :             ! if unprojected XALMOs are optimized then compute both
    1434              :             ! HessianInv/preconditioner and the "bad-mode" projector
    1435              : 
    1436         1048 :             IF (blissful_neglect) THEN
    1437          460 :                DO ispin = 1, nspins
    1438              :                   !compute the prec only for the first step,
    1439              :                   !but project the gradient every step
    1440          230 :                   IF (iteration == 0) THEN
    1441              :                      CALL compute_preconditioner( &
    1442              :                         domain_prec_out=almo_scf_env%domain_preconditioner(:, ispin), &
    1443              :                         bad_modes_projector_down_out=bad_modes_projector_down(:, ispin), &
    1444              :                         m_prec_out=prec_vv(ispin), &
    1445              :                         m_ks=almo_scf_env%matrix_ks(ispin), &
    1446              :                         m_s=almo_scf_env%matrix_s(1), &
    1447              :                         m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    1448              :                         m_quench_t=quench_t(ispin), &
    1449              :                         m_FTsiginv=FTsiginv(ispin), &
    1450              :                         m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    1451              :                         m_ST=ST(ispin), &
    1452              :                         para_env=almo_scf_env%para_env, &
    1453              :                         blacs_env=almo_scf_env%blacs_env, &
    1454              :                         nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    1455              :                         domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    1456              :                         domain_s_inv_half=almo_scf_env%domain_s_sqrt_inv(:, ispin), &
    1457              :                         domain_s_half=almo_scf_env%domain_s_sqrt(:, ispin), &
    1458              :                         domain_r_down=domain_r_down(:, ispin), &
    1459              :                         cpu_of_domain=almo_scf_env%cpu_of_domain, &
    1460              :                         domain_map=almo_scf_env%domain_map(ispin), &
    1461              :                         assume_t0_q0x=assume_t0_q0x, &
    1462              :                         penalty_occ_vol=penalty_occ_vol, &
    1463              :                         penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    1464              :                         eps_filter=almo_scf_env%eps_filter, &
    1465              :                         neg_thr=optimizer%neglect_threshold, &
    1466              :                         spin_factor=spin_factor, &
    1467              :                         skip_inversion=.FALSE., &
    1468           18 :                         special_case=my_special_case)
    1469              :                   END IF
    1470              :                   ! remove bad modes from the gradient
    1471              :                   CALL apply_domain_operators( &
    1472              :                      matrix_in=grad(ispin), &
    1473              :                      matrix_out=grad(ispin), &
    1474              :                      operator1=almo_scf_env%domain_s_inv(:, ispin), &
    1475              :                      operator2=bad_modes_projector_down(:, ispin), &
    1476              :                      dpattern=quench_t(ispin), &
    1477              :                      map=almo_scf_env%domain_map(ispin), &
    1478              :                      node_of_domain=almo_scf_env%cpu_of_domain, &
    1479              :                      my_action=1, &
    1480          460 :                      filter_eps=almo_scf_env%eps_filter)
    1481              : 
    1482              :                END DO ! ispin
    1483              : 
    1484              :             END IF ! blissful neglect
    1485              : 
    1486              :             ! check convergence and other exit criteria
    1487         2096 :             DO ispin = 1, nspins
    1488         2096 :                grad_norm_spin(ispin) = dbcsr_maxabs(grad(ispin))
    1489              :             END DO ! ispin
    1490         3144 :             grad_norm = MAXVAL(grad_norm_spin)
    1491              : 
    1492         1048 :             converged = (grad_norm <= optimizer%eps_error)
    1493         1048 :             IF (converged .OR. (iteration >= max_iter)) THEN
    1494           92 :                prepare_to_exit = .TRUE.
    1495              :             END IF
    1496              :             ! if early stopping is on do at least one iteration
    1497         1048 :             IF (optimizer%early_stopping_on .AND. just_started) &
    1498            0 :                prepare_to_exit = .FALSE.
    1499              : 
    1500              :             IF (grad_norm < almo_scf_env%eps_prev_guess) &
    1501         1048 :                use_guess = .TRUE.
    1502              : 
    1503              :             ! it is not time to exit just yet
    1504         1048 :             IF (.NOT. prepare_to_exit) THEN
    1505              : 
    1506              :                ! check the gradient along the step direction
    1507              :                ! and decide whether to switch to the line-search mode
    1508              :                ! do not do this in the first iteration
    1509          956 :                IF (iteration /= 0) THEN
    1510              : 
    1511          864 :                   IF (fixed_line_search_niter == 0) THEN
    1512              : 
    1513              :                      ! enforce at least one line search
    1514              :                      ! without even checking the error
    1515          864 :                      IF (.NOT. line_search) THEN
    1516              : 
    1517          422 :                         line_search = .TRUE.
    1518          422 :                         line_search_iteration = line_search_iteration + 1
    1519              : 
    1520              :                      ELSE
    1521              : 
    1522              :                         ! check the line-search error and decide whether to
    1523              :                         ! change the direction
    1524              :                         line_search_error = 0.0_dp
    1525              :                         denom = 0.0_dp
    1526              :                         denom2 = 0.0_dp
    1527              : 
    1528          884 :                         DO ispin = 1, nspins
    1529              : 
    1530          442 :                            CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    1531          442 :                            line_search_error = line_search_error + tempreal
    1532          442 :                            CALL dbcsr_dot(grad(ispin), grad(ispin), tempreal)
    1533          442 :                            denom = denom + tempreal
    1534          442 :                            CALL dbcsr_dot(step(ispin), step(ispin), tempreal)
    1535          884 :                            denom2 = denom2 + tempreal
    1536              : 
    1537              :                         END DO ! ispin
    1538              : 
    1539              :                         ! cosine of the angle between the step and grad
    1540              :                         ! (must be close to zero at convergence)
    1541          442 :                         line_search_error = line_search_error/SQRT(denom)/SQRT(denom2)
    1542              : 
    1543          442 :                         IF (ABS(line_search_error) > optimizer%lin_search_eps_error) THEN
    1544           40 :                            line_search = .TRUE.
    1545           40 :                            line_search_iteration = line_search_iteration + 1
    1546              :                         ELSE
    1547          402 :                            line_search = .FALSE.
    1548          402 :                            line_search_iteration = 0
    1549          402 :                            IF (grad_norm < eps_skip_gradients) THEN
    1550            0 :                               fixed_line_search_niter = ABS(almo_scf_env%integer04)
    1551              :                            END IF
    1552              :                         END IF
    1553              : 
    1554              :                      END IF
    1555              : 
    1556              :                   ELSE ! decision for fixed_line_search_niter
    1557              : 
    1558            0 :                      IF (.NOT. line_search) THEN
    1559            0 :                         line_search = .TRUE.
    1560            0 :                         line_search_iteration = line_search_iteration + 1
    1561              :                      ELSE
    1562            0 :                         IF (line_search_iteration == fixed_line_search_niter) THEN
    1563            0 :                            line_search = .FALSE.
    1564              :                            line_search_iteration = 0
    1565            0 :                            line_search_iteration = line_search_iteration + 1
    1566              :                         END IF
    1567              :                      END IF
    1568              : 
    1569              :                   END IF ! fixed_line_search_niter fork
    1570              : 
    1571              :                END IF ! iteration.ne.0
    1572              : 
    1573          956 :                IF (line_search) THEN
    1574          462 :                   energy_diff = 0.0_dp
    1575              :                ELSE
    1576          494 :                   energy_diff = energy_new - energy_old
    1577          494 :                   energy_old = energy_new
    1578              :                END IF
    1579              : 
    1580              :                ! update the step direction
    1581          956 :                IF (.NOT. line_search) THEN
    1582              : 
    1583              :                   !IF (unit_nr>0) THEN
    1584              :                   !   WRITE(unit_nr,*) "....updating step direction...."
    1585              :                   !ENDIF
    1586              : 
    1587          988 :                   cg_iteration = cg_iteration + 1
    1588              : 
    1589              :                   ! save the previous step
    1590          988 :                   DO ispin = 1, nspins
    1591          988 :                      CALL dbcsr_copy(prev_step(ispin), step(ispin))
    1592              :                   END DO ! ispin
    1593              : 
    1594              :                   ! compute the new step (apply preconditioner if available)
    1595            0 :                   SELECT CASE (prec_type)
    1596              :                   CASE (xalmo_prec_full)
    1597              : 
    1598              :                      ! solving approximate Newton eq in the full (linearized) space
    1599              :                      CALL newton_grad_to_step( &
    1600              :                         optimizer=almo_scf_env%opt_xalmo_newton_pcg_solver, &
    1601              :                         m_grad=grad(:), &
    1602              :                         m_delta=step(:), &
    1603              :                         m_s=almo_scf_env%matrix_s(:), &
    1604              :                         m_ks=almo_scf_env%matrix_ks(:), &
    1605              :                         m_siginv=almo_scf_env%matrix_sigma_inv(:), &
    1606              :                         m_quench_t=quench_t(:), &
    1607              :                         m_FTsiginv=FTsiginv(:), &
    1608              :                         m_siginvTFTsiginv=siginvTFTsiginv(:), &
    1609              :                         m_ST=ST(:), &
    1610              :                         m_t=matrix_t_out(:), &
    1611              :                         m_sig_sqrti_ii=m_sig_sqrti_ii(:), &
    1612              :                         domain_s_inv=almo_scf_env%domain_s_inv(:, :), &
    1613              :                         domain_r_down=domain_r_down(:, :), &
    1614              :                         domain_map=almo_scf_env%domain_map(:), &
    1615              :                         cpu_of_domain=almo_scf_env%cpu_of_domain, &
    1616              :                         nocc_of_domain=almo_scf_env%nocc_of_domain(:, :), &
    1617              :                         para_env=almo_scf_env%para_env, &
    1618              :                         blacs_env=almo_scf_env%blacs_env, &
    1619              :                         eps_filter=almo_scf_env%eps_filter, &
    1620              :                         optimize_theta=optimize_theta, &
    1621              :                         penalty_occ_vol=penalty_occ_vol, &
    1622              :                         normalize_orbitals=normalize_orbitals, &
    1623              :                         penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(:), &
    1624              :                         penalty_occ_vol_pf2=penalty_occ_vol_h_prefactor(:), &
    1625              :                         special_case=my_special_case &
    1626            0 :                         )
    1627              : 
    1628              :                   CASE (xalmo_prec_domain)
    1629              : 
    1630              :                      ! compute and invert preconditioner?
    1631          494 :                      IF (.NOT. blissful_neglect .AND. &
    1632              :                          ((just_started .AND. perturbation_only) .OR. &
    1633              :                           (iteration == 0 .AND. (.NOT. perturbation_only))) &
    1634              :                          ) THEN
    1635              : 
    1636              :                         ! computing preconditioner
    1637          148 :                         DO ispin = 1, nspins
    1638              :                            CALL compute_preconditioner( &
    1639              :                               domain_prec_out=almo_scf_env%domain_preconditioner(:, ispin), &
    1640              :                               m_prec_out=prec_vv(ispin), &
    1641              :                               m_ks=almo_scf_env%matrix_ks(ispin), &
    1642              :                               m_s=almo_scf_env%matrix_s(1), &
    1643              :                               m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    1644              :                               m_quench_t=quench_t(ispin), &
    1645              :                               m_FTsiginv=FTsiginv(ispin), &
    1646              :                               m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    1647              :                               m_ST=ST(ispin), &
    1648              :                               para_env=almo_scf_env%para_env, &
    1649              :                               blacs_env=almo_scf_env%blacs_env, &
    1650              :                               nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    1651              :                               domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    1652              :                               domain_r_down=domain_r_down(:, ispin), &
    1653              :                               cpu_of_domain=almo_scf_env%cpu_of_domain, &
    1654              :                               domain_map=almo_scf_env%domain_map(ispin), &
    1655              :                               assume_t0_q0x=assume_t0_q0x, &
    1656              :                               penalty_occ_vol=penalty_occ_vol, &
    1657              :                               penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    1658              :                               eps_filter=almo_scf_env%eps_filter, &
    1659              :                               neg_thr=0.5_dp, &
    1660              :                               spin_factor=spin_factor, &
    1661              :                               skip_inversion=.FALSE., &
    1662          568 :                               special_case=my_special_case)
    1663              :                         END DO ! ispin
    1664              :                      END IF ! compute_prec
    1665              : 
    1666              :                      !IF (unit_nr>0) THEN
    1667              :                      !   WRITE(unit_nr,*) "....applying precomputed preconditioner...."
    1668              :                      !ENDIF
    1669              : 
    1670          494 :                      IF (my_special_case == xalmo_case_block_diag .OR. &
    1671              :                          my_special_case == xalmo_case_fully_deloc) THEN
    1672              : 
    1673          488 :                         DO ispin = 1, nspins
    1674              : 
    1675              :                            CALL dbcsr_multiply("N", "N", -1.0_dp, &
    1676              :                                                prec_vv(ispin), &
    1677              :                                                grad(ispin), &
    1678              :                                                0.0_dp, step(ispin), &
    1679          488 :                                                filter_eps=almo_scf_env%eps_filter)
    1680              : 
    1681              :                         END DO ! ispin
    1682              : 
    1683              :                      ELSE
    1684              : 
    1685              :                         !!! RZK-warning Currently for non-theta only
    1686          250 :                         IF (optimize_theta) THEN
    1687            0 :                            CPABORT("theta is NYI")
    1688              :                         END IF
    1689              : 
    1690          500 :                         DO ispin = 1, nspins
    1691              : 
    1692              :                            CALL apply_domain_operators( &
    1693              :                               matrix_in=grad(ispin), &
    1694              :                               matrix_out=step(ispin), &
    1695              :                               operator1=almo_scf_env%domain_preconditioner(:, ispin), &
    1696              :                               dpattern=quench_t(ispin), &
    1697              :                               map=almo_scf_env%domain_map(ispin), &
    1698              :                               node_of_domain=almo_scf_env%cpu_of_domain, &
    1699              :                               my_action=0, &
    1700          250 :                               filter_eps=almo_scf_env%eps_filter)
    1701          500 :                            CALL dbcsr_scale(step(ispin), -1.0_dp)
    1702              : 
    1703              :                            !CALL dbcsr_copy(m_tmp_no_3,&
    1704              :                            !        quench_t(ispin))
    1705              :                            !CALL inverse_of_elements(m_tmp_no_3)
    1706              :                            !CALL dbcsr_copy(m_tmp_no_2,step)
    1707              :                            !CALL dbcsr_hadamard_product(&
    1708              :                            !        m_tmp_no_2,&
    1709              :                            !        m_tmp_no_3,&
    1710              :                            !        step)
    1711              :                            !CALL dbcsr_copy(m_tmp_no_3,quench_t(ispin))
    1712              : 
    1713              :                         END DO ! ispin
    1714              : 
    1715              :                      END IF ! special case
    1716              : 
    1717              :                   CASE (xalmo_prec_zero)
    1718              : 
    1719              :                      ! no preconditioner
    1720          494 :                      DO ispin = 1, nspins
    1721              : 
    1722            0 :                         CALL dbcsr_copy(step(ispin), grad(ispin))
    1723            0 :                         CALL dbcsr_scale(step(ispin), -1.0_dp)
    1724              : 
    1725              :                      END DO ! ispin
    1726              : 
    1727              :                   END SELECT ! preconditioner type fork
    1728              : 
    1729              :                   ! check whether we need to reset conjugate directions
    1730          494 :                   IF (iteration == 0) THEN
    1731           92 :                      reset_conjugator = .TRUE.
    1732              :                   END IF
    1733              : 
    1734              :                   ! compute the conjugation coefficient - beta
    1735          494 :                   IF (.NOT. reset_conjugator) THEN
    1736              : 
    1737              :                      CALL compute_cg_beta( &
    1738              :                         beta=beta, &
    1739              :                         reset_conjugator=reset_conjugator, &
    1740              :                         conjugator=optimizer%conjugator, &
    1741              :                         grad=grad(:), &
    1742              :                         prev_grad=prev_grad(:), &
    1743              :                         step=step(:), &
    1744              :                         prev_step=prev_step(:), &
    1745              :                         prev_minus_prec_grad=prev_minus_prec_grad(:) &
    1746          402 :                         )
    1747              : 
    1748              :                   END IF
    1749              : 
    1750          494 :                   IF (reset_conjugator) THEN
    1751              : 
    1752           92 :                      beta = 0.0_dp
    1753           92 :                      IF (unit_nr > 0 .AND. (.NOT. just_started)) THEN
    1754            3 :                         WRITE (unit_nr, '(T2,A35)') "Re-setting conjugator to zero"
    1755              :                      END IF
    1756           92 :                      reset_conjugator = .FALSE.
    1757              : 
    1758              :                   END IF
    1759              : 
    1760              :                   ! save the preconditioned gradient (useful for beta)
    1761          988 :                   DO ispin = 1, nspins
    1762              : 
    1763          494 :                      CALL dbcsr_copy(prev_minus_prec_grad(ispin), step(ispin))
    1764              : 
    1765              :                      !IF (unit_nr>0) THEN
    1766              :                      !   WRITE(unit_nr,*) "....final beta....", beta
    1767              :                      !ENDIF
    1768              : 
    1769              :                      ! conjugate the step direction
    1770          988 :                      CALL dbcsr_add(step(ispin), prev_step(ispin), 1.0_dp, beta)
    1771              : 
    1772              :                   END DO ! ispin
    1773              : 
    1774              :                END IF ! update the step direction
    1775              : 
    1776              :                ! estimate the step size
    1777          956 :                IF (.NOT. line_search) THEN
    1778              :                   ! we just changed the direction and
    1779              :                   ! we have only E and grad from the current step
    1780              :                   ! it is not enouhg to compute step_size - just guess it
    1781          494 :                   e0 = energy_new
    1782          494 :                   g0 = 0.0_dp
    1783          988 :                   DO ispin = 1, nspins
    1784          494 :                      CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    1785          988 :                      g0 = g0 + tempreal
    1786              :                   END DO ! ispin
    1787          494 :                   IF (iteration == 0) THEN
    1788           92 :                      step_size = optimizer%lin_search_step_size_guess
    1789              :                   ELSE
    1790          402 :                      IF (next_step_size_guess <= 0.0_dp) THEN
    1791            2 :                         step_size = optimizer%lin_search_step_size_guess
    1792              :                      ELSE
    1793              :                         ! take the last value
    1794          400 :                         step_size = next_step_size_guess*1.05_dp
    1795              :                      END IF
    1796              :                   END IF
    1797              :                   !IF (unit_nr > 0) THEN
    1798              :                   !   WRITE (unit_nr, '(A2,3F12.5)') &
    1799              :                   !      "EG", e0, g0, step_size
    1800              :                   !ENDIF
    1801          494 :                   next_step_size_guess = step_size
    1802              :                ELSE
    1803          462 :                   IF (fixed_line_search_niter == 0) THEN
    1804          462 :                      e1 = energy_new
    1805          462 :                      g1 = 0.0_dp
    1806          924 :                      DO ispin = 1, nspins
    1807          462 :                         CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    1808          924 :                         g1 = g1 + tempreal
    1809              :                      END DO ! ispin
    1810              :                      ! we have accumulated some points along this direction
    1811              :                      ! use only the most recent g0 (quadratic approximation)
    1812          462 :                      appr_sec_der = (g1 - g0)/step_size
    1813              :                      !IF (unit_nr > 0) THEN
    1814              :                      !   WRITE (unit_nr, '(A2,7F12.5)') &
    1815              :                      !      "EG", e0, e1, g0, g1, appr_sec_der, step_size, -g1/appr_sec_der
    1816              :                      !ENDIF
    1817          462 :                      step_size = -g1/appr_sec_der
    1818          462 :                      e0 = e1
    1819          462 :                      g0 = g1
    1820              :                   ELSE
    1821              :                      ! use e0, g0 and e1 to compute g1 and make a step
    1822              :                      ! if the next iteration is also line_search
    1823              :                      ! use e1 and the calculated g1 as e0 and g0
    1824            0 :                      e1 = energy_new
    1825            0 :                      appr_sec_der = 2.0*((e1 - e0)/step_size - g0)/step_size
    1826            0 :                      g1 = appr_sec_der*step_size + g0
    1827              :                      !IF (unit_nr > 0) THEN
    1828              :                      !   WRITE (unit_nr, '(A2,7F12.5)') &
    1829              :                      !      "EG", e0, e1, g0, g1, appr_sec_der, step_size, -g1/appr_sec_der
    1830              :                      !ENDIF
    1831              :                      !appr_sec_der=(g1-g0)/step_size
    1832            0 :                      step_size = -g1/appr_sec_der
    1833            0 :                      e0 = e1
    1834            0 :                      g0 = g1
    1835              :                   END IF
    1836          462 :                   next_step_size_guess = next_step_size_guess + step_size
    1837              :                END IF
    1838              : 
    1839              :                ! update theta
    1840         1912 :                DO ispin = 1, nspins
    1841         1912 :                   CALL dbcsr_add(m_theta(ispin), step(ispin), 1.0_dp, step_size)
    1842              :                END DO ! ispin
    1843              : 
    1844              :             END IF ! not.prepare_to_exit
    1845              : 
    1846         1048 :             IF (line_search) THEN
    1847          482 :                iter_type = "LS"
    1848              :             ELSE
    1849          566 :                iter_type = "CG"
    1850              :             END IF
    1851              : 
    1852         1048 :             t2 = m_walltime()
    1853         1048 :             IF (unit_nr > 0) THEN
    1854          524 :                iter_type = TRIM("ALMO SCF "//iter_type)
    1855              :                WRITE (unit_nr, '(T2,A13,I6,F23.10,E14.5,F14.9,F9.2)') &
    1856          524 :                   iter_type, iteration, &
    1857          524 :                   energy_new, energy_diff, grad_norm, &
    1858         1048 :                   t2 - t1
    1859          524 :                IF (penalty_occ_local .OR. penalty_occ_vol) THEN
    1860              :                   WRITE (unit_nr, '(T2,A25,F23.10)') &
    1861            0 :                      "Energy component:", (energy_new - penalty_func_new - localization_obj_function)
    1862              :                END IF
    1863          524 :                IF (penalty_occ_local) THEN
    1864              :                   WRITE (unit_nr, '(T2,A25,F23.10)') &
    1865            0 :                      "Localization component:", localization_obj_function
    1866              :                END IF
    1867          524 :                IF (penalty_occ_vol) THEN
    1868              :                   WRITE (unit_nr, '(T2,A25,F23.10)') &
    1869            0 :                      "Penalty component:", penalty_func_new
    1870              :                END IF
    1871              :             END IF
    1872              : 
    1873         1048 :             IF (my_special_case == xalmo_case_block_diag) THEN
    1874           46 :                IF (penalty_occ_vol) THEN
    1875            0 :                   almo_scf_env%almo_scf_energy = energy_new - penalty_func_new - localization_obj_function
    1876              :                ELSE
    1877           46 :                   almo_scf_env%almo_scf_energy = energy_new - localization_obj_function
    1878              :                END IF
    1879              :             END IF
    1880              : 
    1881         1048 :             t1 = m_walltime()
    1882              : 
    1883         1048 :             iteration = iteration + 1
    1884         1048 :             IF (prepare_to_exit) EXIT
    1885              : 
    1886              :          END DO ! inner SCF loop
    1887              : 
    1888           92 :          IF (converged .OR. (outer_iteration >= outer_max_iter)) THEN
    1889           86 :             outer_prepare_to_exit = .TRUE.
    1890              :          END IF
    1891              : 
    1892           92 :          outer_iteration = outer_iteration + 1
    1893           92 :          IF (outer_prepare_to_exit) EXIT
    1894              : 
    1895              :       END DO ! outer SCF loop
    1896              : 
    1897          172 :       DO ispin = 1, nspins
    1898           86 :          IF (converged .AND. almo_mathematica) THEN
    1899              :             CPWARN_IF(ispin > 1, "Mathematica files will be overwritten")
    1900              :             CALL print_mathematica_matrix(matrix_t_out(ispin), "matrixTf.dat")
    1901              :          END IF
    1902              :       END DO ! ispin
    1903              : 
    1904              :       ! post SCF-loop calculations
    1905           86 :       IF (converged) THEN
    1906              : 
    1907              :          CALL wrap_up_xalmo_scf( &
    1908              :             qs_env=qs_env, &
    1909              :             almo_scf_env=almo_scf_env, &
    1910              :             perturbation_in=perturbation_only, &
    1911              :             m_xalmo_in=matrix_t_out, &
    1912              :             m_quench_in=quench_t, &
    1913           86 :             energy_inout=energy_new)
    1914              : 
    1915              :       END IF ! if converged
    1916              : 
    1917          172 :       DO ispin = 1, nspins
    1918           86 :          CALL dbcsr_release(prec_vv(ispin))
    1919           86 :          CALL dbcsr_release(STsiginv_0(ispin))
    1920           86 :          CALL dbcsr_release(ST(ispin))
    1921           86 :          CALL dbcsr_release(FTsiginv(ispin))
    1922           86 :          CALL dbcsr_release(siginvTFTsiginv(ispin))
    1923           86 :          CALL dbcsr_release(prev_grad(ispin))
    1924           86 :          CALL dbcsr_release(prev_step(ispin))
    1925           86 :          CALL dbcsr_release(grad(ispin))
    1926           86 :          CALL dbcsr_release(step(ispin))
    1927           86 :          CALL dbcsr_release(prev_minus_prec_grad(ispin))
    1928           86 :          CALL dbcsr_release(m_theta(ispin))
    1929           86 :          CALL dbcsr_release(m_t_in_local(ispin))
    1930           86 :          CALL dbcsr_release(m_sig_sqrti_ii(ispin))
    1931           86 :          CALL release_submatrices(domain_r_down(:, ispin))
    1932           86 :          CALL release_submatrices(bad_modes_projector_down(:, ispin))
    1933           86 :          CALL dbcsr_release(tempNOcc(ispin))
    1934           86 :          CALL dbcsr_release(tempNOcc_1(ispin))
    1935          172 :          CALL dbcsr_release(tempOccOcc(ispin))
    1936              :       END DO ! ispin
    1937              : 
    1938           86 :       DEALLOCATE (tempNOcc)
    1939           86 :       DEALLOCATE (tempNOcc_1)
    1940           86 :       DEALLOCATE (tempOccOcc)
    1941           86 :       DEALLOCATE (prec_vv)
    1942           86 :       DEALLOCATE (siginvTFTsiginv)
    1943           86 :       DEALLOCATE (STsiginv_0)
    1944           86 :       DEALLOCATE (FTsiginv)
    1945           86 :       DEALLOCATE (ST)
    1946           86 :       DEALLOCATE (prev_grad)
    1947           86 :       DEALLOCATE (grad)
    1948           86 :       DEALLOCATE (prev_step)
    1949           86 :       DEALLOCATE (step)
    1950           86 :       DEALLOCATE (prev_minus_prec_grad)
    1951           86 :       DEALLOCATE (m_sig_sqrti_ii)
    1952              : 
    1953          684 :       DEALLOCATE (domain_r_down)
    1954          684 :       DEALLOCATE (bad_modes_projector_down)
    1955              : 
    1956           86 :       DEALLOCATE (penalty_occ_vol_g_prefactor)
    1957           86 :       DEALLOCATE (penalty_occ_vol_h_prefactor)
    1958           86 :       DEALLOCATE (grad_norm_spin)
    1959           86 :       DEALLOCATE (nocc)
    1960              : 
    1961           86 :       DEALLOCATE (m_theta, m_t_in_local)
    1962           86 :       IF (penalty_occ_local) THEN
    1963            0 :          DO idim0 = 1, dim_op
    1964            0 :             DO reim = 1, SIZE(op_sm_set_qs, 1)
    1965            0 :                DEALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    1966            0 :                DEALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    1967              :             END DO
    1968              :          END DO
    1969            0 :          DEALLOCATE (op_sm_set_qs)
    1970            0 :          DEALLOCATE (op_sm_set_almo)
    1971            0 :          DEALLOCATE (weights)
    1972              :       END IF
    1973              : 
    1974           86 :       IF (.NOT. converged .AND. .NOT. optimizer%early_stopping_on) THEN
    1975            0 :          CPABORT("Optimization not converged! ")
    1976              :       END IF
    1977              : 
    1978           86 :       CALL timestop(handle)
    1979              : 
    1980          172 :    END SUBROUTINE almo_scf_xalmo_pcg
    1981              : 
    1982              : ! **************************************************************************************************
    1983              : !> \brief Optimization of NLMOs using PCG minimizers
    1984              : !> \param qs_env ...
    1985              : !> \param optimizer   controls the optimization algorithm
    1986              : !> \param matrix_s - AO overlap (NAOs x NAOs)
    1987              : !> \param matrix_mo_in - initial MOs (NAOs x NMOs)
    1988              : !> \param matrix_mo_out - final MOs (NAOs x NMOs)
    1989              : !> \param template_matrix_sigma - template (NMOs x NMOs)
    1990              : !> \param overlap_determinant - the determinant of the MOs overlap
    1991              : !> \param mat_distr_aos - info on the distribution of AOs
    1992              : !> \param virtuals ...
    1993              : !> \param eps_filter ...
    1994              : !> \par History
    1995              : !>       2018.10 created [Rustam Z Khaliullin]
    1996              : !> \author Rustam Z Khaliullin
    1997              : ! **************************************************************************************************
    1998            8 :    SUBROUTINE almo_scf_construct_nlmos(qs_env, optimizer, &
    1999              :                                        matrix_s, matrix_mo_in, matrix_mo_out, &
    2000              :                                        template_matrix_sigma, overlap_determinant, &
    2001              :                                        mat_distr_aos, virtuals, eps_filter)
    2002              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    2003              :       TYPE(optimizer_options_type), INTENT(INOUT)        :: optimizer
    2004              :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix_s
    2005              :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:), &
    2006              :          INTENT(INOUT)                                   :: matrix_mo_in, matrix_mo_out
    2007              :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:), &
    2008              :          INTENT(IN)                                      :: template_matrix_sigma
    2009              :       REAL(KIND=dp), INTENT(INOUT)                       :: overlap_determinant
    2010              :       INTEGER, INTENT(IN)                                :: mat_distr_aos
    2011              :       LOGICAL, INTENT(IN)                                :: virtuals
    2012              :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    2013              : 
    2014              :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_construct_nlmos'
    2015              : 
    2016              :       CHARACTER(LEN=30)                                  :: iter_type, print_string
    2017              :       INTEGER :: cg_iteration, dim_op, handle, iatom, idim0, isgf, ispin, iteration, &
    2018              :          line_search_iteration, linear_search_type, max_iter, natom, ncol, nspins, &
    2019              :          outer_iteration, outer_max_iter, prec_type, reim, unit_nr
    2020           16 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: first_sgf, last_sgf, nocc, nsgf
    2021              :       LOGICAL                                            :: converged, d_bfgs, just_started, l_bfgs, &
    2022              :                                                             line_search, outer_prepare_to_exit, &
    2023              :                                                             prepare_to_exit, reset_conjugator
    2024              :       REAL(KIND=dp) :: appr_sec_der, beta, bfgs_rho, bfgs_sum, denom, denom2, e0, e1, g0, g0sign, &
    2025              :          g1, g1sign, grad_norm, line_search_error, localization_obj_function, &
    2026              :          localization_obj_function_ispin, next_step_size_guess, obj_function_ispin, objf_diff, &
    2027              :          objf_new, objf_old, penalty_amplitude, penalty_func_ispin, penalty_func_new, spin_factor, &
    2028              :          step_size, t1, t2, tempreal
    2029            8 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: diagonal, grad_norm_spin, &
    2030            8 :                                                             penalty_vol_prefactor, &
    2031            8 :                                                             suggested_vol_penalty, weights
    2032              :       TYPE(cell_type), POINTER                           :: cell
    2033              :       TYPE(cp_logger_type), POINTER                      :: logger
    2034            8 :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: qs_matrix_s
    2035            8 :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: op_sm_set_almo, op_sm_set_qs
    2036            8 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: approx_inv_hessian, bfgs_s, bfgs_y, grad, &
    2037            8 :          m_S0, m_sig_sqrti_ii, m_siginv, m_sigma, m_t_mo_local, m_theta, m_theta_normalized, &
    2038            8 :          prev_grad, prev_m_theta, prev_minus_prec_grad, prev_step, step, tempNOcc1, tempOccOcc1, &
    2039            8 :          tempOccOcc2, tempOccOcc3
    2040            8 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:, :, :)  :: m_B0
    2041           24 :       TYPE(lbfgs_history_type)                           :: nlmo_lbfgs_history
    2042              :       TYPE(mp_comm_type)                                 :: group
    2043            8 :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
    2044            8 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
    2045              : 
    2046            8 :       CALL timeset(routineN, handle)
    2047              : 
    2048              :       ! get a useful output_unit
    2049            8 :       logger => cp_get_default_logger()
    2050            8 :       IF (logger%para_env%is_source()) THEN
    2051            4 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    2052              :       ELSE
    2053              :          unit_nr = -1
    2054              :       END IF
    2055              : 
    2056            8 :       nspins = SIZE(matrix_mo_in)
    2057              : 
    2058            8 :       IF (unit_nr > 0) THEN
    2059            4 :          WRITE (unit_nr, *)
    2060            4 :          IF (.NOT. virtuals) THEN
    2061            4 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 24), &
    2062            8 :                " Optimization of occupied NLMOs ", REPEAT("-", 23)
    2063              :          ELSE
    2064            0 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 24), &
    2065            0 :                " Optimization of virtual NLMOs ", REPEAT("-", 24)
    2066              :          END IF
    2067            4 :          WRITE (unit_nr, *)
    2068            4 :          WRITE (unit_nr, '(T2,A13,A6,A23,A14,A14,A9)') "Method", "Iter", &
    2069            8 :             "Objective Function", "Change", "Convergence", "Time"
    2070            4 :          WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
    2071              :       END IF
    2072              : 
    2073            8 :       NULLIFY (particle_set)
    2074              : 
    2075              :       CALL get_qs_env(qs_env=qs_env, &
    2076              :                       matrix_s=qs_matrix_s, &
    2077              :                       cell=cell, &
    2078              :                       particle_set=particle_set, &
    2079            8 :                       qs_kind_set=qs_kind_set)
    2080              : 
    2081            8 :       natom = SIZE(particle_set, 1)
    2082           24 :       ALLOCATE (first_sgf(natom))
    2083           16 :       ALLOCATE (last_sgf(natom))
    2084           16 :       ALLOCATE (nsgf(natom))
    2085              :       !   construction of
    2086              :       CALL get_particle_set(particle_set, qs_kind_set, &
    2087            8 :                             first_sgf=first_sgf, last_sgf=last_sgf, nsgf=nsgf)
    2088              : 
    2089              :       ! m_theta contains a set of variational parameters
    2090              :       ! that define one-electron orbitals
    2091           32 :       ALLOCATE (m_theta(nspins))
    2092           16 :       DO ispin = 1, nspins
    2093              :          CALL dbcsr_create(m_theta(ispin), &
    2094              :                            template=template_matrix_sigma(ispin), &
    2095            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2096              :          ! create initial guess for the main variable - identity matrix
    2097            8 :          CALL dbcsr_set(m_theta(ispin), 0.0_dp)
    2098           16 :          CALL dbcsr_add_on_diag(m_theta(ispin), 1.0_dp)
    2099              :       END DO
    2100              : 
    2101            8 :       SELECT CASE (optimizer%opt_penalty%operator_type)
    2102              :       CASE (op_loc_berry)
    2103              : 
    2104            0 :          IF (cell%orthorhombic) THEN
    2105            0 :             dim_op = 3
    2106              :          ELSE
    2107            0 :             dim_op = 6
    2108              :          END IF
    2109            0 :          ALLOCATE (weights(6))
    2110            0 :          weights = 0.0_dp
    2111            0 :          CALL initialize_weights(cell, weights)
    2112            0 :          ALLOCATE (op_sm_set_qs(2, dim_op))
    2113            0 :          ALLOCATE (op_sm_set_almo(2, dim_op))
    2114              :          ! allocate space for T0^t.B.T0
    2115            0 :          ALLOCATE (m_B0(2, dim_op, nspins))
    2116            0 :          DO idim0 = 1, dim_op
    2117            0 :             DO reim = 1, SIZE(op_sm_set_qs, 1)
    2118            0 :                NULLIFY (op_sm_set_qs(reim, idim0)%matrix, op_sm_set_almo(reim, idim0)%matrix)
    2119            0 :                ALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    2120            0 :                ALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    2121              :                CALL dbcsr_copy(op_sm_set_qs(reim, idim0)%matrix, qs_matrix_s(1)%matrix, &
    2122            0 :                              name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
    2123            0 :                CALL dbcsr_set(op_sm_set_qs(reim, idim0)%matrix, 0.0_dp)
    2124              :                CALL dbcsr_copy(op_sm_set_almo(reim, idim0)%matrix, matrix_s, &
    2125            0 :                              name="almo_scf_env%op_sm_"//TRIM(ADJUSTL(cp_to_string(reim)))//"-"//TRIM(ADJUSTL(cp_to_string(idim0))))
    2126            0 :                CALL dbcsr_set(op_sm_set_almo(reim, idim0)%matrix, 0.0_dp)
    2127            0 :                DO ispin = 1, nspins
    2128              :                   CALL dbcsr_create(m_B0(reim, idim0, ispin), &
    2129              :                                     template=m_theta(ispin), &
    2130            0 :                                     matrix_type=dbcsr_type_no_symmetry)
    2131            0 :                   CALL dbcsr_set(m_B0(reim, idim0, ispin), 0.0_dp)
    2132              :                END DO
    2133              :             END DO
    2134              :          END DO
    2135              : 
    2136            0 :          CALL compute_berry_operator(qs_env, cell, op_sm_set_qs, dim_op)
    2137              : 
    2138              :       CASE (op_loc_pipek)
    2139              : 
    2140            8 :          dim_op = natom
    2141           24 :          ALLOCATE (weights(dim_op))
    2142           80 :          weights = 1.0_dp
    2143              : 
    2144          184 :          ALLOCATE (m_B0(1, dim_op, nspins))
    2145              :          !m_B0 first dim is 1 now!
    2146           88 :          DO idim0 = 1, dim_op
    2147          152 :             DO reim = 1, 1 !SIZE(op_sm_set_qs, 1)
    2148          216 :                DO ispin = 1, nspins
    2149              :                   CALL dbcsr_create(m_B0(reim, idim0, ispin), &
    2150              :                                     template=m_theta(ispin), &
    2151           72 :                                     matrix_type=dbcsr_type_no_symmetry)
    2152          144 :                   CALL dbcsr_set(m_B0(reim, idim0, ispin), 0.0_dp)
    2153              :                END DO
    2154              :             END DO
    2155              :          END DO
    2156              : 
    2157              :       END SELECT
    2158              : 
    2159              :       ! penalty amplitude adjusts the strenght of volume conservation
    2160            8 :       penalty_amplitude = optimizer%opt_penalty%penalty_strength
    2161              :       !penalty_occ_vol = ( optimizer%opt_penalty%occ_vol_method /= penalty_type_none )
    2162              :       !penalty_local = ( optimizer%opt_penalty%occ_loc_method /= penalty_type_none )
    2163              : 
    2164              :       ! preconditioner control
    2165            8 :       prec_type = optimizer%preconditioner
    2166              : 
    2167              :       ! use diagonal BFGS if preconditioner is set
    2168            8 :       d_bfgs = .FALSE.
    2169            8 :       l_bfgs = .FALSE.
    2170            8 :       IF (prec_type /= xalmo_prec_zero) l_bfgs = .TRUE.
    2171            8 :       IF (l_bfgs .AND. (optimizer%conjugator /= cg_zero)) THEN
    2172            0 :          CPABORT("Cannot use conjugators with BFGS")
    2173              :       END IF
    2174            8 :       IF (l_bfgs) THEN
    2175            8 :          CALL lbfgs_create(nlmo_lbfgs_history, nspins, nstore=10)
    2176              :       END IF
    2177              : 
    2178              :       IF (nspins == 1) THEN
    2179              :          spin_factor = 2.0_dp
    2180              :       ELSE
    2181              :          spin_factor = 1.0_dp
    2182              :       END IF
    2183              : 
    2184           24 :       ALLOCATE (grad_norm_spin(nspins))
    2185           24 :       ALLOCATE (nocc(nspins))
    2186           16 :       ALLOCATE (penalty_vol_prefactor(nspins))
    2187           16 :       ALLOCATE (suggested_vol_penalty(nspins))
    2188              : 
    2189              :       ! create a local copy of matrix_mo_in because
    2190              :       ! matrix_mo_in and matrix_mo_out can be the same matrix
    2191              :       ! we need to make sure data in matrix_mo_in is intact
    2192              :       ! after we start writing to matrix_mo_out
    2193           24 :       ALLOCATE (m_t_mo_local(nspins))
    2194           16 :       DO ispin = 1, nspins
    2195              :          CALL dbcsr_create(m_t_mo_local(ispin), &
    2196              :                            template=matrix_mo_in(ispin), &
    2197            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2198           16 :          CALL dbcsr_copy(m_t_mo_local(ispin), matrix_mo_in(ispin))
    2199              :       END DO
    2200              : 
    2201           24 :       ALLOCATE (approx_inv_hessian(nspins))
    2202           24 :       ALLOCATE (m_theta_normalized(nspins))
    2203           32 :       ALLOCATE (prev_m_theta(nspins))
    2204           24 :       ALLOCATE (m_S0(nspins))
    2205           24 :       ALLOCATE (prev_grad(nspins))
    2206           24 :       ALLOCATE (grad(nspins))
    2207           24 :       ALLOCATE (prev_step(nspins))
    2208           24 :       ALLOCATE (step(nspins))
    2209           24 :       ALLOCATE (prev_minus_prec_grad(nspins))
    2210           24 :       ALLOCATE (m_sig_sqrti_ii(nspins))
    2211           24 :       ALLOCATE (m_sigma(nspins))
    2212           24 :       ALLOCATE (m_siginv(nspins))
    2213           32 :       ALLOCATE (tempNOcc1(nspins))
    2214           24 :       ALLOCATE (tempOccOcc1(nspins))
    2215           24 :       ALLOCATE (tempOccOcc2(nspins))
    2216           24 :       ALLOCATE (tempOccOcc3(nspins))
    2217           24 :       ALLOCATE (bfgs_y(nspins))
    2218           24 :       ALLOCATE (bfgs_s(nspins))
    2219              : 
    2220           16 :       DO ispin = 1, nspins
    2221              : 
    2222              :          ! init temporary storage
    2223              :          CALL dbcsr_create(tempNOcc1(ispin), &
    2224              :                            template=matrix_mo_out(ispin), &
    2225            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2226              :          CALL dbcsr_create(approx_inv_hessian(ispin), &
    2227              :                            template=m_theta(ispin), &
    2228            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2229              :          CALL dbcsr_create(m_theta_normalized(ispin), &
    2230              :                            template=m_theta(ispin), &
    2231            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2232              :          CALL dbcsr_create(prev_m_theta(ispin), &
    2233              :                            template=m_theta(ispin), &
    2234            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2235              :          CALL dbcsr_create(m_S0(ispin), &
    2236              :                            template=m_theta(ispin), &
    2237            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2238              :          CALL dbcsr_create(prev_grad(ispin), &
    2239              :                            template=m_theta(ispin), &
    2240            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2241              :          CALL dbcsr_create(grad(ispin), &
    2242              :                            template=m_theta(ispin), &
    2243            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2244              :          CALL dbcsr_create(prev_step(ispin), &
    2245              :                            template=m_theta(ispin), &
    2246            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2247              :          CALL dbcsr_create(step(ispin), &
    2248              :                            template=m_theta(ispin), &
    2249            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2250              :          CALL dbcsr_create(prev_minus_prec_grad(ispin), &
    2251              :                            template=m_theta(ispin), &
    2252            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2253              :          CALL dbcsr_create(m_sig_sqrti_ii(ispin), &
    2254              :                            template=m_theta(ispin), &
    2255            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2256              :          CALL dbcsr_create(m_sigma(ispin), &
    2257              :                            template=m_theta(ispin), &
    2258            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2259              :          CALL dbcsr_create(m_siginv(ispin), &
    2260              :                            template=m_theta(ispin), &
    2261            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2262              :          CALL dbcsr_create(tempOccOcc1(ispin), &
    2263              :                            template=m_theta(ispin), &
    2264            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2265              :          CALL dbcsr_create(tempOccOcc2(ispin), &
    2266              :                            template=m_theta(ispin), &
    2267            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2268              :          CALL dbcsr_create(tempOccOcc3(ispin), &
    2269              :                            template=m_theta(ispin), &
    2270            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2271              :          CALL dbcsr_create(bfgs_s(ispin), &
    2272              :                            template=m_theta(ispin), &
    2273            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2274              :          CALL dbcsr_create(bfgs_y(ispin), &
    2275              :                            template=m_theta(ispin), &
    2276            8 :                            matrix_type=dbcsr_type_no_symmetry)
    2277              : 
    2278            8 :          CALL dbcsr_set(step(ispin), 0.0_dp)
    2279            8 :          CALL dbcsr_set(prev_step(ispin), 0.0_dp)
    2280              : 
    2281              :          CALL dbcsr_get_info(template_matrix_sigma(ispin), &
    2282            8 :                              nfullrows_total=nocc(ispin))
    2283              : 
    2284            8 :          penalty_vol_prefactor(ispin) = -penalty_amplitude !KEEP: * spin_factor * nocc(ispin)
    2285              : 
    2286              :          ! compute m_S0=T0^t.S.T0
    2287              :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2288              :                              matrix_s, &
    2289              :                              m_t_mo_local(ispin), &
    2290              :                              0.0_dp, tempNOcc1(ispin), &
    2291            8 :                              filter_eps=eps_filter)
    2292              :          CALL dbcsr_multiply("T", "N", 1.0_dp, &
    2293              :                              m_t_mo_local(ispin), &
    2294              :                              tempNOcc1(ispin), &
    2295              :                              0.0_dp, m_S0(ispin), &
    2296            8 :                              filter_eps=eps_filter)
    2297              : 
    2298            8 :          SELECT CASE (optimizer%opt_penalty%operator_type)
    2299              : 
    2300              :          CASE (op_loc_berry)
    2301              : 
    2302              :             ! compute m_B0=T0^t.B.T0
    2303            0 :             DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
    2304              : 
    2305            0 :                DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
    2306              : 
    2307              :                   CALL matrix_qs_to_almo(op_sm_set_qs(reim, idim0)%matrix, &
    2308            0 :                                          op_sm_set_almo(reim, idim0)%matrix, mat_distr_aos)
    2309              : 
    2310              :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2311              :                                       op_sm_set_almo(reim, idim0)%matrix, &
    2312              :                                       m_t_mo_local(ispin), &
    2313              :                                       0.0_dp, tempNOcc1(ispin), &
    2314            0 :                                       filter_eps=eps_filter)
    2315              : 
    2316              :                   CALL dbcsr_multiply("T", "N", 1.0_dp, &
    2317              :                                       m_t_mo_local(ispin), &
    2318              :                                       tempNOcc1(ispin), &
    2319              :                                       0.0_dp, m_B0(reim, idim0, ispin), &
    2320            0 :                                       filter_eps=eps_filter)
    2321              : 
    2322            0 :                   DEALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    2323            0 :                   DEALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    2324              : 
    2325              :                END DO
    2326              : 
    2327              :             END DO ! end loop over idim0
    2328              : 
    2329              :          CASE (op_loc_pipek)
    2330              : 
    2331              :             ! compute m_B0=T0^t.B.T0
    2332           80 :             DO iatom = 1, natom ! this loop is over "miller" ind
    2333              : 
    2334           72 :                isgf = first_sgf(iatom)
    2335           72 :                ncol = nsgf(iatom)
    2336              : 
    2337              :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2338              :                                    matrix_s, &
    2339              :                                    m_t_mo_local(ispin), &
    2340              :                                    0.0_dp, tempNOcc1(ispin), &
    2341           72 :                                    filter_eps=eps_filter)
    2342              : 
    2343              :                CALL dbcsr_multiply("T", "N", 0.5_dp, &
    2344              :                                    m_t_mo_local(ispin), &
    2345              :                                    tempNOcc1(ispin), &
    2346              :                                    0.0_dp, m_B0(1, iatom, ispin), &
    2347              :                                    first_k=isgf, last_k=isgf + ncol - 1, &
    2348           72 :                                    filter_eps=eps_filter)
    2349              : 
    2350              :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2351              :                                    matrix_s, &
    2352              :                                    m_t_mo_local(ispin), &
    2353              :                                    0.0_dp, tempNOcc1(ispin), &
    2354              :                                    first_k=isgf, last_k=isgf + ncol - 1, &
    2355           72 :                                    filter_eps=eps_filter)
    2356              : 
    2357              :                CALL dbcsr_multiply("T", "N", 0.5_dp, &
    2358              :                                    m_t_mo_local(ispin), &
    2359              :                                    tempNOcc1(ispin), &
    2360              :                                    1.0_dp, m_B0(1, iatom, ispin), &
    2361           80 :                                    filter_eps=eps_filter)
    2362              : 
    2363              :             END DO ! end loop over iatom
    2364              : 
    2365              :          END SELECT
    2366              : 
    2367              :       END DO ! ispin
    2368              : 
    2369            8 :       IF (optimizer%opt_penalty%operator_type == op_loc_berry) THEN
    2370            0 :          DO idim0 = 1, SIZE(op_sm_set_qs, 2) ! this loop is over miller ind
    2371            0 :             DO reim = 1, SIZE(op_sm_set_qs, 1) ! this loop is over Re/Im
    2372            0 :                DEALLOCATE (op_sm_set_qs(reim, idim0)%matrix)
    2373            0 :                DEALLOCATE (op_sm_set_almo(reim, idim0)%matrix)
    2374              :             END DO
    2375              :          END DO
    2376            0 :          DEALLOCATE (op_sm_set_qs, op_sm_set_almo)
    2377              :       END IF
    2378              : 
    2379              :       ! start the outer SCF loop
    2380            8 :       outer_max_iter = optimizer%max_iter_outer_loop
    2381            8 :       outer_prepare_to_exit = .FALSE.
    2382            8 :       outer_iteration = 0
    2383            8 :       grad_norm = 0.0_dp
    2384            8 :       penalty_func_new = 0.0_dp
    2385            8 :       linear_search_type = 1 ! safe restart, no quadratic assumption, takes more steps
    2386              :       localization_obj_function = 0.0_dp
    2387              :       penalty_func_new = 0.0_dp
    2388              : 
    2389              :       DO
    2390              : 
    2391              :          ! start the inner SCF loop
    2392            8 :          max_iter = optimizer%max_iter
    2393            8 :          prepare_to_exit = .FALSE.
    2394            8 :          line_search = .FALSE.
    2395            8 :          converged = .FALSE.
    2396            8 :          iteration = 0
    2397            8 :          cg_iteration = 0
    2398            8 :          line_search_iteration = 0
    2399            8 :          obj_function_ispin = 0.0_dp
    2400            8 :          objf_new = 0.0_dp
    2401            8 :          objf_old = 0.0_dp
    2402            8 :          objf_diff = 0.0_dp
    2403            8 :          line_search_error = 0.0_dp
    2404            8 :          t1 = m_walltime()
    2405            8 :          next_step_size_guess = 0.0_dp
    2406              : 
    2407              :          DO
    2408              : 
    2409           82 :             just_started = (iteration == 0) .AND. (outer_iteration == 0)
    2410              : 
    2411          164 :             DO ispin = 1, nspins
    2412              : 
    2413           82 :                CALL dbcsr_get_info(m_sig_sqrti_ii(ispin), group=group)
    2414              : 
    2415              :                ! compute diagonal (a^t.sigma0.a)^(-1/2)
    2416              :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2417              :                                    m_S0(ispin), m_theta(ispin), 0.0_dp, &
    2418              :                                    tempOccOcc1(ispin), &
    2419           82 :                                    filter_eps=eps_filter)
    2420           82 :                CALL dbcsr_set(m_sig_sqrti_ii(ispin), 0.0_dp)
    2421           82 :                CALL dbcsr_add_on_diag(m_sig_sqrti_ii(ispin), 1.0_dp)
    2422              :                CALL dbcsr_multiply("T", "N", 1.0_dp, &
    2423              :                                    m_theta(ispin), tempOccOcc1(ispin), 0.0_dp, &
    2424              :                                    m_sig_sqrti_ii(ispin), &
    2425           82 :                                    retain_sparsity=.TRUE.)
    2426          246 :                ALLOCATE (diagonal(nocc(ispin)))
    2427           82 :                CALL dbcsr_get_diag(m_sig_sqrti_ii(ispin), diagonal)
    2428           82 :                CALL group%sum(diagonal)
    2429              :                ! TODO: works for zero diagonal elements?
    2430         1368 :                diagonal(:) = 1.0_dp/SQRT(diagonal(:))
    2431           82 :                CALL dbcsr_set(m_sig_sqrti_ii(ispin), 0.0_dp)
    2432           82 :                CALL dbcsr_set_diag(m_sig_sqrti_ii(ispin), diagonal)
    2433           82 :                DEALLOCATE (diagonal)
    2434              : 
    2435              :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2436              :                                    m_theta(ispin), &
    2437              :                                    m_sig_sqrti_ii(ispin), &
    2438              :                                    0.0_dp, m_theta_normalized(ispin), &
    2439           82 :                                    filter_eps=eps_filter)
    2440              : 
    2441              :                ! compute new orbitals
    2442              :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    2443              :                                    m_t_mo_local(ispin), &
    2444              :                                    m_theta_normalized(ispin), &
    2445              :                                    0.0_dp, matrix_mo_out(ispin), &
    2446          246 :                                    filter_eps=eps_filter)
    2447              : 
    2448              :             END DO
    2449              : 
    2450              :             ! compute objective function
    2451           82 :             localization_obj_function = 0.0_dp
    2452           82 :             penalty_func_new = 0.0_dp
    2453          164 :             DO ispin = 1, nspins
    2454              : 
    2455              :                CALL compute_obj_nlmos( &
    2456              :                   !obj_function_ispin=obj_function_ispin, &
    2457              :                   localization_obj_function_ispin=localization_obj_function_ispin, &
    2458              :                   penalty_func_ispin=penalty_func_ispin, &
    2459              :                   overlap_determinant=overlap_determinant, &
    2460              :                   m_sigma=m_sigma(ispin), &
    2461              :                   nocc=nocc(ispin), &
    2462              :                   m_B0=m_B0(:, :, ispin), &
    2463              :                   m_theta_normalized=m_theta_normalized(ispin), &
    2464              :                   template_matrix_mo=matrix_mo_out(ispin), &
    2465              :                   weights=weights, &
    2466              :                   m_S0=m_S0(ispin), &
    2467              :                   just_started=just_started, &
    2468              :                   penalty_vol_prefactor=penalty_vol_prefactor(ispin), &
    2469              :                   penalty_amplitude=penalty_amplitude, &
    2470           82 :                   eps_filter=eps_filter)
    2471              : 
    2472           82 :                localization_obj_function = localization_obj_function + localization_obj_function_ispin
    2473          164 :                penalty_func_new = penalty_func_new + penalty_func_ispin
    2474              : 
    2475              :             END DO ! ispin
    2476           82 :             objf_new = penalty_func_new + localization_obj_function
    2477              : 
    2478          164 :             DO ispin = 1, nspins
    2479              :                ! save the previous gradient to compute beta
    2480              :                ! do it only if the previous grad was computed
    2481              :                ! for .NOT.line_search
    2482          164 :                IF (line_search_iteration == 0 .AND. iteration /= 0) THEN
    2483           30 :                   CALL dbcsr_copy(prev_grad(ispin), grad(ispin))
    2484              :                END IF
    2485              : 
    2486              :             END DO ! ispin
    2487              : 
    2488              :             ! compute the gradient
    2489          164 :             DO ispin = 1, nspins
    2490              : 
    2491              :                CALL invert_Hotelling( &
    2492              :                   matrix_inverse=m_siginv(ispin), &
    2493              :                   matrix=m_sigma(ispin), &
    2494              :                   threshold=eps_filter*10.0_dp, &
    2495              :                   filter_eps=eps_filter, &
    2496           82 :                   silent=.FALSE.)
    2497              : 
    2498              :                CALL compute_gradient_nlmos( &
    2499              :                   m_grad_out=grad(ispin), &
    2500              :                   m_B0=m_B0(:, :, ispin), &
    2501              :                   weights=weights, &
    2502              :                   m_S0=m_S0(ispin), &
    2503              :                   m_theta_normalized=m_theta_normalized(ispin), &
    2504              :                   m_siginv=m_siginv(ispin), &
    2505              :                   m_sig_sqrti_ii=m_sig_sqrti_ii(ispin), &
    2506              :                   penalty_vol_prefactor=penalty_vol_prefactor(ispin), &
    2507              :                   eps_filter=eps_filter, &
    2508          164 :                   suggested_vol_penalty=suggested_vol_penalty(ispin))
    2509              : 
    2510              :             END DO ! ispin
    2511              : 
    2512              :             ! check convergence and other exit criteria
    2513          164 :             DO ispin = 1, nspins
    2514          164 :                grad_norm_spin(ispin) = dbcsr_maxabs(grad(ispin))
    2515              :             END DO ! ispin
    2516          246 :             grad_norm = MAXVAL(grad_norm_spin)
    2517              : 
    2518           82 :             converged = (grad_norm <= optimizer%eps_error)
    2519           82 :             IF (converged .OR. (iteration >= max_iter)) THEN
    2520              :                prepare_to_exit = .TRUE.
    2521              :             END IF
    2522              : 
    2523              :             ! it is not time to exit just yet
    2524           74 :             IF (.NOT. prepare_to_exit) THEN
    2525              : 
    2526              :                ! check the gradient along the step direction
    2527              :                ! and decide whether to switch to the line-search mode
    2528              :                ! do not do this in the first iteration
    2529           74 :                IF (iteration /= 0) THEN
    2530              : 
    2531              :                   ! enforce at least one line search
    2532              :                   ! without even checking the error
    2533           68 :                   IF (.NOT. line_search) THEN
    2534              : 
    2535           30 :                      line_search = .TRUE.
    2536           30 :                      line_search_iteration = line_search_iteration + 1
    2537              : 
    2538              :                   ELSE
    2539              : 
    2540              :                      ! check the line-search error and decide whether to
    2541              :                      ! change the direction
    2542              :                      line_search_error = 0.0_dp
    2543              :                      denom = 0.0_dp
    2544              :                      denom2 = 0.0_dp
    2545              : 
    2546           76 :                      DO ispin = 1, nspins
    2547              : 
    2548           38 :                         CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    2549           38 :                         line_search_error = line_search_error + tempreal
    2550           38 :                         CALL dbcsr_dot(grad(ispin), grad(ispin), tempreal)
    2551           38 :                         denom = denom + tempreal
    2552           38 :                         CALL dbcsr_dot(step(ispin), step(ispin), tempreal)
    2553           76 :                         denom2 = denom2 + tempreal
    2554              : 
    2555              :                      END DO ! ispin
    2556              : 
    2557              :                      ! cosine of the angle between the step and grad
    2558              :                      ! (must be close to zero at convergence)
    2559           38 :                      line_search_error = line_search_error/SQRT(denom)/SQRT(denom2)
    2560              : 
    2561           38 :                      IF (ABS(line_search_error) > optimizer%lin_search_eps_error) THEN
    2562           14 :                         line_search = .TRUE.
    2563           14 :                         line_search_iteration = line_search_iteration + 1
    2564              :                      ELSE
    2565              :                         line_search = .FALSE.
    2566              :                         line_search_iteration = 0
    2567              :                      END IF
    2568              : 
    2569              :                   END IF
    2570              : 
    2571              :                END IF ! iteration.ne.0
    2572              : 
    2573            6 :                IF (line_search) THEN
    2574           44 :                   objf_diff = 0.0_dp
    2575              :                ELSE
    2576           30 :                   objf_diff = objf_new - objf_old
    2577           30 :                   objf_old = objf_new
    2578              :                END IF
    2579              : 
    2580              :                ! update the step direction
    2581           74 :                IF (.NOT. line_search) THEN
    2582              : 
    2583           60 :                   cg_iteration = cg_iteration + 1
    2584              : 
    2585              :                   ! save the previous step
    2586           60 :                   DO ispin = 1, nspins
    2587           60 :                      CALL dbcsr_copy(prev_step(ispin), step(ispin))
    2588              :                   END DO ! ispin
    2589              : 
    2590              :                   ! compute the new step:
    2591              :                   ! if available use second derivative info - bfgs, hessian, preconditioner
    2592           30 :                   IF (prec_type == xalmo_prec_zero) THEN ! no second derivatives
    2593              : 
    2594              :                      ! no preconditioner
    2595            0 :                      DO ispin = 1, nspins
    2596              : 
    2597            0 :                         CALL dbcsr_copy(step(ispin), grad(ispin))
    2598            0 :                         CALL dbcsr_scale(step(ispin), -1.0_dp)
    2599              : 
    2600              :                      END DO ! ispin
    2601              : 
    2602              :                   ELSE ! use second derivatives
    2603              : 
    2604              :                      ! compute and invert hessian/precond?
    2605           30 :                      IF (iteration == 0) THEN
    2606              : 
    2607              :                         IF (d_bfgs) THEN
    2608              : 
    2609              :                            ! create matrix filled with 1.0 here
    2610              :                            CALL fill_matrix_with_ones(approx_inv_hessian(1))
    2611              :                            IF (nspins > 1) THEN
    2612              :                               DO ispin = 2, nspins
    2613              :                                  CALL dbcsr_copy(approx_inv_hessian(ispin), approx_inv_hessian(1))
    2614              :                               END DO
    2615              :                            END IF
    2616              : 
    2617            6 :                         ELSE IF (l_bfgs) THEN
    2618              : 
    2619            6 :                            CALL lbfgs_seed(nlmo_lbfgs_history, m_theta, grad)
    2620           12 :                            DO ispin = 1, nspins
    2621            6 :                               CALL dbcsr_copy(step(ispin), grad(ispin))
    2622           12 :                               CALL dbcsr_scale(step(ispin), -1.0_dp)
    2623              :                            END DO ! ispin
    2624              : 
    2625              :                         ELSE
    2626              : 
    2627              :                            ! computing preconditioner
    2628            0 :                            DO ispin = 1, nspins
    2629              : 
    2630              :                               ! TODO: write preconditioner code later
    2631              :                               ! For now, create matrix filled with 1.0 here
    2632            0 :                               CALL fill_matrix_with_ones(approx_inv_hessian(ispin))
    2633              :                               !CALL compute_preconditioner(&
    2634              :                               !       m_prec_out=approx_hessian(ispin),&
    2635              :                               !       m_ks=almo_scf_env%matrix_ks(ispin),&
    2636              :                               !       m_s=matrix_s,&
    2637              :                               !       m_siginv=almo_scf_env%template_matrix_sigma(ispin),&
    2638              :                               !       m_quench_t=quench_t(ispin),&
    2639              :                               !       m_FTsiginv=FTsiginv(ispin),&
    2640              :                               !       m_siginvTFTsiginv=siginvTFTsiginv(ispin),&
    2641              :                               !       m_ST=ST(ispin),&
    2642              :                               !       para_env=almo_scf_env%para_env,&
    2643              :                               !       blacs_env=almo_scf_env%blacs_env,&
    2644              :                               !       nocc_of_domain=almo_scf_env%nocc_of_domain(:,ispin),&
    2645              :                               !       domain_s_inv=almo_scf_env%domain_s_inv(:,ispin),&
    2646              :                               !       domain_r_down=domain_r_down(:,ispin),&
    2647              :                               !       cpu_of_domain=almo_scf_env%cpu_of_domain,&
    2648              :                               !       domain_map=almo_scf_env%domain_map(ispin),&
    2649              :                               !       assume_t0_q0x=assume_t0_q0x,&
    2650              :                               !       penalty_occ_vol=penalty_occ_vol,&
    2651              :                               !       penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin),&
    2652              :                               !       eps_filter=eps_filter,&
    2653              :                               !       neg_thr=0.5_dp,&
    2654              :                               !       spin_factor=spin_factor,&
    2655              :                               !       special_case=my_special_case)
    2656              :                               !CALL invert hessian
    2657              :                            END DO ! ispin
    2658              : 
    2659              :                         END IF
    2660              : 
    2661              :                      ELSE ! not iteration zero
    2662              : 
    2663              :                         ! update approx inverse hessian
    2664              :                         IF (d_bfgs) THEN ! diagonal BFGS
    2665              : 
    2666              :                            DO ispin = 1, nspins
    2667              : 
    2668              :                               ! compute s and y
    2669              :                               CALL dbcsr_copy(bfgs_y(ispin), grad(ispin))
    2670              :                               CALL dbcsr_add(bfgs_y(ispin), prev_grad(ispin), 1.0_dp, -1.0_dp)
    2671              :                               CALL dbcsr_copy(bfgs_s(ispin), m_theta(ispin))
    2672              :                               CALL dbcsr_add(bfgs_s(ispin), prev_m_theta(ispin), 1.0_dp, -1.0_dp)
    2673              : 
    2674              :                               ! compute rho
    2675              :                               CALL dbcsr_dot(grad(ispin), step(ispin), bfgs_rho)
    2676              :                               bfgs_rho = 1.0_dp/bfgs_rho
    2677              : 
    2678              :                               ! compute the sum of the squared elements of bfgs_y
    2679              :                               CALL dbcsr_dot(bfgs_y(ispin), bfgs_y(ispin), bfgs_sum)
    2680              : 
    2681              :                               ! first term: start collecting new inv hessian in this temp matrix
    2682              :                               CALL dbcsr_copy(tempOccOcc2(ispin), approx_inv_hessian(ispin))
    2683              : 
    2684              :                               ! second term: + rho * s * s
    2685              :                               CALL dbcsr_hadamard_product(bfgs_s(ispin), bfgs_s(ispin), tempOccOcc1(ispin))
    2686              :                               CALL dbcsr_add(tempOccOcc2(ispin), tempOccOcc1(ispin), 1.0_dp, bfgs_rho)
    2687              : 
    2688              :                               ! third term: + rho^2 * s * s * H * sum_(y * y)
    2689              :                               CALL dbcsr_hadamard_product(tempOccOcc1(ispin), &
    2690              :                                                           approx_inv_hessian(ispin), tempOccOcc3(ispin))
    2691              :                               CALL dbcsr_add(tempOccOcc2(ispin), tempOccOcc3(ispin), &
    2692              :                                              1.0_dp, bfgs_rho*bfgs_rho*bfgs_sum)
    2693              : 
    2694              :                               ! fourth term: - 2 * rho * s * y * H
    2695              :                               CALL dbcsr_hadamard_product(bfgs_y(ispin), &
    2696              :                                                           approx_inv_hessian(ispin), tempOccOcc1(ispin))
    2697              :                               CALL dbcsr_hadamard_product(bfgs_s(ispin), tempOccOcc1(ispin), tempOccOcc3(ispin))
    2698              :                               CALL dbcsr_add(tempOccOcc2(ispin), tempOccOcc3(ispin), &
    2699              :                                              1.0_dp, -2.0_dp*bfgs_rho)
    2700              : 
    2701              :                               CALL dbcsr_copy(approx_inv_hessian(ispin), tempOccOcc2(ispin))
    2702              : 
    2703              :                            END DO
    2704              : 
    2705           24 :                         ELSE IF (l_bfgs) THEN
    2706              : 
    2707           24 :                            CALL lbfgs_get_direction(nlmo_lbfgs_history, m_theta, grad, step)
    2708              : 
    2709              :                         END IF ! which method?
    2710              : 
    2711              :                      END IF ! compute approximate inverse hessian
    2712              : 
    2713           30 :                      IF (.NOT. l_bfgs) THEN
    2714              : 
    2715            0 :                         DO ispin = 1, nspins
    2716              : 
    2717              :                            CALL dbcsr_hadamard_product(approx_inv_hessian(ispin), &
    2718            0 :                                                        grad(ispin), step(ispin))
    2719            0 :                            CALL dbcsr_scale(step(ispin), -1.0_dp)
    2720              : 
    2721              :                         END DO ! ispin
    2722              : 
    2723              :                      END IF
    2724              : 
    2725              :                   END IF ! second derivative type fork
    2726              : 
    2727              :                   ! check whether we need to reset conjugate directions
    2728           30 :                   IF (iteration == 0) THEN
    2729            6 :                      reset_conjugator = .TRUE.
    2730              :                   END IF
    2731              : 
    2732              :                   ! compute the conjugation coefficient - beta
    2733           30 :                   IF (.NOT. reset_conjugator) THEN
    2734              :                      CALL compute_cg_beta( &
    2735              :                         beta=beta, &
    2736              :                         reset_conjugator=reset_conjugator, &
    2737              :                         conjugator=optimizer%conjugator, &
    2738              :                         grad=grad(:), &
    2739              :                         prev_grad=prev_grad(:), &
    2740              :                         step=step(:), &
    2741              :                         prev_step=prev_step(:), &
    2742              :                         prev_minus_prec_grad=prev_minus_prec_grad(:) &
    2743           24 :                         )
    2744              : 
    2745              :                   END IF
    2746              : 
    2747           30 :                   IF (reset_conjugator) THEN
    2748              : 
    2749            6 :                      beta = 0.0_dp
    2750            6 :                      IF (unit_nr > 0 .AND. (.NOT. just_started)) THEN
    2751            0 :                         WRITE (unit_nr, '(T2,A35)') "Re-setting conjugator to zero"
    2752              :                      END IF
    2753            6 :                      reset_conjugator = .FALSE.
    2754              : 
    2755              :                   END IF
    2756              : 
    2757              :                   ! save the preconditioned gradient (useful for beta)
    2758           60 :                   DO ispin = 1, nspins
    2759              : 
    2760           30 :                      CALL dbcsr_copy(prev_minus_prec_grad(ispin), step(ispin))
    2761              : 
    2762              :                      ! conjugate the step direction
    2763           60 :                      CALL dbcsr_add(step(ispin), prev_step(ispin), 1.0_dp, beta)
    2764              : 
    2765              :                   END DO ! ispin
    2766              : 
    2767              :                END IF ! update the step direction
    2768              : 
    2769              :                ! estimate the step size
    2770           74 :                IF (.NOT. line_search) THEN
    2771              :                   ! we just changed the direction and
    2772              :                   ! we have only E and grad from the current step
    2773              :                   ! it is not enough to compute step_size - just guess it
    2774           30 :                   e0 = objf_new
    2775           30 :                   g0 = 0.0_dp
    2776           60 :                   DO ispin = 1, nspins
    2777           30 :                      CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    2778           60 :                      g0 = g0 + tempreal
    2779              :                   END DO ! ispin
    2780              :                   g0sign = SIGN(1.0_dp, g0) ! sign of g0
    2781              :                   IF (linear_search_type == 1) THEN ! this is quadratic LS
    2782           30 :                      IF (iteration == 0) THEN
    2783            6 :                         step_size = optimizer%lin_search_step_size_guess
    2784              :                      ELSE
    2785           24 :                         IF (next_step_size_guess <= 0.0_dp) THEN
    2786            0 :                            step_size = optimizer%lin_search_step_size_guess
    2787              :                         ELSE
    2788              :                            ! take the last value
    2789           24 :                            step_size = optimizer%lin_search_step_size_guess
    2790              :                            !step_size = next_step_size_guess*1.05_dp
    2791              :                         END IF
    2792              :                      END IF
    2793              :                   ELSE IF (linear_search_type == 2) THEN ! this is cautious LS
    2794              :                      ! this LS type is designed not to trust quadratic appr
    2795              :                      ! so it always restarts from a safe step size
    2796              :                      step_size = optimizer%lin_search_step_size_guess
    2797              :                   END IF
    2798           30 :                   IF (unit_nr > 0) THEN
    2799           15 :                      WRITE (unit_nr, '(T21,3A19)') "Line position", "Line grad", "Next line step"
    2800           15 :                      WRITE (unit_nr, '(T2,A19,3F19.5)') "Line search", 0.0_dp, g0, step_size
    2801              :                   END IF
    2802           30 :                   next_step_size_guess = step_size
    2803              :                ELSE ! this is not the first line search
    2804           44 :                   e1 = objf_new
    2805           44 :                   g1 = 0.0_dp
    2806           88 :                   DO ispin = 1, nspins
    2807           44 :                      CALL dbcsr_dot(grad(ispin), step(ispin), tempreal)
    2808           88 :                      g1 = g1 + tempreal
    2809              :                   END DO ! ispin
    2810           44 :                   g1sign = SIGN(1.0_dp, g1) ! sign of g1
    2811              :                   IF (linear_search_type == 1) THEN
    2812              :                      ! we have accumulated some points along this direction
    2813              :                      ! use only the most recent g0 (quadratic approximation)
    2814           44 :                      appr_sec_der = (g1 - g0)/step_size
    2815              :                      !IF (unit_nr > 0) THEN
    2816              :                      !   WRITE (unit_nr, '(A2,7F12.5)') &
    2817              :                      !      "DT", e0, e1, g0, g1, appr_sec_der, step_size, -g1/appr_sec_der
    2818              :                      !ENDIF
    2819           44 :                      step_size = -g1/appr_sec_der
    2820              :                   ELSE IF (linear_search_type == 2) THEN
    2821              :                      ! alternative method for finding step size
    2822              :                      ! do not use quadratic approximation, only gradient signs
    2823              :                      IF (g1sign /= g0sign) THEN
    2824              :                         step_size = -step_size/2.0;
    2825              :                      ELSE
    2826              :                         step_size = step_size*1.5;
    2827              :                      END IF
    2828              :                   END IF
    2829              :                   ! end alternative LS types
    2830           44 :                   IF (unit_nr > 0) THEN
    2831           22 :                      WRITE (unit_nr, '(T21,3A19)') "Line position", "Line grad", "Next line step"
    2832           22 :                      WRITE (unit_nr, '(T2,A19,3F19.5)') "Line search", next_step_size_guess, g1, step_size
    2833              :                   END IF
    2834           44 :                   e0 = e1
    2835           44 :                   g0 = g1
    2836              :                   g0sign = g1sign
    2837           44 :                   next_step_size_guess = next_step_size_guess + step_size
    2838              :                END IF
    2839              : 
    2840              :                ! update theta
    2841          148 :                DO ispin = 1, nspins
    2842           74 :                   IF (.NOT. line_search) THEN ! we prepared to perform the first line search
    2843              :                      ! "previous" refers to the previous CG step, not the previous LS step
    2844           30 :                      CALL dbcsr_copy(prev_m_theta(ispin), m_theta(ispin))
    2845              :                   END IF
    2846          148 :                   CALL dbcsr_add(m_theta(ispin), step(ispin), 1.0_dp, step_size)
    2847              :                END DO ! ispin
    2848              : 
    2849              :             END IF ! not.prepare_to_exit
    2850              : 
    2851           82 :             IF (line_search) THEN
    2852           50 :                iter_type = "LS"
    2853              :             ELSE
    2854           32 :                iter_type = "CG"
    2855              :             END IF
    2856              : 
    2857           82 :             t2 = m_walltime()
    2858           82 :             IF (unit_nr > 0) THEN
    2859           41 :                iter_type = TRIM("NLMO OPT "//iter_type)
    2860              :                WRITE (unit_nr, '(T2,A13,I6,F23.10,E14.5,F14.9,F9.2)') &
    2861           41 :                   iter_type, iteration, &
    2862           41 :                   objf_new, objf_diff, grad_norm, &
    2863           82 :                   t2 - t1
    2864              :                WRITE (unit_nr, '(T2,A19,F23.10)') &
    2865           41 :                   "Localization:", localization_obj_function
    2866              :                WRITE (unit_nr, '(T2,A19,F23.10)') &
    2867           41 :                   "Orthogonalization:", penalty_func_new
    2868              :             END IF
    2869           82 :             t1 = m_walltime()
    2870              : 
    2871           82 :             iteration = iteration + 1
    2872           82 :             IF (prepare_to_exit) EXIT
    2873              : 
    2874              :          END DO ! inner loop
    2875              : 
    2876            8 :          IF (converged .OR. (outer_iteration >= outer_max_iter)) THEN
    2877            8 :             outer_prepare_to_exit = .TRUE.
    2878              :          END IF
    2879              : 
    2880            8 :          outer_iteration = outer_iteration + 1
    2881            8 :          IF (outer_prepare_to_exit) EXIT
    2882              : 
    2883              :       END DO ! outer loop
    2884              : 
    2885              :       ! return the optimal determinant penalty
    2886            8 :       optimizer%opt_penalty%penalty_strength = 0.0_dp
    2887           16 :       DO ispin = 1, nspins
    2888              :          optimizer%opt_penalty%penalty_strength = optimizer%opt_penalty%penalty_strength + &
    2889           16 :                                                   (-1.0_dp)*penalty_vol_prefactor(ispin)
    2890              :       END DO
    2891            8 :       optimizer%opt_penalty%penalty_strength = optimizer%opt_penalty%penalty_strength/nspins
    2892              : 
    2893            8 :       IF (converged) THEN
    2894            8 :          iter_type = "Final"
    2895              :       ELSE
    2896            0 :          iter_type = "Unconverged"
    2897              :       END IF
    2898              : 
    2899            8 :       IF (unit_nr > 0) THEN
    2900            4 :          WRITE (unit_nr, '()')
    2901            4 :          print_string = TRIM(iter_type)//" localization:"
    2902              :          WRITE (unit_nr, '(T2,A29,F30.10)') &
    2903            4 :             print_string, localization_obj_function
    2904            4 :          print_string = TRIM(iter_type)//" determinant:"
    2905              :          WRITE (unit_nr, '(T2,A29,F30.10)') &
    2906            4 :             print_string, overlap_determinant
    2907            4 :          print_string = TRIM(iter_type)//" penalty strength:"
    2908              :          WRITE (unit_nr, '(T2,A29,F30.10)') &
    2909            4 :             print_string, optimizer%opt_penalty%penalty_strength
    2910              :       END IF
    2911              : 
    2912              :       ! clean up
    2913            8 :       IF (l_bfgs) THEN
    2914            8 :          CALL lbfgs_release(nlmo_lbfgs_history)
    2915              :       END IF
    2916           16 :       DO ispin = 1, nspins
    2917           80 :          DO idim0 = 1, SIZE(m_B0, 2)
    2918          152 :             DO reim = 1, SIZE(m_B0, 1)
    2919          144 :                CALL dbcsr_release(m_B0(reim, idim0, ispin))
    2920              :             END DO
    2921              :          END DO
    2922            8 :          CALL dbcsr_release(m_theta(ispin))
    2923            8 :          CALL dbcsr_release(m_t_mo_local(ispin))
    2924            8 :          CALL dbcsr_release(tempNOcc1(ispin))
    2925            8 :          CALL dbcsr_release(approx_inv_hessian(ispin))
    2926            8 :          CALL dbcsr_release(prev_m_theta(ispin))
    2927            8 :          CALL dbcsr_release(m_theta_normalized(ispin))
    2928            8 :          CALL dbcsr_release(m_S0(ispin))
    2929            8 :          CALL dbcsr_release(prev_grad(ispin))
    2930            8 :          CALL dbcsr_release(grad(ispin))
    2931            8 :          CALL dbcsr_release(prev_step(ispin))
    2932            8 :          CALL dbcsr_release(step(ispin))
    2933            8 :          CALL dbcsr_release(prev_minus_prec_grad(ispin))
    2934            8 :          CALL dbcsr_release(m_sig_sqrti_ii(ispin))
    2935            8 :          CALL dbcsr_release(m_sigma(ispin))
    2936            8 :          CALL dbcsr_release(m_siginv(ispin))
    2937            8 :          CALL dbcsr_release(tempOccOcc1(ispin))
    2938            8 :          CALL dbcsr_release(tempOccOcc2(ispin))
    2939            8 :          CALL dbcsr_release(tempOccOcc3(ispin))
    2940            8 :          CALL dbcsr_release(bfgs_y(ispin))
    2941           16 :          CALL dbcsr_release(bfgs_s(ispin))
    2942              :       END DO ! ispin
    2943              : 
    2944            8 :       DEALLOCATE (grad_norm_spin)
    2945            8 :       DEALLOCATE (nocc)
    2946            8 :       DEALLOCATE (penalty_vol_prefactor)
    2947            8 :       DEALLOCATE (suggested_vol_penalty)
    2948              : 
    2949            8 :       DEALLOCATE (approx_inv_hessian)
    2950            8 :       DEALLOCATE (prev_m_theta)
    2951            8 :       DEALLOCATE (m_theta_normalized)
    2952            8 :       DEALLOCATE (m_S0)
    2953            8 :       DEALLOCATE (prev_grad)
    2954            8 :       DEALLOCATE (grad)
    2955            8 :       DEALLOCATE (prev_step)
    2956            8 :       DEALLOCATE (step)
    2957            8 :       DEALLOCATE (prev_minus_prec_grad)
    2958            8 :       DEALLOCATE (m_sig_sqrti_ii)
    2959            8 :       DEALLOCATE (m_sigma)
    2960            8 :       DEALLOCATE (m_siginv)
    2961            8 :       DEALLOCATE (tempNOcc1)
    2962            8 :       DEALLOCATE (tempOccOcc1)
    2963            8 :       DEALLOCATE (tempOccOcc2)
    2964            8 :       DEALLOCATE (tempOccOcc3)
    2965            8 :       DEALLOCATE (bfgs_y)
    2966            8 :       DEALLOCATE (bfgs_s)
    2967              : 
    2968            8 :       DEALLOCATE (m_theta, m_t_mo_local)
    2969            8 :       DEALLOCATE (m_B0)
    2970            8 :       DEALLOCATE (weights)
    2971            8 :       DEALLOCATE (first_sgf, last_sgf, nsgf)
    2972              : 
    2973            8 :       IF (.NOT. converged) THEN
    2974            0 :          CPABORT("Optimization not converged! ")
    2975              :       END IF
    2976              : 
    2977            8 :       CALL timestop(handle)
    2978              : 
    2979           24 :    END SUBROUTINE almo_scf_construct_nlmos
    2980              : 
    2981              : ! **************************************************************************************************
    2982              : !> \brief Analysis of the orbitals
    2983              : !> \param detailed_analysis ...
    2984              : !> \param eps_filter ...
    2985              : !> \param m_T_in ...
    2986              : !> \param m_T0_in ...
    2987              : !> \param m_siginv_in ...
    2988              : !> \param m_siginv0_in ...
    2989              : !> \param m_S_in ...
    2990              : !> \param m_KS0_in ...
    2991              : !> \param m_quench_t_in ...
    2992              : !> \param energy_out ...
    2993              : !> \param m_eda_out ...
    2994              : !> \param m_cta_out ...
    2995              : !> \par History
    2996              : !>       2017.07 created [Rustam Z Khaliullin]
    2997              : !> \author Rustam Z Khaliullin
    2998              : ! **************************************************************************************************
    2999           24 :    SUBROUTINE xalmo_analysis(detailed_analysis, eps_filter, m_T_in, m_T0_in, &
    3000           24 :                              m_siginv_in, m_siginv0_in, m_S_in, m_KS0_in, m_quench_t_in, energy_out, &
    3001           24 :                              m_eda_out, m_cta_out)
    3002              : 
    3003              :       LOGICAL, INTENT(IN)                                :: detailed_analysis
    3004              :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    3005              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_T_in, m_T0_in, m_siginv_in, &
    3006              :                                                             m_siginv0_in, m_S_in, m_KS0_in, &
    3007              :                                                             m_quench_t_in
    3008              :       REAL(KIND=dp), INTENT(INOUT)                       :: energy_out
    3009              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_eda_out, m_cta_out
    3010              : 
    3011              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'xalmo_analysis'
    3012              : 
    3013              :       INTEGER                                            :: handle, ispin, nspins
    3014              :       REAL(KIND=dp)                                      :: energy_ispin, spin_factor
    3015              :       TYPE(dbcsr_type)                                   :: FTsiginv0, Fvo0, m_X, siginvTFTsiginv0, &
    3016              :                                                             ST0
    3017              : 
    3018           24 :       CALL timeset(routineN, handle)
    3019              : 
    3020           24 :       nspins = SIZE(m_T_in)
    3021              : 
    3022           24 :       IF (nspins == 1) THEN
    3023           24 :          spin_factor = 2.0_dp
    3024              :       ELSE
    3025            0 :          spin_factor = 1.0_dp
    3026              :       END IF
    3027              : 
    3028           24 :       energy_out = 0.0_dp
    3029           48 :       DO ispin = 1, nspins
    3030              : 
    3031              :          ! create temporary matrices
    3032              :          CALL dbcsr_create(Fvo0, &
    3033              :                            template=m_T_in(ispin), &
    3034           24 :                            matrix_type=dbcsr_type_no_symmetry)
    3035              :          CALL dbcsr_create(FTsiginv0, &
    3036              :                            template=m_T_in(ispin), &
    3037           24 :                            matrix_type=dbcsr_type_no_symmetry)
    3038              :          CALL dbcsr_create(ST0, &
    3039              :                            template=m_T_in(ispin), &
    3040           24 :                            matrix_type=dbcsr_type_no_symmetry)
    3041              :          CALL dbcsr_create(m_X, &
    3042              :                            template=m_T_in(ispin), &
    3043           24 :                            matrix_type=dbcsr_type_no_symmetry)
    3044              :          CALL dbcsr_create(siginvTFTsiginv0, &
    3045              :                            template=m_siginv0_in(ispin), &
    3046           24 :                            matrix_type=dbcsr_type_no_symmetry)
    3047              : 
    3048              :          ! compute F_{virt,occ} for the zero-delocalization state
    3049              :          CALL compute_frequently_used_matrices( &
    3050              :             filter_eps=eps_filter, &
    3051              :             m_T_in=m_T0_in(ispin), &
    3052              :             m_siginv_in=m_siginv0_in(ispin), &
    3053              :             m_S_in=m_S_in(1), &
    3054              :             m_F_in=m_KS0_in(ispin), &
    3055              :             m_FTsiginv_out=FTsiginv0, &
    3056              :             m_siginvTFTsiginv_out=siginvTFTsiginv0, &
    3057           24 :             m_ST_out=ST0)
    3058           24 :          CALL dbcsr_copy(Fvo0, m_quench_t_in(ispin))
    3059           24 :          CALL dbcsr_copy(Fvo0, FTsiginv0, keep_sparsity=.TRUE.)
    3060              :          CALL dbcsr_multiply("N", "N", -1.0_dp, &
    3061              :                              ST0, &
    3062              :                              siginvTFTsiginv0, &
    3063              :                              1.0_dp, Fvo0, &
    3064           24 :                              retain_sparsity=.TRUE.)
    3065              : 
    3066              :          ! get single excitation amplitudes
    3067           24 :          CALL dbcsr_copy(m_X, m_T0_in(ispin))
    3068           24 :          CALL dbcsr_add(m_X, m_T_in(ispin), -1.0_dp, 1.0_dp)
    3069              : 
    3070           24 :          CALL dbcsr_dot(m_X, Fvo0, energy_ispin)
    3071           24 :          energy_out = energy_out + energy_ispin*spin_factor
    3072              : 
    3073           24 :          IF (detailed_analysis) THEN
    3074              : 
    3075            2 :             CALL dbcsr_hadamard_product(m_X, Fvo0, m_eda_out(ispin))
    3076            2 :             CALL dbcsr_scale(m_eda_out(ispin), spin_factor)
    3077            2 :             CALL dbcsr_filter(m_eda_out(ispin), eps_filter)
    3078              : 
    3079              :             ! first, compute [QR'R]_mu^i = [(S-SRS).X.siginv']_mu^i
    3080              :             ! a. FTsiginv0 = S.T0*siginv0
    3081              :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3082              :                                 ST0, &
    3083              :                                 m_siginv0_in(ispin), &
    3084              :                                 0.0_dp, FTsiginv0, &
    3085            2 :                                 filter_eps=eps_filter)
    3086              :             ! c. tmp1(use ST0) = S.X
    3087              :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3088              :                                 m_S_in(1), &
    3089              :                                 m_X, &
    3090              :                                 0.0_dp, ST0, &
    3091            2 :                                 filter_eps=eps_filter)
    3092              :             ! d. tmp2 = tr(T0).tmp1 = tr(T0).S.X
    3093              :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    3094              :                                 m_T0_in(ispin), &
    3095              :                                 ST0, &
    3096              :                                 0.0_dp, siginvTFTsiginv0, &
    3097            2 :                                 filter_eps=eps_filter)
    3098              :             ! e. tmp1 = tmp1 - tmp3.tmp2 = S.X - S.T0.siginv0*tr(T0).S.X
    3099              :             !         = (1-S.R0).S.X
    3100              :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    3101              :                                 FTsiginv0, &
    3102              :                                 siginvTFTsiginv0, &
    3103              :                                 1.0_dp, ST0, &
    3104            2 :                                 filter_eps=eps_filter)
    3105              :             ! f. tmp2(use FTsiginv0) = tmp1*siginv
    3106              :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3107              :                                 ST0, &
    3108              :                                 m_siginv_in(ispin), &
    3109              :                                 0.0_dp, FTsiginv0, &
    3110            2 :                                 filter_eps=eps_filter)
    3111              :             ! second, compute traces of blocks [RR'Q]^x_y * [X]^y_x
    3112              :             CALL dbcsr_hadamard_product(m_X, &
    3113            2 :                                         FTsiginv0, m_cta_out(ispin))
    3114            2 :             CALL dbcsr_scale(m_cta_out(ispin), spin_factor)
    3115            2 :             CALL dbcsr_filter(m_cta_out(ispin), eps_filter)
    3116              : 
    3117              :          END IF ! do ALMO EDA/CTA
    3118              : 
    3119           24 :          CALL dbcsr_release(Fvo0)
    3120           24 :          CALL dbcsr_release(FTsiginv0)
    3121           24 :          CALL dbcsr_release(ST0)
    3122           24 :          CALL dbcsr_release(m_X)
    3123           72 :          CALL dbcsr_release(siginvTFTsiginv0)
    3124              : 
    3125              :       END DO ! ispin
    3126              : 
    3127           24 :       CALL timestop(handle)
    3128              : 
    3129           24 :    END SUBROUTINE xalmo_analysis
    3130              : 
    3131              : ! **************************************************************************************************
    3132              : !> \brief Compute matrices that are used often in various parts of the
    3133              : !>        optimization procedure
    3134              : !> \param filter_eps ...
    3135              : !> \param m_T_in ...
    3136              : !> \param m_siginv_in ...
    3137              : !> \param m_S_in ...
    3138              : !> \param m_F_in ...
    3139              : !> \param m_FTsiginv_out ...
    3140              : !> \param m_siginvTFTsiginv_out ...
    3141              : !> \param m_ST_out ...
    3142              : !> \par History
    3143              : !>       2016.12 created [Rustam Z Khaliullin]
    3144              : !> \author Rustam Z Khaliullin
    3145              : ! **************************************************************************************************
    3146         1498 :    SUBROUTINE compute_frequently_used_matrices(filter_eps, &
    3147              :                                                m_T_in, m_siginv_in, m_S_in, m_F_in, m_FTsiginv_out, &
    3148              :                                                m_siginvTFTsiginv_out, m_ST_out)
    3149              : 
    3150              :       REAL(KIND=dp), INTENT(IN)                          :: filter_eps
    3151              :       TYPE(dbcsr_type), INTENT(IN)                       :: m_T_in, m_siginv_in, m_S_in, m_F_in
    3152              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_FTsiginv_out, m_siginvTFTsiginv_out, &
    3153              :                                                             m_ST_out
    3154              : 
    3155              :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_frequently_used_matrices'
    3156              : 
    3157              :       INTEGER                                            :: handle
    3158              :       TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_oo_1
    3159              : 
    3160         1498 :       CALL timeset(routineN, handle)
    3161              : 
    3162              :       CALL dbcsr_create(m_tmp_no_1, &
    3163              :                         template=m_T_in, &
    3164         1498 :                         matrix_type=dbcsr_type_no_symmetry)
    3165              :       CALL dbcsr_create(m_tmp_oo_1, &
    3166              :                         template=m_siginv_in, &
    3167         1498 :                         matrix_type=dbcsr_type_no_symmetry)
    3168              : 
    3169              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3170              :                           m_F_in, &
    3171              :                           m_T_in, &
    3172              :                           0.0_dp, m_tmp_no_1, &
    3173         1498 :                           filter_eps=filter_eps)
    3174              : 
    3175              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3176              :                           m_tmp_no_1, &
    3177              :                           m_siginv_in, &
    3178              :                           0.0_dp, m_FTsiginv_out, &
    3179         1498 :                           filter_eps=filter_eps)
    3180              : 
    3181              :       CALL dbcsr_multiply("T", "N", 1.0_dp, &
    3182              :                           m_T_in, &
    3183              :                           m_FTsiginv_out, &
    3184              :                           0.0_dp, m_tmp_oo_1, &
    3185         1498 :                           filter_eps=filter_eps)
    3186              : 
    3187              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3188              :                           m_siginv_in, &
    3189              :                           m_tmp_oo_1, &
    3190              :                           0.0_dp, m_siginvTFTsiginv_out, &
    3191         1498 :                           filter_eps=filter_eps)
    3192              : 
    3193              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3194              :                           m_S_in, &
    3195              :                           m_T_in, &
    3196              :                           0.0_dp, m_ST_out, &
    3197         1498 :                           filter_eps=filter_eps)
    3198              : 
    3199         1498 :       CALL dbcsr_release(m_tmp_no_1)
    3200         1498 :       CALL dbcsr_release(m_tmp_oo_1)
    3201              : 
    3202         1498 :       CALL timestop(handle)
    3203              : 
    3204         1498 :    END SUBROUTINE compute_frequently_used_matrices
    3205              : 
    3206              : ! **************************************************************************************************
    3207              : !> \brief Split the matrix of virtual orbitals into two:
    3208              : !>        retained orbs and discarded
    3209              : !> \param almo_scf_env ...
    3210              : !> \par History
    3211              : !>       2011.09 created [Rustam Z Khaliullin]
    3212              : !> \author Rustam Z Khaliullin
    3213              : ! **************************************************************************************************
    3214            0 :    SUBROUTINE split_v_blk(almo_scf_env)
    3215              : 
    3216              :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    3217              : 
    3218              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'split_v_blk'
    3219              : 
    3220              :       INTEGER                                            :: discarded_v, handle, iblock_col, &
    3221              :                                                             iblock_col_size, iblock_row, &
    3222              :                                                             iblock_row_size, ispin, retained_v
    3223            0 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: data_p
    3224              :       TYPE(dbcsr_iterator_type)                          :: iter
    3225              : 
    3226            0 :       CALL timeset(routineN, handle)
    3227              : 
    3228            0 :       DO ispin = 1, almo_scf_env%nspins
    3229              : 
    3230              :          CALL dbcsr_work_create(almo_scf_env%matrix_v_blk(ispin), &
    3231            0 :                                 work_mutable=.TRUE.)
    3232              :          CALL dbcsr_work_create(almo_scf_env%matrix_v_disc_blk(ispin), &
    3233            0 :                                 work_mutable=.TRUE.)
    3234              : 
    3235            0 :          CALL dbcsr_iterator_start(iter, almo_scf_env%matrix_v_full_blk(ispin))
    3236              : 
    3237            0 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
    3238              : 
    3239              :             CALL dbcsr_iterator_next_block(iter, iblock_row, iblock_col, data_p, &
    3240            0 :                                            row_size=iblock_row_size, col_size=iblock_col_size)
    3241              : 
    3242            0 :             IF (iblock_row /= iblock_col) THEN
    3243            0 :                CPABORT("off-diagonal block found")
    3244              :             END IF
    3245              : 
    3246            0 :             retained_v = almo_scf_env%nvirt_of_domain(iblock_col, ispin)
    3247            0 :             discarded_v = almo_scf_env%nvirt_disc_of_domain(iblock_col, ispin)
    3248            0 :             CPASSERT(retained_v > 0)
    3249            0 :             CPASSERT(discarded_v > 0)
    3250              :             CALL dbcsr_put_block(almo_scf_env%matrix_v_disc_blk(ispin), iblock_row, iblock_col, &
    3251            0 :                                  block=data_p(:, (retained_v + 1):iblock_col_size))
    3252              :             CALL dbcsr_put_block(almo_scf_env%matrix_v_blk(ispin), iblock_row, iblock_col, &
    3253            0 :                                  block=data_p(:, 1:retained_v))
    3254              : 
    3255              :          END DO ! iterator
    3256            0 :          CALL dbcsr_iterator_stop(iter)
    3257              : 
    3258            0 :          CALL dbcsr_finalize(almo_scf_env%matrix_v_blk(ispin))
    3259            0 :          CALL dbcsr_finalize(almo_scf_env%matrix_v_disc_blk(ispin))
    3260              : 
    3261              :       END DO ! ispin
    3262              : 
    3263            0 :       CALL timestop(handle)
    3264              : 
    3265            0 :    END SUBROUTINE split_v_blk
    3266              : 
    3267              : ! **************************************************************************************************
    3268              : !> \brief various methods for calculating the Harris-Foulkes correction
    3269              : !> \param almo_scf_env ...
    3270              : !> \par History
    3271              : !>       2011.06 created [Rustam Z Khaliullin]
    3272              : !> \author Rustam Z Khaliullin
    3273              : ! **************************************************************************************************
    3274            0 :    SUBROUTINE harris_foulkes_correction(almo_scf_env)
    3275              : 
    3276              :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    3277              : 
    3278              :       CHARACTER(len=*), PARAMETER :: routineN = 'harris_foulkes_correction'
    3279              :       INTEGER, PARAMETER                                 :: cayley_transform = 1, dm_ls_step = 2
    3280              : 
    3281              :       INTEGER :: algorithm_id, handle, handle1, handle2, handle3, handle4, handle5, handle6, &
    3282              :          handle7, handle8, ispin, iteration, n, nmins, nspin, opt_k_max_iter, &
    3283              :          outer_opt_k_iteration, outer_opt_k_max_iter, unit_nr
    3284              :       INTEGER, DIMENSION(1)                              :: fake, nelectron_spin_real
    3285              :       LOGICAL :: converged, line_search, md_in_k_space, outer_opt_k_prepare_to_exit, &
    3286              :          prepare_to_exit, reset_conjugator, reset_step_size, use_cubic_approximation, &
    3287              :          use_quadratic_approximation
    3288              :       REAL(KIND=dp) :: aa, bb, beta, conjugacy_error, conjugacy_error_threshold, &
    3289              :          delta_obj_function, denom, energy_correction_final, frob_matrix, frob_matrix_base, fun0, &
    3290              :          fun1, gfun0, gfun1, grad_norm, grad_norm_frob, kappa, kin_energy, line_search_error, &
    3291              :          line_search_error_threshold, num_threshold, numer, obj_function, quadratic_approx_error, &
    3292              :          quadratic_approx_error_threshold, safety_multiplier, spin_factor, step_size, &
    3293              :          step_size_quadratic_approx, step_size_quadratic_approx2, t1, t1a, t1cholesky, t2, t2a, &
    3294              :          t2cholesky, tau, time_step, x_opt_eps_adaptive, x_opt_eps_adaptive_factor
    3295              :       REAL(KIND=dp), DIMENSION(1)                        :: local_mu
    3296              :       REAL(KIND=dp), DIMENSION(2)                        :: energy_correction
    3297              :       REAL(KIND=dp), DIMENSION(3)                        :: minima
    3298              :       TYPE(cp_logger_type), POINTER                      :: logger
    3299              :       TYPE(ct_step_env_type)                             :: ct_step_env
    3300              :       TYPE(dbcsr_type) :: grad, k_vd_index_down, k_vr_index_down, matrix_k_central, matrix_tmp1, &
    3301              :          matrix_tmp2, prec, prev_grad, prev_minus_prec_grad, prev_step, sigma_oo_curr, &
    3302              :          sigma_oo_curr_inv, sigma_vv_sqrt, sigma_vv_sqrt_guess, sigma_vv_sqrt_inv, &
    3303              :          sigma_vv_sqrt_inv_guess, step, t_curr, tmp1_n_vr, tmp2_n_o, tmp3_vd_vr, tmp4_o_vr, &
    3304              :          tmp_k_blk, vd_fixed, vd_index_sqrt, vd_index_sqrt_inv, velocity, vr_fixed, vr_index_sqrt, &
    3305              :          vr_index_sqrt_inv
    3306            0 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: matrix_p_almo_scf_converged
    3307              : 
    3308            0 :       CALL timeset(routineN, handle)
    3309              : 
    3310              :       ! get a useful output_unit
    3311            0 :       logger => cp_get_default_logger()
    3312            0 :       IF (logger%para_env%is_source()) THEN
    3313            0 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    3314              :       ELSE
    3315            0 :          unit_nr = -1
    3316              :       END IF
    3317              : 
    3318            0 :       nspin = almo_scf_env%nspins
    3319            0 :       energy_correction_final = 0.0_dp
    3320            0 :       IF (nspin == 1) THEN
    3321            0 :          spin_factor = 2.0_dp
    3322              :       ELSE
    3323            0 :          spin_factor = 1.0_dp
    3324              :       END IF
    3325              : 
    3326            0 :       IF (almo_scf_env%deloc_use_occ_orbs) THEN
    3327              :          algorithm_id = cayley_transform
    3328              :       ELSE
    3329            0 :          algorithm_id = dm_ls_step
    3330              :       END IF
    3331              : 
    3332            0 :       t1 = m_walltime()
    3333              : 
    3334            0 :       SELECT CASE (algorithm_id)
    3335              :       CASE (cayley_transform)
    3336              : 
    3337              :          ! rescale density matrix by spin factor
    3338              :          ! so the orbitals and density are consistent with each other
    3339            0 :          IF (almo_scf_env%nspins == 1) THEN
    3340            0 :             CALL dbcsr_scale(almo_scf_env%matrix_p(1), 1.0_dp/spin_factor)
    3341              :          END IF
    3342              : 
    3343              :          ! transform matrix_t not matrix_t_blk (we might need ALMOs later)
    3344            0 :          DO ispin = 1, nspin
    3345              : 
    3346              :             CALL dbcsr_copy(almo_scf_env%matrix_t(ispin), &
    3347            0 :                             almo_scf_env%matrix_t_blk(ispin))
    3348              : 
    3349              :             ! obtain orthogonalization matrices for ALMOs
    3350              :             ! RZK-warning - remove this sqrt(sigma) and inv(sqrt(sigma))
    3351              :             ! ideally ALMO scf should use sigma and sigma_inv in
    3352              :             ! the tensor_up_down representation
    3353              : 
    3354            0 :             IF (unit_nr > 0) THEN
    3355            0 :                WRITE (unit_nr, *) "sqrt and inv(sqrt) of MO overlap matrix"
    3356              :             END IF
    3357              :             CALL dbcsr_create(almo_scf_env%matrix_sigma_sqrt(ispin), &
    3358              :                               template=almo_scf_env%matrix_sigma(ispin), &
    3359            0 :                               matrix_type=dbcsr_type_no_symmetry)
    3360              :             CALL dbcsr_create(almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3361              :                               template=almo_scf_env%matrix_sigma(ispin), &
    3362            0 :                               matrix_type=dbcsr_type_no_symmetry)
    3363              : 
    3364              :             CALL matrix_sqrt_Newton_Schulz(almo_scf_env%matrix_sigma_sqrt(ispin), &
    3365              :                                            almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3366              :                                            almo_scf_env%matrix_sigma(ispin), &
    3367              :                                            threshold=almo_scf_env%eps_filter, &
    3368              :                                            order=almo_scf_env%order_lanczos, &
    3369              :                                            eps_lanczos=almo_scf_env%eps_lanczos, &
    3370            0 :                                            max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    3371              : 
    3372            0 :             IF (safe_mode) THEN
    3373              :                CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma(ispin), &
    3374              :                                  matrix_type=dbcsr_type_no_symmetry)
    3375              :                CALL dbcsr_create(matrix_tmp2, template=almo_scf_env%matrix_sigma(ispin), &
    3376              :                                  matrix_type=dbcsr_type_no_symmetry)
    3377              : 
    3378              :                CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3379              :                                    almo_scf_env%matrix_sigma(ispin), &
    3380              :                                    0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    3381              :                CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
    3382              :                                    almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3383              :                                    0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    3384              : 
    3385              :                frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    3386              :                CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    3387              :                frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    3388              :                IF (unit_nr > 0) THEN
    3389              :                   WRITE (unit_nr, *) "Error for (inv(sqrt(SIG))*SIG*inv(sqrt(SIG))-I)", frob_matrix/frob_matrix_base
    3390              :                END IF
    3391              : 
    3392              :                CALL dbcsr_release(matrix_tmp1)
    3393              :                CALL dbcsr_release(matrix_tmp2)
    3394              :             END IF
    3395              :          END DO
    3396              : 
    3397            0 :          IF (almo_scf_env%almo_update_algorithm == almo_scf_diag) THEN
    3398              : 
    3399            0 :             DO ispin = 1, nspin
    3400              : 
    3401            0 :                t1a = m_walltime()
    3402              : 
    3403            0 :                line_search_error_threshold = almo_scf_env%real01
    3404            0 :                conjugacy_error_threshold = almo_scf_env%real02
    3405            0 :                quadratic_approx_error_threshold = almo_scf_env%real03
    3406            0 :                x_opt_eps_adaptive_factor = almo_scf_env%real04
    3407              : 
    3408              :                !! the outer loop for k optimization
    3409            0 :                outer_opt_k_max_iter = almo_scf_env%opt_k_outer_max_iter
    3410            0 :                outer_opt_k_prepare_to_exit = .FALSE.
    3411            0 :                outer_opt_k_iteration = 0
    3412            0 :                grad_norm = 0.0_dp
    3413            0 :                grad_norm_frob = 0.0_dp
    3414            0 :                CALL dbcsr_set(almo_scf_env%matrix_x(ispin), 0.0_dp)
    3415            0 :                IF (almo_scf_env%deloc_truncate_virt == virt_full) outer_opt_k_max_iter = 0
    3416              : 
    3417            0 :                DO
    3418              : 
    3419              :                   ! obtain proper retained virtuals (1-R)|ALMO_vr>
    3420              :                   CALL apply_projector(psi_in=almo_scf_env%matrix_v_blk(ispin), &
    3421              :                                        psi_out=almo_scf_env%matrix_v(ispin), &
    3422              :                                        psi_projector=almo_scf_env%matrix_t_blk(ispin), &
    3423              :                                        metric=almo_scf_env%matrix_s(1), &
    3424              :                                        project_out=.TRUE., &
    3425              :                                        psi_projector_orthogonal=.FALSE., &
    3426              :                                        proj_in_template=almo_scf_env%matrix_ov(ispin), &
    3427              :                                        eps_filter=almo_scf_env%eps_filter, &
    3428            0 :                                        sig_inv_projector=almo_scf_env%matrix_sigma_inv(ispin))
    3429              :                   !sig_inv_template=almo_scf_env%matrix_sigma_inv(ispin),&
    3430              : 
    3431              :                   ! save initial retained virtuals
    3432              :                   CALL dbcsr_create(vr_fixed, &
    3433            0 :                                     template=almo_scf_env%matrix_v(ispin))
    3434            0 :                   CALL dbcsr_copy(vr_fixed, almo_scf_env%matrix_v(ispin))
    3435              : 
    3436              :                   ! init matrices common for optimized and non-optimized virts
    3437              :                   CALL dbcsr_create(sigma_vv_sqrt, &
    3438              :                                     template=almo_scf_env%matrix_sigma_vv(ispin), &
    3439            0 :                                     matrix_type=dbcsr_type_no_symmetry)
    3440              :                   CALL dbcsr_create(sigma_vv_sqrt_inv, &
    3441              :                                     template=almo_scf_env%matrix_sigma_vv(ispin), &
    3442            0 :                                     matrix_type=dbcsr_type_no_symmetry)
    3443              :                   CALL dbcsr_create(sigma_vv_sqrt_inv_guess, &
    3444              :                                     template=almo_scf_env%matrix_sigma_vv(ispin), &
    3445            0 :                                     matrix_type=dbcsr_type_no_symmetry)
    3446              :                   CALL dbcsr_create(sigma_vv_sqrt_guess, &
    3447              :                                     template=almo_scf_env%matrix_sigma_vv(ispin), &
    3448            0 :                                     matrix_type=dbcsr_type_no_symmetry)
    3449            0 :                   CALL dbcsr_set(sigma_vv_sqrt_guess, 0.0_dp)
    3450            0 :                   CALL dbcsr_add_on_diag(sigma_vv_sqrt_guess, 1.0_dp)
    3451            0 :                   CALL dbcsr_filter(sigma_vv_sqrt_guess, almo_scf_env%eps_filter)
    3452            0 :                   CALL dbcsr_set(sigma_vv_sqrt_inv_guess, 0.0_dp)
    3453            0 :                   CALL dbcsr_add_on_diag(sigma_vv_sqrt_inv_guess, 1.0_dp)
    3454            0 :                   CALL dbcsr_filter(sigma_vv_sqrt_inv_guess, almo_scf_env%eps_filter)
    3455              : 
    3456              :                   ! do things required to optimize virtuals
    3457            0 :                   IF (almo_scf_env%deloc_truncate_virt /= virt_full) THEN
    3458              : 
    3459              :                      ! project retained virtuals out of discarded block-by-block
    3460              :                      ! (1-Q^VR_ALMO)|ALMO_vd>
    3461              :                      ! this is probably not necessary, do it just to be safe
    3462              :                      !CALL apply_projector(psi_in=almo_scf_env%matrix_v_disc_blk(ispin),&
    3463              :                      !        psi_out=almo_scf_env%matrix_v_disc(ispin),&
    3464              :                      !        psi_projector=almo_scf_env%matrix_v_blk(ispin),&
    3465              :                      !        metric=almo_scf_env%matrix_s_blk(1),&
    3466              :                      !        project_out=.TRUE.,&
    3467              :                      !        psi_projector_orthogonal=.FALSE.,&
    3468              :                      !        proj_in_template=almo_scf_env%matrix_k_tr(ispin),&
    3469              :                      !        eps_filter=almo_scf_env%eps_filter,&
    3470              :                      !        sig_inv_template=almo_scf_env%matrix_sigma_vv(ispin))
    3471              :                      !CALL dbcsr_copy(almo_scf_env%matrix_v_disc_blk(ispin),&
    3472              :                      !        almo_scf_env%matrix_v_disc(ispin))
    3473              : 
    3474              :                      ! construct discarded virtuals (1-R)|ALMO_vd>
    3475              :                      CALL apply_projector(psi_in=almo_scf_env%matrix_v_disc_blk(ispin), &
    3476              :                                           psi_out=almo_scf_env%matrix_v_disc(ispin), &
    3477              :                                           psi_projector=almo_scf_env%matrix_t_blk(ispin), &
    3478              :                                           metric=almo_scf_env%matrix_s(1), &
    3479              :                                           project_out=.TRUE., &
    3480              :                                           psi_projector_orthogonal=.FALSE., &
    3481              :                                           proj_in_template=almo_scf_env%matrix_ov_disc(ispin), &
    3482              :                                           eps_filter=almo_scf_env%eps_filter, &
    3483            0 :                                           sig_inv_projector=almo_scf_env%matrix_sigma_inv(ispin))
    3484              :                      !sig_inv_template=almo_scf_env%matrix_sigma_inv(ispin),&
    3485              : 
    3486              :                      ! save initial discarded
    3487              :                      CALL dbcsr_create(vd_fixed, &
    3488            0 :                                        template=almo_scf_env%matrix_v_disc(ispin))
    3489            0 :                      CALL dbcsr_copy(vd_fixed, almo_scf_env%matrix_v_disc(ispin))
    3490              : 
    3491              :                      !! create the down metric in the retained k-subspace
    3492              :                      CALL dbcsr_create(k_vr_index_down, &
    3493              :                                        template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
    3494            0 :                                        matrix_type=dbcsr_type_no_symmetry)
    3495              :                      !CALL dbcsr_copy(k_vr_index_down,&
    3496              :                      !        almo_scf_env%matrix_sigma_vv_blk(ispin))
    3497              : 
    3498              :                      !CALL get_overlap(bra=almo_scf_env%matrix_v_blk(ispin),&
    3499              :                      !        ket=almo_scf_env%matrix_v_blk(ispin),&
    3500              :                      !        overlap=k_vr_index_down,&
    3501              :                      !        metric=almo_scf_env%matrix_s_blk(1),&
    3502              :                      !        retain_overlap_sparsity=.FALSE.,&
    3503              :                      !        eps_filter=almo_scf_env%eps_filter)
    3504              : 
    3505              :                      !! create the up metric in the discarded k-subspace
    3506              :                      CALL dbcsr_create(k_vd_index_down, &
    3507              :                                        template=almo_scf_env%matrix_vv_disc_blk(ispin), &
    3508            0 :                                        matrix_type=dbcsr_type_no_symmetry)
    3509              :                      !CALL dbcsr_init(k_vd_index_up)
    3510              :                      !CALL dbcsr_create(k_vd_index_up,&
    3511              :                      !        template=almo_scf_env%matrix_vv_disc_blk(ispin),&
    3512              :                      !        matrix_type=dbcsr_type_no_symmetry)
    3513              :                      !CALL dbcsr_copy(k_vd_index_down,&
    3514              :                      !        almo_scf_env%matrix_vv_disc_blk(ispin))
    3515              : 
    3516              :                      !CALL get_overlap(bra=almo_scf_env%matrix_v_disc_blk(ispin),&
    3517              :                      !        ket=almo_scf_env%matrix_v_disc_blk(ispin),&
    3518              :                      !        overlap=k_vd_index_down,&
    3519              :                      !        metric=almo_scf_env%matrix_s_blk(1),&
    3520              :                      !        retain_overlap_sparsity=.FALSE.,&
    3521              :                      !        eps_filter=almo_scf_env%eps_filter)
    3522              : 
    3523              :                      !IF (unit_nr>0) THEN
    3524              :                      !   WRITE(unit_nr,*) "Inverting blocked overlap matrix of discarded virtuals"
    3525              :                      !ENDIF
    3526              :                      !CALL invert_Hotelling(k_vd_index_up,&
    3527              :                      !        k_vd_index_down,&
    3528              :                      !        almo_scf_env%eps_filter)
    3529              :                      !IF (safe_mode) THEN
    3530              :                      !   CALL dbcsr_init(matrix_tmp1)
    3531              :                      !   CALL dbcsr_create(matrix_tmp1,template=k_vd_index_down,&
    3532              :                      !                        matrix_type=dbcsr_type_no_symmetry)
    3533              :                      !   CALL dbcsr_multiply("N","N",1.0_dp,k_vd_index_up,&
    3534              :                      !                          k_vd_index_down,&
    3535              :                      !                          0.0_dp, matrix_tmp1,&
    3536              :                      !                          filter_eps=almo_scf_env%eps_filter)
    3537              :                      !   frob_matrix_base=dbcsr_frobenius_norm(matrix_tmp1)
    3538              :                      !   CALL dbcsr_add_on_diag(matrix_tmp1,-1.0_dp)
    3539              :                      !   frob_matrix=dbcsr_frobenius_norm(matrix_tmp1)
    3540              :                      !   IF (unit_nr>0) THEN
    3541              :                      !      WRITE(unit_nr,*) "Error for (inv(SIG)*SIG-I)",&
    3542              :                      !            frob_matrix/frob_matrix_base
    3543              :                      !   ENDIF
    3544              :                      !   CALL dbcsr_release(matrix_tmp1)
    3545              :                      !ENDIF
    3546              : 
    3547              :                      ! init matrices necessary for optimization of truncated virts
    3548              :                      ! init blocked gradient before setting K to zero
    3549              :                      ! otherwise the block structure might be lost
    3550              :                      CALL dbcsr_create(grad, &
    3551            0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3552            0 :                      CALL dbcsr_copy(grad, almo_scf_env%matrix_k_blk(ispin))
    3553              : 
    3554              :                      ! init MD in the k-space
    3555            0 :                      md_in_k_space = almo_scf_env%logical01
    3556            0 :                      IF (md_in_k_space) THEN
    3557              :                         CALL dbcsr_create(velocity, &
    3558            0 :                                           template=almo_scf_env%matrix_k_blk(ispin))
    3559            0 :                         CALL dbcsr_copy(velocity, almo_scf_env%matrix_k_blk(ispin))
    3560            0 :                         CALL dbcsr_set(velocity, 0.0_dp)
    3561            0 :                         time_step = almo_scf_env%opt_k_trial_step_size
    3562              :                      END IF
    3563              : 
    3564              :                      CALL dbcsr_create(prev_step, &
    3565            0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3566              : 
    3567              :                      CALL dbcsr_create(prev_minus_prec_grad, &
    3568            0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3569              : 
    3570              :                      ! initialize diagonal blocks of the preconditioner to 1.0_dp
    3571              :                      CALL dbcsr_create(prec, &
    3572            0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3573            0 :                      CALL dbcsr_copy(prec, almo_scf_env%matrix_k_blk(ispin))
    3574            0 :                      CALL dbcsr_set(prec, 1.0_dp)
    3575              : 
    3576              :                      ! generate initial K (extrapolate if previous values are available)
    3577            0 :                      CALL dbcsr_set(almo_scf_env%matrix_k_blk(ispin), 0.0_dp)
    3578              :                      ! matrix_k_central stores current k because matrix_k_blk is updated
    3579              :                      ! during linear search
    3580              :                      CALL dbcsr_create(matrix_k_central, &
    3581            0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3582              :                      CALL dbcsr_copy(matrix_k_central, &
    3583            0 :                                      almo_scf_env%matrix_k_blk(ispin))
    3584              :                      CALL dbcsr_create(tmp_k_blk, &
    3585            0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3586              :                      CALL dbcsr_create(step, &
    3587            0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3588            0 :                      CALL dbcsr_set(step, 0.0_dp)
    3589              :                      CALL dbcsr_create(t_curr, &
    3590            0 :                                        template=almo_scf_env%matrix_t(ispin))
    3591              :                      CALL dbcsr_create(sigma_oo_curr, &
    3592              :                                        template=almo_scf_env%matrix_sigma(ispin), &
    3593            0 :                                        matrix_type=dbcsr_type_no_symmetry)
    3594              :                      CALL dbcsr_create(sigma_oo_curr_inv, &
    3595              :                                        template=almo_scf_env%matrix_sigma(ispin), &
    3596            0 :                                        matrix_type=dbcsr_type_no_symmetry)
    3597              :                      CALL dbcsr_create(tmp1_n_vr, &
    3598            0 :                                        template=almo_scf_env%matrix_v(ispin))
    3599              :                      CALL dbcsr_create(tmp3_vd_vr, &
    3600            0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3601              :                      CALL dbcsr_create(tmp2_n_o, &
    3602            0 :                                        template=almo_scf_env%matrix_t(ispin))
    3603              :                      CALL dbcsr_create(tmp4_o_vr, &
    3604            0 :                                        template=almo_scf_env%matrix_ov(ispin))
    3605              :                      CALL dbcsr_create(prev_grad, &
    3606            0 :                                        template=almo_scf_env%matrix_k_blk(ispin))
    3607            0 :                      CALL dbcsr_set(prev_grad, 0.0_dp)
    3608              : 
    3609              :                      !CALL dbcsr_init(sigma_oo_guess)
    3610              :                      !CALL dbcsr_create(sigma_oo_guess,&
    3611              :                      !        template=almo_scf_env%matrix_sigma(ispin),&
    3612              :                      !        matrix_type=dbcsr_type_no_symmetry)
    3613              :                      !CALL dbcsr_set(sigma_oo_guess,0.0_dp)
    3614              :                      !CALL dbcsr_add_on_diag(sigma_oo_guess,1.0_dp)
    3615              :                      !CALL dbcsr_filter(sigma_oo_guess,almo_scf_env%eps_filter)
    3616              :                      !CALL dbcsr_print(sigma_oo_guess)
    3617              : 
    3618              :                   END IF ! done constructing discarded virtuals
    3619              : 
    3620              :                   ! init variables
    3621            0 :                   opt_k_max_iter = almo_scf_env%opt_k_max_iter
    3622            0 :                   iteration = 0
    3623            0 :                   converged = .FALSE.
    3624            0 :                   prepare_to_exit = .FALSE.
    3625            0 :                   beta = 0.0_dp
    3626            0 :                   line_search = .FALSE.
    3627            0 :                   obj_function = 0.0_dp
    3628            0 :                   conjugacy_error = 0.0_dp
    3629            0 :                   line_search_error = 0.0_dp
    3630            0 :                   fun0 = 0.0_dp
    3631            0 :                   fun1 = 0.0_dp
    3632            0 :                   gfun0 = 0.0_dp
    3633            0 :                   gfun1 = 0.0_dp
    3634            0 :                   step_size_quadratic_approx = 0.0_dp
    3635            0 :                   reset_step_size = .TRUE.
    3636            0 :                   IF (almo_scf_env%deloc_truncate_virt == virt_full) opt_k_max_iter = 0
    3637              : 
    3638              :                   ! start cg iterations to optimize matrix_k_blk
    3639            0 :                   DO
    3640              : 
    3641            0 :                      CALL timeset('k_opt_vr', handle1)
    3642              : 
    3643            0 :                      IF (almo_scf_env%deloc_truncate_virt /= virt_full) THEN
    3644              : 
    3645              :                         ! construct k-excited virtuals
    3646              :                         CALL dbcsr_multiply("N", "N", 1.0_dp, vd_fixed, &
    3647              :                                             almo_scf_env%matrix_k_blk(ispin), &
    3648              :                                             0.0_dp, almo_scf_env%matrix_v(ispin), &
    3649            0 :                                             filter_eps=almo_scf_env%eps_filter)
    3650              :                         CALL dbcsr_add(almo_scf_env%matrix_v(ispin), vr_fixed, &
    3651            0 :                                        +1.0_dp, +1.0_dp)
    3652              :                      END IF
    3653              : 
    3654              :                      ! decompose the overlap matrix of the current retained orbitals
    3655              :                      !IF (unit_nr>0) THEN
    3656              :                      !   WRITE(unit_nr,*) "decompose the active VV overlap matrix"
    3657              :                      !ENDIF
    3658              :                      CALL get_overlap(bra=almo_scf_env%matrix_v(ispin), &
    3659              :                                       ket=almo_scf_env%matrix_v(ispin), &
    3660              :                                       overlap=almo_scf_env%matrix_sigma_vv(ispin), &
    3661              :                                       metric=almo_scf_env%matrix_s(1), &
    3662              :                                       retain_overlap_sparsity=.FALSE., &
    3663            0 :                                       eps_filter=almo_scf_env%eps_filter)
    3664              :                      ! use either cholesky or sqrt
    3665              :                      !! RZK-warning: strangely, cholesky does not work with k-optimization
    3666            0 :                      IF (almo_scf_env%deloc_truncate_virt == virt_full) THEN
    3667            0 :                         CALL timeset('cholesky', handle2)
    3668            0 :                         t1cholesky = m_walltime()
    3669              : 
    3670              :                         ! re-create sigma_vv_sqrt because desymmetrize is buggy -
    3671              :                         ! it will create multiple copies of blocks
    3672              :                         CALL dbcsr_create(sigma_vv_sqrt, &
    3673              :                                           template=almo_scf_env%matrix_sigma_vv(ispin), &
    3674            0 :                                           matrix_type=dbcsr_type_no_symmetry)
    3675              :                         CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma_vv(ispin), &
    3676            0 :                                                 sigma_vv_sqrt)
    3677              :                         CALL cp_dbcsr_cholesky_decompose(sigma_vv_sqrt, &
    3678              :                                                          para_env=almo_scf_env%para_env, &
    3679            0 :                                                          blacs_env=almo_scf_env%blacs_env)
    3680            0 :                         CALL make_triu(sigma_vv_sqrt)
    3681            0 :                         CALL dbcsr_filter(sigma_vv_sqrt, almo_scf_env%eps_filter)
    3682              :                         ! apply SOLVE to compute U^(-1) : U*U^(-1)=I
    3683            0 :                         CALL dbcsr_get_info(sigma_vv_sqrt, nfullrows_total=n)
    3684              :                         CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma_vv(ispin), &
    3685            0 :                                           matrix_type=dbcsr_type_no_symmetry)
    3686            0 :                         CALL dbcsr_set(matrix_tmp1, 0.0_dp)
    3687            0 :                         CALL dbcsr_add_on_diag(matrix_tmp1, 1.0_dp)
    3688              :                         CALL cp_dbcsr_cholesky_restore(matrix_tmp1, n, sigma_vv_sqrt, &
    3689              :                                                        sigma_vv_sqrt_inv, op="SOLVE", pos="RIGHT", &
    3690              :                                                        para_env=almo_scf_env%para_env, &
    3691            0 :                                                        blacs_env=almo_scf_env%blacs_env)
    3692            0 :                         CALL dbcsr_filter(sigma_vv_sqrt_inv, almo_scf_env%eps_filter)
    3693            0 :                         CALL dbcsr_release(matrix_tmp1)
    3694              :                         IF (safe_mode) THEN
    3695              :                            CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma_vv(ispin), &
    3696              :                                              matrix_type=dbcsr_type_no_symmetry)
    3697              :                            CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma_vv(ispin), &
    3698              :                                                    matrix_tmp1)
    3699              :                            CALL dbcsr_multiply("T", "N", 1.0_dp, sigma_vv_sqrt, &
    3700              :                                                sigma_vv_sqrt, &
    3701              :                                                -1.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    3702              :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    3703              :                            CALL dbcsr_add_on_diag(matrix_tmp1, 1.0_dp)
    3704              :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    3705              :                            IF (unit_nr > 0) THEN
    3706              :                               WRITE (unit_nr, *) "Error for ( U^T * U - Sig )", &
    3707              :                                  frob_matrix/frob_matrix_base
    3708              :                            END IF
    3709              :                            CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_vv_sqrt_inv, &
    3710              :                                                sigma_vv_sqrt, &
    3711              :                                                0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    3712              :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    3713              :                            CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
    3714              :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    3715              :                            IF (unit_nr > 0) THEN
    3716              :                               WRITE (unit_nr, *) "Error for ( inv(U) * U - I )", &
    3717              :                                  frob_matrix/frob_matrix_base
    3718              :                            END IF
    3719              :                            CALL dbcsr_release(matrix_tmp1)
    3720              :                         END IF ! safe_mode
    3721            0 :                         t2cholesky = m_walltime()
    3722            0 :                         IF (unit_nr > 0) THEN
    3723            0 :                            WRITE (unit_nr, *) "Cholesky+inverse wall-time: ", t2cholesky - t1cholesky
    3724              :                         END IF
    3725            0 :                         CALL timestop(handle2)
    3726              :                      ELSE
    3727              :                         CALL matrix_sqrt_Newton_Schulz(sigma_vv_sqrt, &
    3728              :                                                        sigma_vv_sqrt_inv, &
    3729              :                                                        almo_scf_env%matrix_sigma_vv(ispin), &
    3730              :                                                        !matrix_sqrt_inv_guess=sigma_vv_sqrt_inv_guess,&
    3731              :                                                        !matrix_sqrt_guess=sigma_vv_sqrt_guess,&
    3732              :                                                        threshold=almo_scf_env%eps_filter, &
    3733              :                                                        order=almo_scf_env%order_lanczos, &
    3734              :                                                        eps_lanczos=almo_scf_env%eps_lanczos, &
    3735            0 :                                                        max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    3736            0 :                         CALL dbcsr_copy(sigma_vv_sqrt_inv_guess, sigma_vv_sqrt_inv)
    3737            0 :                         CALL dbcsr_copy(sigma_vv_sqrt_guess, sigma_vv_sqrt)
    3738              :                         IF (safe_mode) THEN
    3739              :                            CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_sigma_vv(ispin), &
    3740              :                                              matrix_type=dbcsr_type_no_symmetry)
    3741              :                            CALL dbcsr_create(matrix_tmp2, template=almo_scf_env%matrix_sigma_vv(ispin), &
    3742              :                                              matrix_type=dbcsr_type_no_symmetry)
    3743              : 
    3744              :                            CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_vv_sqrt_inv, &
    3745              :                                                almo_scf_env%matrix_sigma_vv(ispin), &
    3746              :                                                0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    3747              :                            CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
    3748              :                                                sigma_vv_sqrt_inv, &
    3749              :                                                0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    3750              : 
    3751              :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    3752              :                            CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    3753              :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    3754              :                            IF (unit_nr > 0) THEN
    3755              :                               WRITE (unit_nr, *) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)", &
    3756              :                                  frob_matrix/frob_matrix_base
    3757              :                            END IF
    3758              : 
    3759              :                            CALL dbcsr_release(matrix_tmp1)
    3760              :                            CALL dbcsr_release(matrix_tmp2)
    3761              :                         END IF
    3762              :                      END IF
    3763            0 :                      CALL timestop(handle1)
    3764              : 
    3765              :                      ! compute excitation amplitudes (to the current set of retained virtuals)
    3766              :                      ! set convergence criterion for x-optimization
    3767            0 :                      IF ((iteration == 0) .AND. (.NOT. line_search) .AND. &
    3768              :                          (outer_opt_k_iteration == 0)) THEN
    3769              :                         x_opt_eps_adaptive = &
    3770            0 :                            almo_scf_env%deloc_cayley_eps_convergence
    3771              :                      ELSE
    3772              :                         x_opt_eps_adaptive = &
    3773              :                            MAX(ABS(almo_scf_env%deloc_cayley_eps_convergence), &
    3774            0 :                                ABS(x_opt_eps_adaptive_factor*grad_norm))
    3775              :                      END IF
    3776            0 :                      CALL ct_step_env_init(ct_step_env)
    3777              :                      CALL ct_step_env_set(ct_step_env, &
    3778              :                                           para_env=almo_scf_env%para_env, &
    3779              :                                           blacs_env=almo_scf_env%blacs_env, &
    3780              :                                           use_occ_orbs=.TRUE., &
    3781              :                                           use_virt_orbs=.TRUE., &
    3782              :                                           occ_orbs_orthogonal=.FALSE., &
    3783              :                                           virt_orbs_orthogonal=.FALSE., &
    3784              :                                           pp_preconditioner_full=almo_scf_env%deloc_cayley_occ_precond, &
    3785              :                                           qq_preconditioner_full=almo_scf_env%deloc_cayley_vir_precond, &
    3786              :                                           tensor_type=almo_scf_env%deloc_cayley_tensor_type, &
    3787              :                                           neglect_quadratic_term=almo_scf_env%deloc_cayley_linear, &
    3788              :                                           conjugator=almo_scf_env%deloc_cayley_conjugator, &
    3789              :                                           max_iter=almo_scf_env%deloc_cayley_max_iter, &
    3790              :                                           calculate_energy_corr=.TRUE., &
    3791              :                                           update_p=.FALSE., &
    3792              :                                           update_q=.FALSE., &
    3793              :                                           eps_convergence=x_opt_eps_adaptive, &
    3794              :                                           eps_filter=almo_scf_env%eps_filter, &
    3795              :                                           !nspins=1,&
    3796              :                                           q_index_up=sigma_vv_sqrt_inv, &
    3797              :                                           q_index_down=sigma_vv_sqrt, &
    3798              :                                           p_index_up=almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    3799              :                                           p_index_down=almo_scf_env%matrix_sigma_sqrt(ispin), &
    3800              :                                           matrix_ks=almo_scf_env%matrix_ks_0deloc(ispin), &
    3801              :                                           matrix_t=almo_scf_env%matrix_t(ispin), &
    3802              :                                           matrix_qp_template=almo_scf_env%matrix_vo(ispin), &
    3803              :                                           matrix_pq_template=almo_scf_env%matrix_ov(ispin), &
    3804              :                                           matrix_v=almo_scf_env%matrix_v(ispin), &
    3805            0 :                                           matrix_x_guess=almo_scf_env%matrix_x(ispin))
    3806              :                      ! perform calculations
    3807            0 :                      CALL ct_step_execute(ct_step_env)
    3808              :                      ! get the energy correction
    3809              :                      CALL ct_step_env_get(ct_step_env, &
    3810              :                                           energy_correction=energy_correction(ispin), &
    3811            0 :                                           copy_matrix_x=almo_scf_env%matrix_x(ispin))
    3812            0 :                      CALL ct_step_env_clean(ct_step_env)
    3813              :                      ! RZK-warning matrix_x is being transformed
    3814              :                      ! back and forth between orth and up_down representations
    3815            0 :                      energy_correction(1) = energy_correction(1)*spin_factor
    3816              : 
    3817            0 :                      IF (opt_k_max_iter /= 0) THEN
    3818              : 
    3819            0 :                         CALL timeset('k_opt_t_curr', handle3)
    3820              : 
    3821              :                         ! construct current occupied orbitals T_blk + V_r*X
    3822              :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3823              :                                             almo_scf_env%matrix_v(ispin), &
    3824              :                                             almo_scf_env%matrix_x(ispin), &
    3825              :                                             0.0_dp, t_curr, &
    3826            0 :                                             filter_eps=almo_scf_env%eps_filter)
    3827              :                         CALL dbcsr_add(t_curr, almo_scf_env%matrix_t_blk(ispin), &
    3828            0 :                                        +1.0_dp, +1.0_dp)
    3829              : 
    3830              :                         ! calculate current occupied overlap
    3831              :                         !IF (unit_nr>0) THEN
    3832              :                         !   WRITE(unit_nr,*) "Inverting current occ overlap matrix"
    3833              :                         !ENDIF
    3834              :                         CALL get_overlap(bra=t_curr, &
    3835              :                                          ket=t_curr, &
    3836              :                                          overlap=sigma_oo_curr, &
    3837              :                                          metric=almo_scf_env%matrix_s(1), &
    3838              :                                          retain_overlap_sparsity=.FALSE., &
    3839            0 :                                          eps_filter=almo_scf_env%eps_filter)
    3840            0 :                         IF (iteration == 0) THEN
    3841              :                            CALL invert_Hotelling(sigma_oo_curr_inv, &
    3842              :                                                  sigma_oo_curr, &
    3843              :                                                  threshold=almo_scf_env%eps_filter, &
    3844            0 :                                                  use_inv_as_guess=.FALSE.)
    3845              :                         ELSE
    3846              :                            CALL invert_Hotelling(sigma_oo_curr_inv, &
    3847              :                                                  sigma_oo_curr, &
    3848              :                                                  threshold=almo_scf_env%eps_filter, &
    3849            0 :                                                  use_inv_as_guess=.TRUE.)
    3850              :                            !CALL dbcsr_copy(sigma_oo_guess,sigma_oo_curr_inv)
    3851              :                         END IF
    3852              :                         IF (safe_mode) THEN
    3853              :                            CALL dbcsr_create(matrix_tmp1, template=sigma_oo_curr, &
    3854              :                                              matrix_type=dbcsr_type_no_symmetry)
    3855              :                            CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_oo_curr, &
    3856              :                                                sigma_oo_curr_inv, &
    3857              :                                                0.0_dp, matrix_tmp1, &
    3858              :                                                filter_eps=almo_scf_env%eps_filter)
    3859              :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    3860              :                            CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
    3861              :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    3862              :                            !CALL dbcsr_filter(matrix_tmp1,almo_scf_env%eps_filter)
    3863              :                            !CALL dbcsr_print(matrix_tmp1)
    3864              :                            IF (unit_nr > 0) THEN
    3865              :                               WRITE (unit_nr, *) "Error for (SIG*inv(SIG)-I)", &
    3866              :                                  frob_matrix/frob_matrix_base, frob_matrix_base
    3867              :                            END IF
    3868              :                            CALL dbcsr_release(matrix_tmp1)
    3869              :                         END IF
    3870              :                         IF (safe_mode) THEN
    3871              :                            CALL dbcsr_create(matrix_tmp1, template=sigma_oo_curr, &
    3872              :                                              matrix_type=dbcsr_type_no_symmetry)
    3873              :                            CALL dbcsr_multiply("N", "N", 1.0_dp, sigma_oo_curr_inv, &
    3874              :                                                sigma_oo_curr, &
    3875              :                                                0.0_dp, matrix_tmp1, &
    3876              :                                                filter_eps=almo_scf_env%eps_filter)
    3877              :                            frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    3878              :                            CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
    3879              :                            frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    3880              :                            !CALL dbcsr_filter(matrix_tmp1,almo_scf_env%eps_filter)
    3881              :                            !CALL dbcsr_print(matrix_tmp1)
    3882              :                            IF (unit_nr > 0) THEN
    3883              :                               WRITE (unit_nr, *) "Error for (inv(SIG)*SIG-I)", &
    3884              :                                  frob_matrix/frob_matrix_base, frob_matrix_base
    3885              :                            END IF
    3886              :                            CALL dbcsr_release(matrix_tmp1)
    3887              :                         END IF
    3888              : 
    3889            0 :                         CALL timestop(handle3)
    3890            0 :                         CALL timeset('k_opt_vd', handle4)
    3891              : 
    3892              :                         ! construct current discarded virtuals:
    3893              :                         ! (1-R_curr)(1-Q^VR_curr)|ALMO_vd_basis> =
    3894              :                         ! = (1-Q^VR_curr)|ALMO_vd_basis>
    3895              :                         ! use sigma_vv_sqrt to store the inverse of the overlap
    3896              :                         ! sigma_vv_inv is computed from sqrt/cholesky
    3897              :                         CALL dbcsr_multiply("N", "T", 1.0_dp, &
    3898              :                                             sigma_vv_sqrt_inv, &
    3899              :                                             sigma_vv_sqrt_inv, &
    3900              :                                             0.0_dp, sigma_vv_sqrt, &
    3901            0 :                                             filter_eps=almo_scf_env%eps_filter)
    3902              :                         CALL apply_projector(psi_in=almo_scf_env%matrix_v_disc_blk(ispin), &
    3903              :                                              psi_out=almo_scf_env%matrix_v_disc(ispin), &
    3904              :                                              psi_projector=almo_scf_env%matrix_v(ispin), &
    3905              :                                              metric=almo_scf_env%matrix_s(1), &
    3906              :                                              project_out=.FALSE., &
    3907              :                                              psi_projector_orthogonal=.FALSE., &
    3908              :                                              proj_in_template=almo_scf_env%matrix_k_tr(ispin), &
    3909              :                                              eps_filter=almo_scf_env%eps_filter, &
    3910            0 :                                              sig_inv_projector=sigma_vv_sqrt)
    3911              :                         !sig_inv_template=almo_scf_env%matrix_sigma_vv(ispin),&
    3912              :                         CALL dbcsr_add(almo_scf_env%matrix_v_disc(ispin), &
    3913            0 :                                        vd_fixed, -1.0_dp, +1.0_dp)
    3914              : 
    3915            0 :                         CALL timestop(handle4)
    3916            0 :                         CALL timeset('k_opt_grad', handle5)
    3917              : 
    3918              :                         ! evaluate the gradient from the assembled components
    3919              :                         ! grad_xx = c0 [ (Vd_curr^tr)*F*T_curr*sigma_oo_curr_inv*(X^tr)]_xx
    3920              :                         ! save previous gradient to calculate conjugation coef
    3921            0 :                         IF (line_search) THEN
    3922            0 :                            CALL dbcsr_copy(prev_grad, grad)
    3923              :                         END IF
    3924              :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3925              :                                             almo_scf_env%matrix_ks_0deloc(ispin), &
    3926              :                                             t_curr, &
    3927              :                                             0.0_dp, tmp2_n_o, &
    3928            0 :                                             filter_eps=almo_scf_env%eps_filter)
    3929              :                         CALL dbcsr_multiply("N", "T", 1.0_dp, &
    3930              :                                             sigma_oo_curr_inv, &
    3931              :                                             almo_scf_env%matrix_x(ispin), &
    3932              :                                             0.0_dp, tmp4_o_vr, &
    3933            0 :                                             filter_eps=almo_scf_env%eps_filter)
    3934              :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    3935              :                                             tmp2_n_o, &
    3936              :                                             tmp4_o_vr, &
    3937              :                                             0.0_dp, tmp1_n_vr, &
    3938            0 :                                             filter_eps=almo_scf_env%eps_filter)
    3939              :                         CALL dbcsr_multiply("T", "N", 2.0_dp*spin_factor, &
    3940              :                                             almo_scf_env%matrix_v_disc(ispin), &
    3941              :                                             tmp1_n_vr, &
    3942              :                                             0.0_dp, grad, &
    3943            0 :                                             retain_sparsity=.TRUE.)
    3944              :                         !filter_eps=almo_scf_env%eps_filter,&
    3945              :                         ! keep tmp2_n_o for the next step
    3946              :                         ! keep tmp4_o_vr for the preconditioner
    3947              : 
    3948              :                         ! check convergence and other exit criteria
    3949            0 :                         grad_norm_frob = dbcsr_frobenius_norm(grad)
    3950            0 :                         grad_norm = dbcsr_maxabs(grad)
    3951            0 :                         converged = (grad_norm < almo_scf_env%opt_k_eps_convergence)
    3952            0 :                         IF (converged .OR. (iteration >= opt_k_max_iter)) THEN
    3953            0 :                            prepare_to_exit = .TRUE.
    3954              :                         END IF
    3955            0 :                         CALL timestop(handle5)
    3956              : 
    3957            0 :                         IF (.NOT. prepare_to_exit) THEN
    3958              : 
    3959            0 :                            CALL timeset('k_opt_energy', handle6)
    3960              : 
    3961              :                            ! compute "energy" c0*Tr[sig_inv_oo*t*F*t]
    3962              :                            CALL dbcsr_multiply("T", "N", spin_factor, &
    3963              :                                                t_curr, &
    3964              :                                                tmp2_n_o, &
    3965              :                                                0.0_dp, sigma_oo_curr, &
    3966            0 :                                                filter_eps=almo_scf_env%eps_filter)
    3967              :                            delta_obj_function = fun0
    3968            0 :                            CALL dbcsr_dot(sigma_oo_curr_inv, sigma_oo_curr, obj_function)
    3969            0 :                            delta_obj_function = obj_function - delta_obj_function
    3970            0 :                            IF (line_search) THEN
    3971              :                               fun1 = obj_function
    3972              :                            ELSE
    3973            0 :                               fun0 = obj_function
    3974              :                            END IF
    3975              : 
    3976            0 :                            CALL timestop(handle6)
    3977              : 
    3978              :                            ! update the step direction
    3979            0 :                            IF (.NOT. line_search) THEN
    3980              : 
    3981            0 :                               CALL timeset('k_opt_step', handle7)
    3982              : 
    3983            0 :                               IF ((.NOT. md_in_k_space) .AND. &
    3984              :                                   (iteration >= MAX(0, almo_scf_env%opt_k_prec_iter_start) .AND. &
    3985              :                                    MOD(iteration - almo_scf_env%opt_k_prec_iter_start, &
    3986              :                                        almo_scf_env%opt_k_prec_iter_freq) == 0)) THEN
    3987              : 
    3988              :                                  !IF ((iteration.eq.0).AND.(.NOT.md_in_k_space)) THEN
    3989              : 
    3990              :                                  ! compute the preconditioner
    3991            0 :                                  IF (unit_nr > 0) THEN
    3992            0 :                                     WRITE (unit_nr, *) "Computing preconditioner"
    3993              :                                  END IF
    3994              :                                  !CALL opt_k_create_preconditioner(prec,&
    3995              :                                  !        almo_scf_env%matrix_v_disc(ispin),&
    3996              :                                  !        almo_scf_env%matrix_ks_0deloc(ispin),&
    3997              :                                  !        almo_scf_env%matrix_x(ispin),&
    3998              :                                  !        tmp4_o_vr,&
    3999              :                                  !        almo_scf_env%matrix_s(1),&
    4000              :                                  !        grad,&
    4001              :                                  !        !almo_scf_env%matrix_v_disc_blk(ispin),&
    4002              :                                  !        vd_fixed,&
    4003              :                                  !        t_curr,&
    4004              :                                  !        k_vd_index_up,&
    4005              :                                  !        k_vr_index_down,&
    4006              :                                  !        tmp1_n_vr,&
    4007              :                                  !        spin_factor,&
    4008              :                                  !        almo_scf_env%eps_filter)
    4009              :                                  CALL opt_k_create_preconditioner_blk(almo_scf_env, &
    4010              :                                                                       almo_scf_env%matrix_v_disc(ispin), &
    4011              :                                                                       tmp4_o_vr, &
    4012              :                                                                       t_curr, &
    4013              :                                                                       ispin, &
    4014            0 :                                                                       spin_factor)
    4015              : 
    4016              :                               END IF
    4017              : 
    4018              :                               ! save the previous step
    4019            0 :                               CALL dbcsr_copy(prev_step, step)
    4020              : 
    4021              :                               ! compute the new step
    4022              :                               CALL opt_k_apply_preconditioner_blk(almo_scf_env, &
    4023            0 :                                                                   step, grad, ispin)
    4024              :                               !CALL dbcsr_hadamard_product(prec,grad,step)
    4025            0 :                               CALL dbcsr_scale(step, -1.0_dp)
    4026              : 
    4027              :                               ! check whether we need to reset conjugate directions
    4028            0 :                               reset_conjugator = .FALSE.
    4029              :                               ! first check if manual reset is active
    4030            0 :                               IF (iteration < MAX(almo_scf_env%opt_k_conj_iter_start, 1) .OR. &
    4031              :                                   MOD(iteration - almo_scf_env%opt_k_conj_iter_start, &
    4032              :                                       almo_scf_env%opt_k_conj_iter_freq) == 0) THEN
    4033              : 
    4034              :                                  reset_conjugator = .TRUE.
    4035              : 
    4036              :                               ELSE
    4037              : 
    4038              :                                  ! check for the errors in the cg algorithm
    4039              :                                  !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4040              :                                  !CALL dbcsr_dot(grad,tmp_k_blk,numer)
    4041              :                                  !CALL dbcsr_dot(prev_grad,tmp_k_blk,denom)
    4042            0 :                                  CALL dbcsr_dot(grad, prev_minus_prec_grad, numer)
    4043            0 :                                  CALL dbcsr_dot(prev_grad, prev_minus_prec_grad, denom)
    4044            0 :                                  conjugacy_error = numer/denom
    4045              : 
    4046            0 :                                  IF (conjugacy_error > MIN(0.5_dp, conjugacy_error_threshold)) THEN
    4047            0 :                                     reset_conjugator = .TRUE.
    4048            0 :                                     IF (unit_nr > 0) THEN
    4049            0 :                                        WRITE (unit_nr, *) "Lack of progress, conjugacy error is ", conjugacy_error
    4050              :                                     END IF
    4051              :                                  END IF
    4052              : 
    4053              :                                  ! check the gradient along the previous direction
    4054            0 :                                  IF ((iteration /= 0) .AND. (.NOT. reset_conjugator)) THEN
    4055            0 :                                     CALL dbcsr_dot(grad, prev_step, numer)
    4056            0 :                                     CALL dbcsr_dot(prev_grad, prev_step, denom)
    4057            0 :                                     line_search_error = numer/denom
    4058            0 :                                     IF (line_search_error > line_search_error_threshold) THEN
    4059            0 :                                        reset_conjugator = .TRUE.
    4060            0 :                                        IF (unit_nr > 0) THEN
    4061            0 :                                           WRITE (unit_nr, *) "Bad line search, line search error is ", line_search_error
    4062              :                                        END IF
    4063              :                                     END IF
    4064              :                                  END IF
    4065              : 
    4066              :                               END IF
    4067              : 
    4068              :                               ! compute the conjugation coefficient - beta
    4069            0 :                               IF (.NOT. reset_conjugator) THEN
    4070              : 
    4071            0 :                                  SELECT CASE (almo_scf_env%opt_k_conjugator)
    4072              :                                  CASE (cg_hestenes_stiefel)
    4073            0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4074            0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4075            0 :                                     CALL dbcsr_dot(tmp_k_blk, step, numer)
    4076            0 :                                     CALL dbcsr_dot(tmp_k_blk, prev_step, denom)
    4077            0 :                                     beta = -1.0_dp*numer/denom
    4078              :                                  CASE (cg_fletcher_reeves)
    4079              :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4080              :                                     !CALL dbcsr_dot(prev_grad,tmp_k_blk,denom)
    4081              :                                     !CALL dbcsr_hadamard_product(prec,grad,tmp_k_blk)
    4082              :                                     !CALL dbcsr_dot(grad,tmp_k_blk,numer)
    4083              :                                     !beta=numer/denom
    4084            0 :                                     CALL dbcsr_dot(grad, step, numer)
    4085            0 :                                     CALL dbcsr_dot(prev_grad, prev_minus_prec_grad, denom)
    4086            0 :                                     beta = numer/denom
    4087              :                                  CASE (cg_polak_ribiere)
    4088              :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4089              :                                     !CALL dbcsr_dot(prev_grad,tmp_k_blk,denom)
    4090              :                                     !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
    4091              :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4092              :                                     !CALL dbcsr_dot(tmp_k_blk,grad,numer)
    4093            0 :                                     CALL dbcsr_dot(prev_grad, prev_minus_prec_grad, denom)
    4094            0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4095            0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4096            0 :                                     CALL dbcsr_dot(tmp_k_blk, step, numer)
    4097            0 :                                     beta = numer/denom
    4098              :                                  CASE (cg_fletcher)
    4099              :                                     !CALL dbcsr_hadamard_product(prec,grad,tmp_k_blk)
    4100              :                                     !CALL dbcsr_dot(grad,tmp_k_blk,numer)
    4101              :                                     !CALL dbcsr_dot(prev_grad,prev_step,denom)
    4102              :                                     !beta=-1.0_dp*numer/denom
    4103            0 :                                     CALL dbcsr_dot(grad, step, numer)
    4104            0 :                                     CALL dbcsr_dot(prev_grad, prev_step, denom)
    4105            0 :                                     beta = numer/denom
    4106              :                                  CASE (cg_liu_storey)
    4107            0 :                                     CALL dbcsr_dot(prev_grad, prev_step, denom)
    4108              :                                     !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
    4109              :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4110              :                                     !CALL dbcsr_dot(tmp_k_blk,grad,numer)
    4111            0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4112            0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4113            0 :                                     CALL dbcsr_dot(tmp_k_blk, step, numer)
    4114            0 :                                     beta = numer/denom
    4115              :                                  CASE (cg_dai_yuan)
    4116              :                                     !CALL dbcsr_hadamard_product(prec,grad,tmp_k_blk)
    4117              :                                     !CALL dbcsr_dot(grad,tmp_k_blk,numer)
    4118              :                                     !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
    4119              :                                     !CALL dbcsr_dot(prev_grad,prev_step,denom)
    4120              :                                     !beta=numer/denom
    4121            0 :                                     CALL dbcsr_dot(grad, step, numer)
    4122            0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4123            0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4124            0 :                                     CALL dbcsr_dot(tmp_k_blk, prev_step, denom)
    4125            0 :                                     beta = -1.0_dp*numer/denom
    4126              :                                  CASE (cg_hager_zhang)
    4127              :                                     !CALL dbcsr_add(prev_grad,grad,-1.0_dp,1.0_dp)
    4128              :                                     !CALL dbcsr_dot(prev_grad,prev_step,denom)
    4129              :                                     !CALL dbcsr_hadamard_product(prec,prev_grad,tmp_k_blk)
    4130              :                                     !CALL dbcsr_dot(tmp_k_blk,prev_grad,numer)
    4131              :                                     !kappa=2.0_dp*numer/denom
    4132              :                                     !CALL dbcsr_dot(tmp_k_blk,grad,numer)
    4133              :                                     !tau=numer/denom
    4134              :                                     !CALL dbcsr_dot(prev_step,grad,numer)
    4135              :                                     !beta=tau-kappa*numer/denom
    4136            0 :                                     CALL dbcsr_copy(tmp_k_blk, grad)
    4137            0 :                                     CALL dbcsr_add(tmp_k_blk, prev_grad, 1.0_dp, -1.0_dp)
    4138            0 :                                     CALL dbcsr_dot(tmp_k_blk, prev_step, denom)
    4139            0 :                                     CALL dbcsr_dot(tmp_k_blk, prev_minus_prec_grad, numer)
    4140            0 :                                     kappa = -2.0_dp*numer/denom
    4141            0 :                                     CALL dbcsr_dot(tmp_k_blk, step, numer)
    4142            0 :                                     tau = -1.0_dp*numer/denom
    4143            0 :                                     CALL dbcsr_dot(prev_step, grad, numer)
    4144            0 :                                     beta = tau - kappa*numer/denom
    4145              :                                  CASE (cg_zero)
    4146            0 :                                     beta = 0.0_dp
    4147              :                                  CASE DEFAULT
    4148            0 :                                     CPABORT("illegal conjugator")
    4149              :                                  END SELECT
    4150              : 
    4151            0 :                                  IF (beta < 0.0_dp) THEN
    4152            0 :                                     IF (unit_nr > 0) THEN
    4153            0 :                                        WRITE (unit_nr, *) "Beta is negative, ", beta
    4154              :                                     END IF
    4155              :                                     reset_conjugator = .TRUE.
    4156              :                                  END IF
    4157              : 
    4158              :                               END IF
    4159              : 
    4160            0 :                               IF (md_in_k_space) THEN
    4161              :                                  reset_conjugator = .TRUE.
    4162              :                               END IF
    4163              : 
    4164            0 :                               IF (reset_conjugator) THEN
    4165              : 
    4166            0 :                                  beta = 0.0_dp
    4167              :                                  !reset_step_size=.TRUE.
    4168              : 
    4169            0 :                                  IF (unit_nr > 0) THEN
    4170            0 :                                     WRITE (unit_nr, *) "(Re)-setting conjugator to zero"
    4171              :                                  END IF
    4172              : 
    4173              :                               END IF
    4174              : 
    4175              :                               ! save the preconditioned gradient
    4176            0 :                               CALL dbcsr_copy(prev_minus_prec_grad, step)
    4177              : 
    4178              :                               ! conjugate the step direction
    4179            0 :                               CALL dbcsr_add(step, prev_step, 1.0_dp, beta)
    4180              : 
    4181            0 :                               CALL timestop(handle7)
    4182              : 
    4183              :                               ! update the step direction
    4184              :                            ELSE ! step update
    4185            0 :                               conjugacy_error = 0.0_dp
    4186              :                            END IF
    4187              : 
    4188              :                            ! compute the gradient with respect to the step size in the curr direction
    4189            0 :                            IF (line_search) THEN
    4190            0 :                               CALL dbcsr_dot(grad, step, gfun1)
    4191            0 :                               line_search_error = gfun1/gfun0
    4192              :                            ELSE
    4193            0 :                               CALL dbcsr_dot(grad, step, gfun0)
    4194              :                            END IF
    4195              : 
    4196              :                            ! make a step - update k
    4197            0 :                            IF (line_search) THEN
    4198              : 
    4199              :                               ! check if the trial step provides enough numerical accuracy
    4200            0 :                               safety_multiplier = 1.0E+1_dp ! must be more than one
    4201              :                               num_threshold = MAX(EPSILON(1.0_dp), &
    4202            0 :                                                   safety_multiplier*(almo_scf_env%eps_filter**2)*almo_scf_env%ndomains)
    4203            0 :                               IF (ABS(fun1 - fun0 - gfun0*step_size) < num_threshold) THEN
    4204            0 :                                  IF (unit_nr > 0) THEN
    4205              :                                     WRITE (unit_nr, '(T3,A,1X,E17.7)') &
    4206            0 :                                        "Numerical accuracy is too low to observe non-linear behavior", &
    4207            0 :                                        ABS(fun1 - fun0 - gfun0*step_size)
    4208            0 :                                     WRITE (unit_nr, '(T3,A,1X,E17.7,A,1X,E12.3)') "Error computing ", &
    4209            0 :                                        ABS(gfun0), &
    4210            0 :                                        " is smaller than the threshold", num_threshold
    4211              :                                  END IF
    4212            0 :                                  CPABORT("")
    4213              :                               END IF
    4214            0 :                               IF (ABS(gfun0) < num_threshold) THEN
    4215            0 :                                  IF (unit_nr > 0) THEN
    4216            0 :                                     WRITE (unit_nr, '(T3,A,1X,E17.7,A,1X,E12.3)') "Linear gradient", &
    4217            0 :                                        ABS(gfun0), &
    4218            0 :                                        " is smaller than the threshold", num_threshold
    4219              :                                  END IF
    4220            0 :                                  CPABORT("")
    4221              :                               END IF
    4222              : 
    4223            0 :                               use_quadratic_approximation = .TRUE.
    4224            0 :                               use_cubic_approximation = .FALSE.
    4225              : 
    4226              :                               ! find the minimum assuming quadratic form
    4227              :                               ! use f0, f1, g0
    4228            0 :                               step_size_quadratic_approx = -(gfun0*step_size*step_size)/(2.0_dp*(fun1 - fun0 - gfun0*step_size))
    4229              :                               ! use f0, f1, g1
    4230            0 :                              step_size_quadratic_approx2 = -(fun1 - fun0 - step_size*gfun1/2.0_dp)/(gfun1 - (fun1 - fun0)/step_size)
    4231              : 
    4232            0 :                               IF ((step_size_quadratic_approx < 0.0_dp) .AND. &
    4233              :                                   (step_size_quadratic_approx2 < 0.0_dp)) THEN
    4234            0 :                                  IF (unit_nr > 0) THEN
    4235              :                                     WRITE (unit_nr, '(T3,A,1X,E17.7,1X,E17.7,1X,A)') &
    4236            0 :                                        "Quadratic approximation gives negative steps", &
    4237            0 :                                        step_size_quadratic_approx, step_size_quadratic_approx2, &
    4238            0 :                                        "trying cubic..."
    4239              :                                  END IF
    4240              :                                  use_cubic_approximation = .TRUE.
    4241              :                                  use_quadratic_approximation = .FALSE.
    4242              :                               ELSE
    4243            0 :                                  IF (step_size_quadratic_approx < 0.0_dp) THEN
    4244            0 :                                     step_size_quadratic_approx = step_size_quadratic_approx2
    4245              :                                  END IF
    4246            0 :                                  IF (step_size_quadratic_approx2 < 0.0_dp) THEN
    4247            0 :                                     step_size_quadratic_approx2 = step_size_quadratic_approx
    4248              :                                  END IF
    4249              :                               END IF
    4250              : 
    4251              :                               ! check accuracy of the quadratic approximation
    4252              :                               IF (use_quadratic_approximation) THEN
    4253              :                                  quadratic_approx_error = ABS(step_size_quadratic_approx - &
    4254            0 :                                                               step_size_quadratic_approx2)/step_size_quadratic_approx
    4255            0 :                                  IF (quadratic_approx_error > quadratic_approx_error_threshold) THEN
    4256            0 :                                     IF (unit_nr > 0) THEN
    4257            0 :                                        WRITE (unit_nr, '(T3,A,1X,E17.7,1X,E17.7,1X,A)') "Quadratic approximation is poor", &
    4258            0 :                                           step_size_quadratic_approx, step_size_quadratic_approx2, &
    4259            0 :                                           "Try cubic approximation"
    4260              :                                     END IF
    4261              :                                     use_cubic_approximation = .TRUE.
    4262              :                                     use_quadratic_approximation = .FALSE.
    4263              :                                  END IF
    4264              :                               END IF
    4265              : 
    4266              :                               ! check if numerics is fine enough to capture the cubic form
    4267            0 :                               IF (use_cubic_approximation) THEN
    4268              : 
    4269              :                                  ! if quadratic approximation is not accurate enough
    4270              :                                  ! try to find the minimum assuming cubic form
    4271              :                                  ! aa*x**3 + bb*x**2 + cc*x + dd = f(x)
    4272            0 :                                  bb = (-step_size*gfun1 + 3.0_dp*(fun1 - fun0) - 2.0_dp*step_size*gfun0)/(step_size*step_size)
    4273            0 :                                  aa = (gfun1 - 2.0_dp*step_size*bb - gfun0)/(3.0_dp*step_size*step_size)
    4274              : 
    4275            0 :                                  IF (ABS(gfun1 - 2.0_dp*step_size*bb - gfun0) < num_threshold) THEN
    4276            0 :                                     IF (unit_nr > 0) THEN
    4277              :                                        WRITE (unit_nr, '(T3,A,1X,E17.7)') &
    4278            0 :                                           "Numerical accuracy is too low to observe cubic behavior", &
    4279            0 :                                           ABS(gfun1 - 2.0_dp*step_size*bb - gfun0)
    4280              :                                     END IF
    4281              :                                     use_cubic_approximation = .FALSE.
    4282              :                                     use_quadratic_approximation = .TRUE.
    4283              :                                  END IF
    4284            0 :                                  IF (ABS(gfun1) < num_threshold) THEN
    4285            0 :                                     IF (unit_nr > 0) THEN
    4286            0 :                                        WRITE (unit_nr, '(T3,A,1X,E17.7,A,1X,E12.3)') "Linear gradient", &
    4287            0 :                                           ABS(gfun1), &
    4288            0 :                                           " is smaller than the threshold", num_threshold
    4289              :                                     END IF
    4290              :                                     use_cubic_approximation = .FALSE.
    4291              :                                     use_quadratic_approximation = .TRUE.
    4292              :                                  END IF
    4293              :                               END IF
    4294              : 
    4295              :                               ! find the step assuming cubic approximation
    4296            0 :                               IF (use_cubic_approximation) THEN
    4297              :                                  ! to obtain the minimum of the cubic function solve the quadratic equation
    4298              :                                  ! 0.0*x**3 + 3.0*aa*x**2 + 2.0*bb*x + cc = 0
    4299            0 :                                  CALL analytic_line_search(0.0_dp, 3.0_dp*aa, 2.0_dp*bb, gfun0, minima, nmins)
    4300            0 :                                  IF (nmins < 1) THEN
    4301            0 :                                     IF (unit_nr > 0) THEN
    4302              :                                        WRITE (unit_nr, '(T3,A)') &
    4303            0 :                                           "Cubic approximation gives zero soultions! Use quadratic approximation"
    4304              :                                     END IF
    4305              :                                     use_quadratic_approximation = .TRUE.
    4306              :                                     use_cubic_approximation = .TRUE.
    4307              :                                  ELSE
    4308            0 :                                     step_size = minima(1)
    4309            0 :                                     IF (nmins > 1) THEN
    4310            0 :                                        IF (unit_nr > 0) THEN
    4311              :                                           WRITE (unit_nr, '(T3,A)') &
    4312            0 :                                              "More than one solution found! Use quadratic approximation"
    4313              :                                        END IF
    4314              :                                        use_quadratic_approximation = .TRUE.
    4315            0 :                                        use_cubic_approximation = .TRUE.
    4316              :                                     END IF
    4317              :                                  END IF
    4318              :                               END IF
    4319              : 
    4320            0 :                               IF (use_quadratic_approximation) THEN ! use quadratic approximation
    4321            0 :                                  IF (unit_nr > 0) THEN
    4322            0 :                                     WRITE (unit_nr, '(T3,A)') "Use quadratic approximation"
    4323              :                                  END IF
    4324            0 :                                  step_size = (step_size_quadratic_approx + step_size_quadratic_approx2)*0.5_dp
    4325              :                               END IF
    4326              : 
    4327              :                               ! one more check on the step size
    4328            0 :                               IF (step_size < 0.0_dp) THEN
    4329            0 :                                  CPABORT("Negative step proposed")
    4330              :                               END IF
    4331              : 
    4332              :                               CALL dbcsr_copy(almo_scf_env%matrix_k_blk(ispin), &
    4333            0 :                                               matrix_k_central)
    4334              :                               CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
    4335            0 :                                              step, 1.0_dp, step_size)
    4336              :                               CALL dbcsr_copy(matrix_k_central, &
    4337            0 :                                               almo_scf_env%matrix_k_blk(ispin))
    4338            0 :                               line_search = .FALSE.
    4339              : 
    4340              :                            ELSE
    4341              : 
    4342            0 :                               IF (md_in_k_space) THEN
    4343              : 
    4344              :                                  ! update velocities v(i) = v(i-1) + 0.5*dT*(a(i-1) + a(i))
    4345            0 :                                  IF (iteration /= 0) THEN
    4346              :                                     CALL dbcsr_add(velocity, &
    4347            0 :                                                    step, 1.0_dp, 0.5_dp*time_step)
    4348              :                                     CALL dbcsr_add(velocity, &
    4349            0 :                                                    prev_step, 1.0_dp, 0.5_dp*time_step)
    4350              :                                  END IF
    4351            0 :                                  kin_energy = dbcsr_frobenius_norm(velocity)
    4352            0 :                                  kin_energy = 0.5_dp*kin_energy*kin_energy
    4353              : 
    4354              :                                  ! update positions k(i) = k(i-1) + dT*v(i-1) + 0.5*dT*dT*a(i-1)
    4355              :                                  CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
    4356            0 :                                                 velocity, 1.0_dp, time_step)
    4357              :                                  CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
    4358            0 :                                                 step, 1.0_dp, 0.5_dp*time_step*time_step)
    4359              : 
    4360              :                               ELSE
    4361              : 
    4362            0 :                                  IF (reset_step_size) THEN
    4363            0 :                                     step_size = almo_scf_env%opt_k_trial_step_size
    4364            0 :                                     reset_step_size = .FALSE.
    4365              :                                  ELSE
    4366            0 :                                     step_size = step_size*almo_scf_env%opt_k_trial_step_size_multiplier
    4367              :                                  END IF
    4368              :                                  CALL dbcsr_copy(almo_scf_env%matrix_k_blk(ispin), &
    4369            0 :                                                  matrix_k_central)
    4370              :                                  CALL dbcsr_add(almo_scf_env%matrix_k_blk(ispin), &
    4371            0 :                                                 step, 1.0_dp, step_size)
    4372            0 :                                  line_search = .TRUE.
    4373              :                               END IF
    4374              : 
    4375              :                            END IF
    4376              : 
    4377              :                         END IF ! .NOT.prepare_to_exit
    4378              : 
    4379              :                         ! print the status of the optimization
    4380            0 :                         t2a = m_walltime()
    4381            0 :                         IF (unit_nr > 0) THEN
    4382            0 :                            IF (md_in_k_space) THEN
    4383              :                               WRITE (unit_nr, '(T6,A,1X,I5,1X,E12.3,E16.7,F15.9,F15.9,F15.9,E12.3,F15.9,F15.9,F8.3)') &
    4384            0 :                                  "K iter CG", iteration, time_step, time_step*iteration, &
    4385            0 :                                  energy_correction(ispin), obj_function, delta_obj_function, grad_norm, &
    4386            0 :                                  kin_energy, kin_energy + obj_function, beta
    4387              :                            ELSE
    4388            0 :                               IF (line_search .OR. prepare_to_exit) THEN
    4389              :                                  WRITE (unit_nr, '(T6,A,1X,I3,1X,E12.3,F16.10,F16.10,E12.3,E12.3,E12.3,F8.3,F8.3,F10.3)') &
    4390            0 :                                     "K iter CG", iteration, step_size, &
    4391            0 :                                     energy_correction(ispin), delta_obj_function, grad_norm, &
    4392            0 :                                     gfun0, line_search_error, beta, conjugacy_error, t2a - t1a
    4393              :                                  !(flop1+flop2)/(1.0E6_dp*(t2-t1))
    4394              :                               ELSE
    4395              :                                  WRITE (unit_nr, '(T6,A,1X,I3,1X,E12.3,F16.10,F16.10,E12.3,E12.3,E12.3,F8.3,F8.3,F10.3)') &
    4396            0 :                                     "K iter LS", iteration, step_size, &
    4397            0 :                                     energy_correction(ispin), delta_obj_function, grad_norm, &
    4398            0 :                                     gfun1, line_search_error, beta, conjugacy_error, t2a - t1a
    4399              :                                  !(flop1+flop2)/(1.0E6_dp*(t2-t1))
    4400              :                               END IF
    4401              :                            END IF
    4402            0 :                            CALL m_flush(unit_nr)
    4403              :                         END IF
    4404            0 :                         t1a = m_walltime()
    4405              : 
    4406              :                      ELSE ! opt_k_max_iter .eq. 0
    4407              :                         prepare_to_exit = .TRUE.
    4408              :                      END IF ! opt_k_max_iter .ne. 0
    4409              : 
    4410            0 :                      IF (.NOT. line_search) iteration = iteration + 1
    4411              : 
    4412            0 :                      IF (prepare_to_exit) EXIT
    4413              : 
    4414              :                   END DO ! end iterations on K
    4415              : 
    4416            0 :                   IF (converged .OR. (outer_opt_k_iteration >= outer_opt_k_max_iter)) THEN
    4417            0 :                      outer_opt_k_prepare_to_exit = .TRUE.
    4418              :                   END IF
    4419              : 
    4420            0 :                   IF (almo_scf_env%deloc_truncate_virt /= virt_full) THEN
    4421              : 
    4422            0 :                      IF (unit_nr > 0) THEN
    4423            0 :                         WRITE (unit_nr, *) "Updating ALMO virtuals"
    4424              :                      END IF
    4425              : 
    4426            0 :                      CALL timeset('k_opt_v0_update', handle8)
    4427              : 
    4428              :                      ! update retained ALMO virtuals to restart the cg iterations
    4429              :                      CALL dbcsr_multiply("N", "N", 1.0_dp, &
    4430              :                                          almo_scf_env%matrix_v_disc_blk(ispin), &
    4431              :                                          almo_scf_env%matrix_k_blk(ispin), &
    4432              :                                          0.0_dp, vr_fixed, &
    4433            0 :                                          filter_eps=almo_scf_env%eps_filter)
    4434              :                      CALL dbcsr_add(vr_fixed, almo_scf_env%matrix_v_blk(ispin), &
    4435            0 :                                     +1.0_dp, +1.0_dp)
    4436              : 
    4437              :                      ! update discarded ALMO virtuals to restart the cg iterations
    4438              :                      CALL dbcsr_multiply("N", "T", 1.0_dp, &
    4439              :                                          almo_scf_env%matrix_v_blk(ispin), &
    4440              :                                          almo_scf_env%matrix_k_blk(ispin), &
    4441              :                                          0.0_dp, vd_fixed, &
    4442            0 :                                          filter_eps=almo_scf_env%eps_filter)
    4443              :                      CALL dbcsr_add(vd_fixed, almo_scf_env%matrix_v_disc_blk(ispin), &
    4444            0 :                                     -1.0_dp, +1.0_dp)
    4445              : 
    4446              :                      ! orthogonalize new orbitals on fragments
    4447              :                      CALL get_overlap(bra=vr_fixed, &
    4448              :                                       ket=vr_fixed, &
    4449              :                                       overlap=k_vr_index_down, &
    4450              :                                       metric=almo_scf_env%matrix_s_blk(1), &
    4451              :                                       retain_overlap_sparsity=.FALSE., &
    4452            0 :                                       eps_filter=almo_scf_env%eps_filter)
    4453              :                      CALL dbcsr_create(vr_index_sqrt_inv, template=k_vr_index_down, &
    4454            0 :                                        matrix_type=dbcsr_type_no_symmetry)
    4455              :                      CALL dbcsr_create(vr_index_sqrt, template=k_vr_index_down, &
    4456            0 :                                        matrix_type=dbcsr_type_no_symmetry)
    4457              :                      CALL matrix_sqrt_Newton_Schulz(vr_index_sqrt, &
    4458              :                                                     vr_index_sqrt_inv, &
    4459              :                                                     k_vr_index_down, &
    4460              :                                                     threshold=almo_scf_env%eps_filter, &
    4461              :                                                     order=almo_scf_env%order_lanczos, &
    4462              :                                                     eps_lanczos=almo_scf_env%eps_lanczos, &
    4463            0 :                                                     max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    4464              :                      IF (safe_mode) THEN
    4465              :                         CALL dbcsr_create(matrix_tmp1, template=k_vr_index_down, &
    4466              :                                           matrix_type=dbcsr_type_no_symmetry)
    4467              :                         CALL dbcsr_create(matrix_tmp2, template=k_vr_index_down, &
    4468              :                                           matrix_type=dbcsr_type_no_symmetry)
    4469              : 
    4470              :                         CALL dbcsr_multiply("N", "N", 1.0_dp, vr_index_sqrt_inv, &
    4471              :                                             k_vr_index_down, &
    4472              :                                             0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    4473              :                         CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
    4474              :                                             vr_index_sqrt_inv, &
    4475              :                                             0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    4476              : 
    4477              :                         frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    4478              :                         CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    4479              :                         frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    4480              :                         IF (unit_nr > 0) THEN
    4481              :                            WRITE (unit_nr, *) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)", &
    4482              :                               frob_matrix/frob_matrix_base
    4483              :                         END IF
    4484              : 
    4485              :                         CALL dbcsr_release(matrix_tmp1)
    4486              :                         CALL dbcsr_release(matrix_tmp2)
    4487              :                      END IF
    4488              :                      CALL dbcsr_multiply("N", "N", 1.0_dp, &
    4489              :                                          vr_fixed, &
    4490              :                                          vr_index_sqrt_inv, &
    4491              :                                          0.0_dp, almo_scf_env%matrix_v_blk(ispin), &
    4492            0 :                                          filter_eps=almo_scf_env%eps_filter)
    4493              : 
    4494              :                      CALL get_overlap(bra=vd_fixed, &
    4495              :                                       ket=vd_fixed, &
    4496              :                                       overlap=k_vd_index_down, &
    4497              :                                       metric=almo_scf_env%matrix_s_blk(1), &
    4498              :                                       retain_overlap_sparsity=.FALSE., &
    4499            0 :                                       eps_filter=almo_scf_env%eps_filter)
    4500              :                      CALL dbcsr_create(vd_index_sqrt_inv, template=k_vd_index_down, &
    4501            0 :                                        matrix_type=dbcsr_type_no_symmetry)
    4502              :                      CALL dbcsr_create(vd_index_sqrt, template=k_vd_index_down, &
    4503            0 :                                        matrix_type=dbcsr_type_no_symmetry)
    4504              :                      CALL matrix_sqrt_Newton_Schulz(vd_index_sqrt, &
    4505              :                                                     vd_index_sqrt_inv, &
    4506              :                                                     k_vd_index_down, &
    4507              :                                                     threshold=almo_scf_env%eps_filter, &
    4508              :                                                     order=almo_scf_env%order_lanczos, &
    4509              :                                                     eps_lanczos=almo_scf_env%eps_lanczos, &
    4510            0 :                                                     max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    4511              :                      IF (safe_mode) THEN
    4512              :                         CALL dbcsr_create(matrix_tmp1, template=k_vd_index_down, &
    4513              :                                           matrix_type=dbcsr_type_no_symmetry)
    4514              :                         CALL dbcsr_create(matrix_tmp2, template=k_vd_index_down, &
    4515              :                                           matrix_type=dbcsr_type_no_symmetry)
    4516              : 
    4517              :                         CALL dbcsr_multiply("N", "N", 1.0_dp, vd_index_sqrt_inv, &
    4518              :                                             k_vd_index_down, &
    4519              :                                             0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    4520              :                         CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, &
    4521              :                                             vd_index_sqrt_inv, &
    4522              :                                             0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    4523              : 
    4524              :                         frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    4525              :                         CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    4526              :                         frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    4527              :                         IF (unit_nr > 0) THEN
    4528              :                            WRITE (unit_nr, *) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)", &
    4529              :                               frob_matrix/frob_matrix_base
    4530              :                         END IF
    4531              : 
    4532              :                         CALL dbcsr_release(matrix_tmp1)
    4533              :                         CALL dbcsr_release(matrix_tmp2)
    4534              :                      END IF
    4535              :                      CALL dbcsr_multiply("N", "N", 1.0_dp, &
    4536              :                                          vd_fixed, &
    4537              :                                          vd_index_sqrt_inv, &
    4538              :                                          0.0_dp, almo_scf_env%matrix_v_disc_blk(ispin), &
    4539            0 :                                          filter_eps=almo_scf_env%eps_filter)
    4540              : 
    4541            0 :                      CALL dbcsr_release(vr_index_sqrt_inv)
    4542            0 :                      CALL dbcsr_release(vr_index_sqrt)
    4543            0 :                      CALL dbcsr_release(vd_index_sqrt_inv)
    4544            0 :                      CALL dbcsr_release(vd_index_sqrt)
    4545              : 
    4546            0 :                      CALL timestop(handle8)
    4547              : 
    4548              :                   END IF ! ne.virt_full
    4549              : 
    4550              :                   ! RZK-warning released outside the outer loop
    4551            0 :                   CALL dbcsr_release(sigma_vv_sqrt)
    4552            0 :                   CALL dbcsr_release(sigma_vv_sqrt_inv)
    4553            0 :                   IF (almo_scf_env%deloc_truncate_virt /= virt_full) THEN
    4554            0 :                      CALL dbcsr_release(k_vr_index_down)
    4555            0 :                      CALL dbcsr_release(k_vd_index_down)
    4556              :                      !CALL dbcsr_release(k_vd_index_up)
    4557            0 :                      CALL dbcsr_release(matrix_k_central)
    4558            0 :                      CALL dbcsr_release(vr_fixed)
    4559            0 :                      CALL dbcsr_release(vd_fixed)
    4560            0 :                      CALL dbcsr_release(grad)
    4561            0 :                      CALL dbcsr_release(prec)
    4562            0 :                      CALL dbcsr_release(prev_grad)
    4563            0 :                      CALL dbcsr_release(tmp3_vd_vr)
    4564            0 :                      CALL dbcsr_release(tmp1_n_vr)
    4565            0 :                      CALL dbcsr_release(tmp_k_blk)
    4566            0 :                      CALL dbcsr_release(t_curr)
    4567            0 :                      CALL dbcsr_release(sigma_oo_curr)
    4568            0 :                      CALL dbcsr_release(sigma_oo_curr_inv)
    4569            0 :                      CALL dbcsr_release(step)
    4570            0 :                      CALL dbcsr_release(tmp2_n_o)
    4571            0 :                      CALL dbcsr_release(tmp4_o_vr)
    4572            0 :                      CALL dbcsr_release(prev_step)
    4573            0 :                      CALL dbcsr_release(prev_minus_prec_grad)
    4574            0 :                      IF (md_in_k_space) THEN
    4575            0 :                         CALL dbcsr_release(velocity)
    4576              :                      END IF
    4577              : 
    4578              :                   END IF
    4579              : 
    4580            0 :                   outer_opt_k_iteration = outer_opt_k_iteration + 1
    4581            0 :                   IF (outer_opt_k_prepare_to_exit) EXIT
    4582              : 
    4583              :                END DO ! outer loop for k
    4584              : 
    4585              :             END DO ! ispin
    4586              : 
    4587              :             ! RZK-warning update mo orbitals
    4588              : 
    4589              :          ELSE ! virtual orbitals might not be available use projected AOs
    4590              : 
    4591              :             ! compute sqrt(S) and inv(sqrt(S))
    4592              :             ! RZK-warning - remove this sqrt(S) and inv(sqrt(S))
    4593              :             ! ideally ALMO scf should use sigma and sigma_inv in
    4594              :             ! the tensor_up_down representation
    4595            0 :             IF (.NOT. almo_scf_env%s_sqrt_done) THEN
    4596              : 
    4597            0 :                IF (unit_nr > 0) THEN
    4598            0 :                   WRITE (unit_nr, *) "sqrt and inv(sqrt) of AO overlap matrix"
    4599              :                END IF
    4600              :                CALL dbcsr_create(almo_scf_env%matrix_s_sqrt(1), &
    4601              :                                  template=almo_scf_env%matrix_s(1), &
    4602            0 :                                  matrix_type=dbcsr_type_no_symmetry)
    4603              :                CALL dbcsr_create(almo_scf_env%matrix_s_sqrt_inv(1), &
    4604              :                                  template=almo_scf_env%matrix_s(1), &
    4605            0 :                                  matrix_type=dbcsr_type_no_symmetry)
    4606              : 
    4607              :                CALL matrix_sqrt_Newton_Schulz(almo_scf_env%matrix_s_sqrt(1), &
    4608              :                                               almo_scf_env%matrix_s_sqrt_inv(1), &
    4609              :                                               almo_scf_env%matrix_s(1), &
    4610              :                                               threshold=almo_scf_env%eps_filter, &
    4611              :                                               order=almo_scf_env%order_lanczos, &
    4612              :                                               eps_lanczos=almo_scf_env%eps_lanczos, &
    4613            0 :                                               max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    4614              : 
    4615              :                IF (safe_mode) THEN
    4616              :                   CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_s(1), &
    4617              :                                     matrix_type=dbcsr_type_no_symmetry)
    4618              :                   CALL dbcsr_create(matrix_tmp2, template=almo_scf_env%matrix_s(1), &
    4619              :                                     matrix_type=dbcsr_type_no_symmetry)
    4620              : 
    4621              :                   CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_s_sqrt_inv(1), &
    4622              :                                       almo_scf_env%matrix_s(1), &
    4623              :                                       0.0_dp, matrix_tmp1, filter_eps=almo_scf_env%eps_filter)
    4624              :                   CALL dbcsr_multiply("N", "N", 1.0_dp, matrix_tmp1, almo_scf_env%matrix_s_sqrt_inv(1), &
    4625              :                                       0.0_dp, matrix_tmp2, filter_eps=almo_scf_env%eps_filter)
    4626              : 
    4627              :                   frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp2)
    4628              :                   CALL dbcsr_add_on_diag(matrix_tmp2, -1.0_dp)
    4629              :                   frob_matrix = dbcsr_frobenius_norm(matrix_tmp2)
    4630              :                   IF (unit_nr > 0) THEN
    4631              :                      WRITE (unit_nr, *) "Error for (inv(sqrt(S))*S*inv(sqrt(S))-I)", frob_matrix/frob_matrix_base
    4632              :                   END IF
    4633              : 
    4634              :                   CALL dbcsr_release(matrix_tmp1)
    4635              :                   CALL dbcsr_release(matrix_tmp2)
    4636              :                END IF
    4637              : 
    4638            0 :                almo_scf_env%s_sqrt_done = .TRUE.
    4639              : 
    4640              :             END IF
    4641              : 
    4642            0 :             DO ispin = 1, nspin
    4643              : 
    4644            0 :                CALL ct_step_env_init(ct_step_env)
    4645              :                CALL ct_step_env_set(ct_step_env, &
    4646              :                                     para_env=almo_scf_env%para_env, &
    4647              :                                     blacs_env=almo_scf_env%blacs_env, &
    4648              :                                     use_occ_orbs=.TRUE., &
    4649              :                                     use_virt_orbs=almo_scf_env%deloc_cayley_use_virt_orbs, &
    4650              :                                     occ_orbs_orthogonal=.FALSE., &
    4651              :                                     virt_orbs_orthogonal=almo_scf_env%orthogonal_basis, &
    4652              :                                     tensor_type=almo_scf_env%deloc_cayley_tensor_type, &
    4653              :                                     neglect_quadratic_term=almo_scf_env%deloc_cayley_linear, &
    4654              :                                     calculate_energy_corr=.TRUE., &
    4655              :                                     update_p=.TRUE., &
    4656              :                                     update_q=.FALSE., &
    4657              :                                     pp_preconditioner_full=almo_scf_env%deloc_cayley_occ_precond, &
    4658              :                                     qq_preconditioner_full=almo_scf_env%deloc_cayley_vir_precond, &
    4659              :                                     eps_convergence=almo_scf_env%deloc_cayley_eps_convergence, &
    4660              :                                     eps_filter=almo_scf_env%eps_filter, &
    4661              :                                     !nspins=almo_scf_env%nspins,&
    4662              :                                     q_index_up=almo_scf_env%matrix_s_sqrt_inv(1), &
    4663              :                                     q_index_down=almo_scf_env%matrix_s_sqrt(1), &
    4664              :                                     p_index_up=almo_scf_env%matrix_sigma_sqrt_inv(ispin), &
    4665              :                                     p_index_down=almo_scf_env%matrix_sigma_sqrt(ispin), &
    4666              :                                     matrix_ks=almo_scf_env%matrix_ks_0deloc(ispin), &
    4667              :                                     matrix_p=almo_scf_env%matrix_p(ispin), &
    4668              :                                     matrix_qp_template=almo_scf_env%matrix_t(ispin), &
    4669              :                                     matrix_pq_template=almo_scf_env%matrix_t_tr(ispin), &
    4670              :                                     matrix_t=almo_scf_env%matrix_t(ispin), &
    4671              :                                     conjugator=almo_scf_env%deloc_cayley_conjugator, &
    4672            0 :                                     max_iter=almo_scf_env%deloc_cayley_max_iter)
    4673              : 
    4674              :                ! perform calculations
    4675            0 :                CALL ct_step_execute(ct_step_env)
    4676              : 
    4677              :                ! for now we do not need the new set of orbitals
    4678              :                ! just get the energy correction
    4679              :                CALL ct_step_env_get(ct_step_env, &
    4680            0 :                                     energy_correction=energy_correction(ispin))
    4681              :                !copy_da_energy_matrix=matrix_eda(ispin),&
    4682              :                !copy_da_charge_matrix=matrix_cta(ispin),&
    4683              : 
    4684            0 :                CALL ct_step_env_clean(ct_step_env)
    4685              : 
    4686              :             END DO
    4687              : 
    4688            0 :             energy_correction(1) = energy_correction(1)*spin_factor
    4689              : 
    4690              :          END IF
    4691              : 
    4692              :          ! print the energy correction and exit
    4693            0 :          DO ispin = 1, nspin
    4694              : 
    4695            0 :             IF (unit_nr > 0) THEN
    4696            0 :                WRITE (unit_nr, *)
    4697            0 :                WRITE (unit_nr, '(T2,A,I6,F20.9)') "ECORR", ispin, &
    4698            0 :                   energy_correction(ispin)
    4699            0 :                WRITE (unit_nr, *)
    4700              :             END IF
    4701            0 :             energy_correction_final = energy_correction_final + energy_correction(ispin)
    4702              : 
    4703              :             !!! print out the results of decomposition analysis
    4704              :             !!IF (unit_nr>0) THEN
    4705              :             !!   WRITE(unit_nr,*)
    4706              :             !!   WRITE(unit_nr,'(T2,A)') "ENERGY DECOMPOSITION"
    4707              :             !!ENDIF
    4708              :             !!CALL print_block_sum(eda_matrix(ispin), unit_nr=6)
    4709              :             !!IF (unit_nr>0) THEN
    4710              :             !!   WRITE(unit_nr,*)
    4711              :             !!   WRITE(unit_nr,'(T2,A)') "CHARGE DECOMPOSITION"
    4712              :             !!ENDIF
    4713              :             !!CALL print_block_sum(cta_matrix(ispin), unit_nr=6)
    4714              : 
    4715              :             ! obtain density matrix from updated MOs
    4716              :             ! RZK-later sigma and sigma_inv are lost here
    4717              :             CALL almo_scf_t_to_proj(t=almo_scf_env%matrix_t(ispin), &
    4718              :                                     p=almo_scf_env%matrix_p(ispin), &
    4719              :                                     eps_filter=almo_scf_env%eps_filter, &
    4720              :                                     orthog_orbs=.FALSE., &
    4721              :                                     nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    4722              :                                     s=almo_scf_env%matrix_s(1), &
    4723              :                                     sigma=almo_scf_env%matrix_sigma(ispin), &
    4724              :                                     sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
    4725              :                                     !use_guess=use_guess, &
    4726              :                                     algorithm=almo_scf_env%sigma_inv_algorithm, &
    4727              :                                     inverse_accelerator=almo_scf_env%order_lanczos, &
    4728              :                                     inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
    4729              :                                     eps_lanczos=almo_scf_env%eps_lanczos, &
    4730              :                                     max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
    4731              :                                     para_env=almo_scf_env%para_env, &
    4732            0 :                                     blacs_env=almo_scf_env%blacs_env)
    4733              : 
    4734            0 :             IF (almo_scf_env%nspins == 1) &
    4735              :                CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), &
    4736            0 :                                 spin_factor)
    4737              : 
    4738              :          END DO
    4739              : 
    4740              :       CASE (dm_ls_step)
    4741              : 
    4742              :          ! compute the inverse of S
    4743            0 :          IF (.NOT. almo_scf_env%s_inv_done) THEN
    4744            0 :             IF (unit_nr > 0) THEN
    4745            0 :                WRITE (unit_nr, *) "Inverting AO overlap matrix"
    4746              :             END IF
    4747              :             CALL dbcsr_create(almo_scf_env%matrix_s_inv(1), &
    4748              :                               template=almo_scf_env%matrix_s(1), &
    4749            0 :                               matrix_type=dbcsr_type_no_symmetry)
    4750            0 :             IF (.NOT. almo_scf_env%s_sqrt_done) THEN
    4751              :                CALL invert_Hotelling(almo_scf_env%matrix_s_inv(1), &
    4752              :                                      almo_scf_env%matrix_s(1), &
    4753            0 :                                      threshold=almo_scf_env%eps_filter)
    4754              :             ELSE
    4755              :                CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_s_sqrt_inv(1), &
    4756              :                                    almo_scf_env%matrix_s_sqrt_inv(1), &
    4757              :                                    0.0_dp, almo_scf_env%matrix_s_inv(1), &
    4758            0 :                                    filter_eps=almo_scf_env%eps_filter)
    4759              :             END IF
    4760              : 
    4761              :             IF (safe_mode) THEN
    4762              :                CALL dbcsr_create(matrix_tmp1, template=almo_scf_env%matrix_s(1), &
    4763              :                                  matrix_type=dbcsr_type_no_symmetry)
    4764              :                CALL dbcsr_multiply("N", "N", 1.0_dp, almo_scf_env%matrix_s_inv(1), &
    4765              :                                    almo_scf_env%matrix_s(1), &
    4766              :                                    0.0_dp, matrix_tmp1, &
    4767              :                                    filter_eps=almo_scf_env%eps_filter)
    4768              :                frob_matrix_base = dbcsr_frobenius_norm(matrix_tmp1)
    4769              :                CALL dbcsr_add_on_diag(matrix_tmp1, -1.0_dp)
    4770              :                frob_matrix = dbcsr_frobenius_norm(matrix_tmp1)
    4771              :                IF (unit_nr > 0) THEN
    4772              :                   WRITE (unit_nr, *) "Error for (inv(S)*S-I)", &
    4773              :                      frob_matrix/frob_matrix_base
    4774              :                END IF
    4775              :                CALL dbcsr_release(matrix_tmp1)
    4776              :             END IF
    4777              : 
    4778            0 :             almo_scf_env%s_inv_done = .TRUE.
    4779              : 
    4780              :          END IF
    4781              : 
    4782            0 :          DO ispin = 1, nspin
    4783              :             ! RZK-warning the preconditioner is very important
    4784              :             !       IF (.FALSE.) THEN
    4785              :             !           CALL apply_matrix_preconditioner(almo_scf_env%matrix_ks(ispin),&
    4786              :             !                   "forward",almo_scf_env%matrix_s_blk_sqrt(1),&
    4787              :             !                   almo_scf_env%matrix_s_blk_sqrt_inv(1))
    4788              :             !       ENDIF
    4789              :             !CALL dbcsr_filter(almo_scf_env%matrix_ks(ispin),&
    4790              :             !         almo_scf_env%eps_filter)
    4791              :          END DO
    4792              : 
    4793            0 :          ALLOCATE (matrix_p_almo_scf_converged(nspin))
    4794            0 :          DO ispin = 1, nspin
    4795              :             CALL dbcsr_create(matrix_p_almo_scf_converged(ispin), &
    4796            0 :                               template=almo_scf_env%matrix_p(ispin))
    4797              :             CALL dbcsr_copy(matrix_p_almo_scf_converged(ispin), &
    4798            0 :                             almo_scf_env%matrix_p(ispin))
    4799              :          END DO
    4800              : 
    4801              :          ! update the density matrix
    4802            0 :          DO ispin = 1, nspin
    4803              : 
    4804            0 :             nelectron_spin_real(1) = almo_scf_env%nelectrons_spin(ispin)
    4805            0 :             IF (almo_scf_env%nspins == 1) &
    4806            0 :                nelectron_spin_real(1) = nelectron_spin_real(1)/2
    4807              : 
    4808            0 :             local_mu(1) = SUM(almo_scf_env%mu_of_domain(:, ispin))/almo_scf_env%ndomains
    4809            0 :             fake(1) = 123523
    4810              : 
    4811              :             ! RZK UPDATE! the update algorithm is removed because
    4812              :             ! RZK UPDATE! it requires updating core LS_SCF routines
    4813              :             ! RZK UPDATE! (the code exists in the CVS version)
    4814            0 :             CPABORT("CVS only: density_matrix_sign has not been updated in SVN")
    4815              :             ! RZK UPDATE!CALL density_matrix_sign(almo_scf_env%matrix_p(ispin),&
    4816              :             ! RZK UPDATE!                     local_mu,&
    4817              :             ! RZK UPDATE!                     almo_scf_env%fixed_mu,&
    4818              :             ! RZK UPDATE!                     almo_scf_env%matrix_ks_0deloc(ispin),&
    4819              :             ! RZK UPDATE!                     almo_scf_env%matrix_s(1), &
    4820              :             ! RZK UPDATE!                     almo_scf_env%matrix_s_inv(1), &
    4821              :             ! RZK UPDATE!                     nelectron_spin_real,&
    4822              :             ! RZK UPDATE!                     almo_scf_env%eps_filter,&
    4823              :             ! RZK UPDATE!                     fake)
    4824              :             ! RZK UPDATE!
    4825            0 :             almo_scf_env%mu = local_mu(1)
    4826              : 
    4827              :             !IF (almo_scf_env%has_s_preconditioner) THEN
    4828              :             !    CALL apply_matrix_preconditioner(&
    4829              :             !             almo_scf_env%matrix_p_blk(ispin),&
    4830              :             !             "forward",almo_scf_env%matrix_s_blk_sqrt(1),&
    4831              :             !             almo_scf_env%matrix_s_blk_sqrt_inv(1))
    4832              :             !ENDIF
    4833              :             !CALL dbcsr_filter(almo_scf_env%matrix_p(ispin),&
    4834              :             !        almo_scf_env%eps_filter)
    4835              : 
    4836            0 :             IF (almo_scf_env%nspins == 1) &
    4837              :                CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), &
    4838            0 :                                 spin_factor)
    4839              : 
    4840              :             !CALL dbcsr_dot(almo_scf_env%matrix_ks_0deloc(ispin),&
    4841              :             !  almo_scf_env%matrix_p(ispin),&
    4842              :             !  energy_correction(ispin))
    4843              :             !IF (unit_nr>0) THEN
    4844              :             !   WRITE(unit_nr,*)
    4845              :             !   WRITE(unit_nr,'(T2,A,I6,F20.9)') "EFAKE",ispin,&
    4846              :             !           energy_correction(ispin)
    4847              :             !   WRITE(unit_nr,*)
    4848              :             !ENDIF
    4849              :             CALL dbcsr_add(matrix_p_almo_scf_converged(ispin), &
    4850            0 :                            almo_scf_env%matrix_p(ispin), -1.0_dp, 1.0_dp)
    4851              :             CALL dbcsr_dot(almo_scf_env%matrix_ks_0deloc(ispin), &
    4852              :                            matrix_p_almo_scf_converged(ispin), &
    4853            0 :                            energy_correction(ispin))
    4854              : 
    4855            0 :             energy_correction_final = energy_correction_final + energy_correction(ispin)
    4856              : 
    4857            0 :             IF (unit_nr > 0) THEN
    4858            0 :                WRITE (unit_nr, *)
    4859            0 :                WRITE (unit_nr, '(T2,A,I6,F20.9)') "ECORR", ispin, &
    4860            0 :                   energy_correction(ispin)
    4861            0 :                WRITE (unit_nr, *)
    4862              :             END IF
    4863              : 
    4864              :          END DO
    4865              : 
    4866            0 :          DO ispin = 1, nspin
    4867            0 :             CALL dbcsr_release(matrix_p_almo_scf_converged(ispin))
    4868              :          END DO
    4869            0 :          DEALLOCATE (matrix_p_almo_scf_converged)
    4870              : 
    4871              :       END SELECT ! algorithm selection
    4872              : 
    4873            0 :       t2 = m_walltime()
    4874              : 
    4875            0 :       IF (unit_nr > 0) THEN
    4876            0 :          WRITE (unit_nr, *)
    4877            0 :          WRITE (unit_nr, '(T2,A,F18.9,F18.9,F18.9,F12.6)') "ETOT", &
    4878            0 :             almo_scf_env%almo_scf_energy, &
    4879            0 :             energy_correction_final, &
    4880            0 :             almo_scf_env%almo_scf_energy + energy_correction_final, &
    4881            0 :             t2 - t1
    4882            0 :          WRITE (unit_nr, *)
    4883              :       END IF
    4884              : 
    4885            0 :       CALL timestop(handle)
    4886              : 
    4887            0 :    END SUBROUTINE harris_foulkes_correction
    4888              : 
    4889              : ! **************************************************************************************************
    4890              : !> \brief triu of a dbcsr matrix
    4891              : !> \param matrix ...
    4892              : ! **************************************************************************************************
    4893            0 :    SUBROUTINE make_triu(matrix)
    4894              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
    4895              : 
    4896              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'make_triu'
    4897              : 
    4898              :       INTEGER                                            :: col, handle, i, j, row
    4899            0 :       REAL(dp), DIMENSION(:, :), POINTER                 :: block
    4900              :       TYPE(dbcsr_iterator_type)                          :: iter
    4901              : 
    4902            0 :       CALL timeset(routineN, handle)
    4903              : 
    4904            0 :       CALL dbcsr_iterator_start(iter, matrix)
    4905            0 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
    4906            0 :          CALL dbcsr_iterator_next_block(iter, row, col, block)
    4907            0 :          IF (row > col) block(:, :) = 0.0_dp
    4908            0 :          IF (row == col) THEN
    4909            0 :             DO j = 1, SIZE(block, 2)
    4910            0 :             DO i = j + 1, SIZE(block, 1)
    4911            0 :                block(i, j) = 0.0_dp
    4912              :             END DO
    4913              :             END DO
    4914              :          END IF
    4915              :       END DO
    4916            0 :       CALL dbcsr_iterator_stop(iter)
    4917            0 :       CALL dbcsr_filter(matrix, eps=0.0_dp)
    4918              : 
    4919            0 :       CALL timestop(handle)
    4920            0 :    END SUBROUTINE make_triu
    4921              : 
    4922              : ! **************************************************************************************************
    4923              : !> \brief Computes a diagonal preconditioner for the cg optimization of k matrix
    4924              : !> \param prec ...
    4925              : !> \param vd_prop ...
    4926              : !> \param f ...
    4927              : !> \param x ...
    4928              : !> \param oo_inv_x_tr ...
    4929              : !> \param s ...
    4930              : !> \param grad ...
    4931              : !> \param vd_blk ...
    4932              : !> \param t ...
    4933              : !> \param template_vd_vd_blk ...
    4934              : !> \param template_vr_vr_blk ...
    4935              : !> \param template_n_vr ...
    4936              : !> \param spin_factor ...
    4937              : !> \param eps_filter ...
    4938              : !> \par History
    4939              : !>       2011.09 created [Rustam Z Khaliullin]
    4940              : !> \author Rustam Z Khaliullin
    4941              : ! **************************************************************************************************
    4942            0 :    SUBROUTINE opt_k_create_preconditioner(prec, vd_prop, f, x, oo_inv_x_tr, s, grad, &
    4943              :                                           vd_blk, t, template_vd_vd_blk, template_vr_vr_blk, template_n_vr, &
    4944              :                                           spin_factor, eps_filter)
    4945              : 
    4946              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: prec
    4947              :       TYPE(dbcsr_type), INTENT(IN)                       :: vd_prop, f, x, oo_inv_x_tr, s
    4948              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: grad
    4949              :       TYPE(dbcsr_type), INTENT(IN)                       :: vd_blk, t, template_vd_vd_blk, &
    4950              :                                                             template_vr_vr_blk, template_n_vr
    4951              :       REAL(KIND=dp), INTENT(IN)                          :: spin_factor, eps_filter
    4952              : 
    4953              :       CHARACTER(len=*), PARAMETER :: routineN = 'opt_k_create_preconditioner'
    4954              : 
    4955              :       INTEGER                                            :: handle, p_nrows, q_nrows
    4956            0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: p_diagonal, q_diagonal
    4957              :       TYPE(dbcsr_type)                                   :: pp_diag, qq_diag, t1, t2, tmp, &
    4958              :                                                             tmp1_n_vr, tmp2_n_vr, tmp_n_vd, &
    4959              :                                                             tmp_vd_vd_blk, tmp_vr_vr_blk
    4960              : 
    4961              : ! init diag blocks outside
    4962              : ! init diag blocks otside
    4963              : !INTEGER                                  :: iblock_row, iblock_col,&
    4964              : !                                            nblkrows_tot, nblkcols_tot
    4965              : !REAL(KIND=dp), DIMENSION(:, :), POINTER  :: p_new_block
    4966              : !INTEGER                                  :: mynode, hold, row, col
    4967              : 
    4968            0 :       CALL timeset(routineN, handle)
    4969              : 
    4970              :       ! initialize a matrix to 1.0
    4971            0 :       CALL dbcsr_create(tmp, template=prec)
    4972              :       ! in order to use dbcsr_set matrix blocks must exist
    4973            0 :       CALL dbcsr_copy(tmp, prec)
    4974            0 :       CALL dbcsr_set(tmp, 1.0_dp)
    4975              : 
    4976              :       ! compute qq = (Vd^tr)*F*Vd
    4977            0 :       CALL dbcsr_create(tmp_n_vd, template=vd_prop)
    4978              :       CALL dbcsr_multiply("N", "N", 1.0_dp, f, vd_prop, &
    4979            0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    4980              :       CALL dbcsr_create(tmp_vd_vd_blk, &
    4981            0 :                         template=template_vd_vd_blk)
    4982            0 :       CALL dbcsr_copy(tmp_vd_vd_blk, template_vd_vd_blk)
    4983              :       CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
    4984              :                           0.0_dp, tmp_vd_vd_blk, &
    4985              :                           retain_sparsity=.TRUE., &
    4986            0 :                           filter_eps=eps_filter)
    4987              :       ! copy diagonal elements of the result into rows of a matrix
    4988            0 :       CALL dbcsr_get_info(tmp_vd_vd_blk, nfullrows_total=q_nrows)
    4989            0 :       ALLOCATE (q_diagonal(q_nrows))
    4990            0 :       CALL dbcsr_get_diag(tmp_vd_vd_blk, q_diagonal)
    4991              :       CALL dbcsr_create(qq_diag, &
    4992            0 :                         template=template_vd_vd_blk)
    4993            0 :       CALL dbcsr_add_on_diag(qq_diag, 1.0_dp)
    4994            0 :       CALL dbcsr_set_diag(qq_diag, q_diagonal)
    4995            0 :       CALL dbcsr_create(t1, template=prec)
    4996              :       CALL dbcsr_multiply("N", "N", 1.0_dp, qq_diag, tmp, &
    4997            0 :                           0.0_dp, t1, filter_eps=eps_filter)
    4998              : 
    4999              :       ! compute pp = X*sigma_oo_inv*X^tr
    5000            0 :       CALL dbcsr_create(tmp_vr_vr_blk, template=template_vr_vr_blk)
    5001            0 :       CALL dbcsr_copy(tmp_vr_vr_blk, template_vr_vr_blk)
    5002              :       CALL dbcsr_multiply("N", "N", 1.0_dp, x, oo_inv_x_tr, &
    5003              :                           0.0_dp, tmp_vr_vr_blk, &
    5004              :                           retain_sparsity=.TRUE., &
    5005            0 :                           filter_eps=eps_filter)
    5006              :       ! copy diagonal elements of the result into cols of a matrix
    5007            0 :       CALL dbcsr_get_info(tmp_vr_vr_blk, nfullrows_total=p_nrows)
    5008            0 :       ALLOCATE (p_diagonal(p_nrows))
    5009            0 :       CALL dbcsr_get_diag(tmp_vr_vr_blk, p_diagonal)
    5010            0 :       CALL dbcsr_create(pp_diag, template=template_vr_vr_blk)
    5011            0 :       CALL dbcsr_add_on_diag(pp_diag, 1.0_dp)
    5012            0 :       CALL dbcsr_set_diag(pp_diag, p_diagonal)
    5013            0 :       CALL dbcsr_set(tmp, 1.0_dp)
    5014            0 :       CALL dbcsr_create(t2, template=prec)
    5015              :       CALL dbcsr_multiply("N", "N", 1.0_dp, tmp, pp_diag, &
    5016            0 :                           0.0_dp, t2, filter_eps=eps_filter)
    5017              : 
    5018            0 :       CALL dbcsr_hadamard_product(t1, t2, prec)
    5019              : 
    5020              :       ! compute qq = (Vd^tr)*S*Vd
    5021              :       CALL dbcsr_multiply("N", "N", 1.0_dp, s, vd_prop, &
    5022            0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    5023              :       CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
    5024              :                           0.0_dp, tmp_vd_vd_blk, &
    5025              :                           retain_sparsity=.TRUE., &
    5026            0 :                           filter_eps=eps_filter)
    5027              :       ! copy diagonal elements of the result into rows of a matrix
    5028            0 :       CALL dbcsr_get_diag(tmp_vd_vd_blk, q_diagonal)
    5029            0 :       CALL dbcsr_add_on_diag(qq_diag, 1.0_dp)
    5030            0 :       CALL dbcsr_set_diag(qq_diag, q_diagonal)
    5031            0 :       CALL dbcsr_set(tmp, 1.0_dp)
    5032              :       CALL dbcsr_multiply("N", "N", 1.0_dp, qq_diag, tmp, &
    5033            0 :                           0.0_dp, t1, filter_eps=eps_filter)
    5034              : 
    5035              :       ! compute pp = X*sig_oo_inv*(T^tr)*F*T*sig_oo_inv*(X^tr)
    5036            0 :       CALL dbcsr_create(tmp1_n_vr, template=template_n_vr)
    5037            0 :       CALL dbcsr_create(tmp2_n_vr, template=template_n_vr)
    5038              :       CALL dbcsr_multiply("N", "N", 1.0_dp, t, oo_inv_x_tr, &
    5039            0 :                           0.0_dp, tmp1_n_vr, filter_eps=eps_filter)
    5040              :       CALL dbcsr_multiply("N", "N", 1.0_dp, f, tmp1_n_vr, &
    5041            0 :                           0.0_dp, tmp2_n_vr, filter_eps=eps_filter)
    5042              :       CALL dbcsr_multiply("T", "N", 1.0_dp, tmp1_n_vr, tmp2_n_vr, &
    5043              :                           0.0_dp, tmp_vr_vr_blk, &
    5044              :                           retain_sparsity=.TRUE., &
    5045            0 :                           filter_eps=eps_filter)
    5046              :       ! copy diagonal elements of the result into cols of a matrix
    5047            0 :       CALL dbcsr_get_diag(tmp_vr_vr_blk, p_diagonal)
    5048            0 :       CALL dbcsr_add_on_diag(pp_diag, 1.0_dp)
    5049            0 :       CALL dbcsr_set_diag(pp_diag, p_diagonal)
    5050            0 :       CALL dbcsr_set(tmp, 1.0_dp)
    5051              :       CALL dbcsr_multiply("N", "N", 1.0_dp, tmp, pp_diag, &
    5052            0 :                           0.0_dp, t2, filter_eps=eps_filter)
    5053              : 
    5054            0 :       CALL dbcsr_hadamard_product(t1, t2, tmp)
    5055            0 :       CALL dbcsr_add(prec, tmp, 1.0_dp, -1.0_dp)
    5056            0 :       CALL dbcsr_scale(prec, 2.0_dp*spin_factor)
    5057              : 
    5058              :       ! compute qp = X*sig_oo_inv*(T^tr)*S*Vd
    5059              :       CALL dbcsr_multiply("N", "N", 1.0_dp, s, vd_blk, &
    5060            0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    5061              :       CALL dbcsr_multiply("T", "N", 1.0_dp, tmp_n_vd, tmp1_n_vr, &
    5062              :                           0.0_dp, tmp, retain_sparsity=.TRUE., &
    5063            0 :                           filter_eps=eps_filter)
    5064            0 :       CALL dbcsr_hadamard_product(grad, tmp, t1)
    5065              :       ! gradient already contains 2.0*spin_factor
    5066            0 :       CALL dbcsr_scale(t1, -2.0_dp)
    5067              : 
    5068            0 :       CALL dbcsr_add(prec, t1, 1.0_dp, 1.0_dp)
    5069              : 
    5070            0 :       CALL inverse_of_elements(prec)
    5071            0 :       CALL dbcsr_filter(prec, eps_filter)
    5072              : 
    5073            0 :       DEALLOCATE (q_diagonal)
    5074            0 :       DEALLOCATE (p_diagonal)
    5075            0 :       CALL dbcsr_release(tmp)
    5076            0 :       CALL dbcsr_release(qq_diag)
    5077            0 :       CALL dbcsr_release(t1)
    5078            0 :       CALL dbcsr_release(pp_diag)
    5079            0 :       CALL dbcsr_release(t2)
    5080            0 :       CALL dbcsr_release(tmp_n_vd)
    5081            0 :       CALL dbcsr_release(tmp_vd_vd_blk)
    5082            0 :       CALL dbcsr_release(tmp_vr_vr_blk)
    5083            0 :       CALL dbcsr_release(tmp1_n_vr)
    5084            0 :       CALL dbcsr_release(tmp2_n_vr)
    5085              : 
    5086            0 :       CALL timestop(handle)
    5087              : 
    5088            0 :    END SUBROUTINE opt_k_create_preconditioner
    5089              : 
    5090              : ! **************************************************************************************************
    5091              : !> \brief Computes a block-diagonal preconditioner for the optimization of
    5092              : !>        k matrix
    5093              : !> \param almo_scf_env ...
    5094              : !> \param vd_prop ...
    5095              : !> \param oo_inv_x_tr ...
    5096              : !> \param t_curr ...
    5097              : !> \param ispin ...
    5098              : !> \param spin_factor ...
    5099              : !> \par History
    5100              : !>       2011.10 created [Rustam Z Khaliullin]
    5101              : !> \author Rustam Z Khaliullin
    5102              : ! **************************************************************************************************
    5103            0 :    SUBROUTINE opt_k_create_preconditioner_blk(almo_scf_env, vd_prop, oo_inv_x_tr, &
    5104              :                                               t_curr, ispin, spin_factor)
    5105              : 
    5106              :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    5107              :       TYPE(dbcsr_type), INTENT(IN)                       :: vd_prop, oo_inv_x_tr, t_curr
    5108              :       INTEGER, INTENT(IN)                                :: ispin
    5109              :       REAL(KIND=dp), INTENT(IN)                          :: spin_factor
    5110              : 
    5111              :       CHARACTER(len=*), PARAMETER :: routineN = 'opt_k_create_preconditioner_blk'
    5112              : 
    5113              :       INTEGER                                            :: handle
    5114              :       REAL(KIND=dp)                                      :: eps_filter
    5115              :       TYPE(dbcsr_type)                                   :: opt_k_e_dd, opt_k_e_rr, s_dd_sqrt, &
    5116              :                                                             s_rr_sqrt, t1, tmp, tmp1_n_vr, &
    5117              :                                                             tmp2_n_vr, tmp_n_vd, tmp_vd_vd_blk, &
    5118              :                                                             tmp_vr_vr_blk
    5119              : 
    5120              : ! matrices that has been computed outside the routine already
    5121              : 
    5122            0 :       CALL timeset(routineN, handle)
    5123              : 
    5124            0 :       eps_filter = almo_scf_env%eps_filter
    5125              : 
    5126              :       ! compute S_qq = (Vd^tr)*S*Vd
    5127            0 :       CALL dbcsr_create(tmp_n_vd, template=almo_scf_env%matrix_v_disc(ispin))
    5128              :       CALL dbcsr_create(tmp_vd_vd_blk, &
    5129              :                         template=almo_scf_env%matrix_vv_disc_blk(ispin), &
    5130            0 :                         matrix_type=dbcsr_type_no_symmetry)
    5131              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5132              :                           almo_scf_env%matrix_s(1), &
    5133              :                           vd_prop, &
    5134            0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    5135              :       CALL dbcsr_copy(tmp_vd_vd_blk, &
    5136            0 :                       almo_scf_env%matrix_vv_disc_blk(ispin))
    5137              :       CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
    5138              :                           0.0_dp, tmp_vd_vd_blk, &
    5139            0 :                           retain_sparsity=.TRUE.)
    5140              : 
    5141              :       CALL dbcsr_create(s_dd_sqrt, &
    5142              :                         template=almo_scf_env%matrix_vv_disc_blk(ispin), &
    5143            0 :                         matrix_type=dbcsr_type_no_symmetry)
    5144              :       CALL matrix_sqrt_Newton_Schulz(s_dd_sqrt, &
    5145              :                                      almo_scf_env%opt_k_t_dd(ispin), &
    5146              :                                      tmp_vd_vd_blk, &
    5147              :                                      threshold=eps_filter, &
    5148              :                                      order=almo_scf_env%order_lanczos, &
    5149              :                                      eps_lanczos=almo_scf_env%eps_lanczos, &
    5150            0 :                                      max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    5151              : 
    5152              :       ! compute F_qq = (Vd^tr)*F*Vd
    5153              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5154              :                           almo_scf_env%matrix_ks_0deloc(ispin), &
    5155              :                           vd_prop, &
    5156            0 :                           0.0_dp, tmp_n_vd, filter_eps=eps_filter)
    5157              :       CALL dbcsr_copy(tmp_vd_vd_blk, &
    5158            0 :                       almo_scf_env%matrix_vv_disc_blk(ispin))
    5159              :       CALL dbcsr_multiply("T", "N", 1.0_dp, vd_prop, tmp_n_vd, &
    5160              :                           0.0_dp, tmp_vd_vd_blk, &
    5161            0 :                           retain_sparsity=.TRUE.)
    5162            0 :       CALL dbcsr_release(tmp_n_vd)
    5163              : 
    5164              :       ! bring to the blocked-orthogonalized basis
    5165              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5166              :                           tmp_vd_vd_blk, &
    5167              :                           almo_scf_env%opt_k_t_dd(ispin), &
    5168            0 :                           0.0_dp, s_dd_sqrt, filter_eps=eps_filter)
    5169              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5170              :                           almo_scf_env%opt_k_t_dd(ispin), &
    5171              :                           s_dd_sqrt, &
    5172            0 :                           0.0_dp, tmp_vd_vd_blk, filter_eps=eps_filter)
    5173              : 
    5174              :       ! diagonalize the matrix
    5175              :       CALL dbcsr_create(opt_k_e_dd, &
    5176            0 :                         template=almo_scf_env%matrix_vv_disc_blk(ispin))
    5177            0 :       CALL dbcsr_release(s_dd_sqrt)
    5178              :       CALL dbcsr_create(s_dd_sqrt, &
    5179              :                         template=almo_scf_env%matrix_vv_disc_blk(ispin), &
    5180            0 :                         matrix_type=dbcsr_type_no_symmetry)
    5181              :       CALL diagonalize_diagonal_blocks(tmp_vd_vd_blk, &
    5182              :                                        s_dd_sqrt, &
    5183            0 :                                        opt_k_e_dd)
    5184              : 
    5185              :       ! obtain the transformation matrix in the discarded subspace
    5186              :       ! T = S^{-1/2}.U
    5187              :       CALL dbcsr_copy(tmp_vd_vd_blk, &
    5188            0 :                       almo_scf_env%opt_k_t_dd(ispin))
    5189              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5190              :                           tmp_vd_vd_blk, &
    5191              :                           s_dd_sqrt, &
    5192              :                           0.0_dp, almo_scf_env%opt_k_t_dd(ispin), &
    5193            0 :                           filter_eps=eps_filter)
    5194            0 :       CALL dbcsr_release(s_dd_sqrt)
    5195            0 :       CALL dbcsr_release(tmp_vd_vd_blk)
    5196              : 
    5197              :       ! copy diagonal elements of the result into rows of a matrix
    5198              :       CALL dbcsr_create(tmp, &
    5199            0 :                         template=almo_scf_env%matrix_k_blk_ones(ispin))
    5200              :       CALL dbcsr_copy(tmp, &
    5201            0 :                       almo_scf_env%matrix_k_blk_ones(ispin))
    5202              :       CALL dbcsr_create(t1, &
    5203            0 :                         template=almo_scf_env%matrix_k_blk_ones(ispin))
    5204              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5205              :                           opt_k_e_dd, tmp, &
    5206            0 :                           0.0_dp, t1, filter_eps=eps_filter)
    5207            0 :       CALL dbcsr_release(opt_k_e_dd)
    5208              : 
    5209              :       ! compute S_pp = X*sigma_oo_inv*X^tr
    5210              :       CALL dbcsr_create(tmp_vr_vr_blk, &
    5211              :                         template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
    5212            0 :                         matrix_type=dbcsr_type_no_symmetry)
    5213              :       CALL dbcsr_copy(tmp_vr_vr_blk, &
    5214            0 :                       almo_scf_env%matrix_sigma_vv_blk(ispin))
    5215              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5216              :                           almo_scf_env%matrix_x(ispin), &
    5217              :                           oo_inv_x_tr, &
    5218              :                           0.0_dp, tmp_vr_vr_blk, &
    5219            0 :                           retain_sparsity=.TRUE.)
    5220              : 
    5221              :       ! obtain the orthogonalization matrix
    5222              :       CALL dbcsr_create(s_rr_sqrt, &
    5223              :                         template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
    5224            0 :                         matrix_type=dbcsr_type_no_symmetry)
    5225              :       CALL matrix_sqrt_Newton_Schulz(s_rr_sqrt, &
    5226              :                                      almo_scf_env%opt_k_t_rr(ispin), &
    5227              :                                      tmp_vr_vr_blk, &
    5228              :                                      threshold=eps_filter, &
    5229              :                                      order=almo_scf_env%order_lanczos, &
    5230              :                                      eps_lanczos=almo_scf_env%eps_lanczos, &
    5231            0 :                                      max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    5232              : 
    5233              :       ! compute F_pp = X*sig_oo_inv*(T^tr)*F*T*sig_oo_inv*(X^tr)
    5234              :       CALL dbcsr_create(tmp1_n_vr, &
    5235            0 :                         template=almo_scf_env%matrix_v(ispin))
    5236              :       CALL dbcsr_create(tmp2_n_vr, &
    5237            0 :                         template=almo_scf_env%matrix_v(ispin))
    5238              :       CALL dbcsr_multiply("N", "N", 1.0_dp, t_curr, oo_inv_x_tr, &
    5239            0 :                           0.0_dp, tmp1_n_vr, filter_eps=eps_filter)
    5240              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5241              :                           almo_scf_env%matrix_ks_0deloc(ispin), &
    5242              :                           tmp1_n_vr, &
    5243            0 :                           0.0_dp, tmp2_n_vr, filter_eps=eps_filter)
    5244              :       CALL dbcsr_multiply("T", "N", 1.0_dp, tmp1_n_vr, tmp2_n_vr, &
    5245              :                           0.0_dp, tmp_vr_vr_blk, &
    5246            0 :                           retain_sparsity=.TRUE.)
    5247            0 :       CALL dbcsr_release(tmp1_n_vr)
    5248            0 :       CALL dbcsr_release(tmp2_n_vr)
    5249              : 
    5250              :       ! bring to the blocked-orthogonalized basis
    5251              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5252              :                           tmp_vr_vr_blk, &
    5253              :                           almo_scf_env%opt_k_t_rr(ispin), &
    5254            0 :                           0.0_dp, s_rr_sqrt, filter_eps=eps_filter)
    5255              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5256              :                           almo_scf_env%opt_k_t_rr(ispin), &
    5257              :                           s_rr_sqrt, &
    5258            0 :                           0.0_dp, tmp_vr_vr_blk, filter_eps=eps_filter)
    5259              : 
    5260              :       ! diagonalize the matrix
    5261              :       CALL dbcsr_create(opt_k_e_rr, &
    5262            0 :                         template=almo_scf_env%matrix_sigma_vv_blk(ispin))
    5263            0 :       CALL dbcsr_release(s_rr_sqrt)
    5264              :       CALL dbcsr_create(s_rr_sqrt, &
    5265              :                         template=almo_scf_env%matrix_sigma_vv_blk(ispin), &
    5266            0 :                         matrix_type=dbcsr_type_no_symmetry)
    5267              :       CALL diagonalize_diagonal_blocks(tmp_vr_vr_blk, &
    5268              :                                        s_rr_sqrt, &
    5269            0 :                                        opt_k_e_rr)
    5270              : 
    5271              :       ! obtain the transformation matrix in the retained subspace
    5272              :       ! T = S^{-1/2}.U
    5273              :       CALL dbcsr_copy(tmp_vr_vr_blk, &
    5274            0 :                       almo_scf_env%opt_k_t_rr(ispin))
    5275              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5276              :                           tmp_vr_vr_blk, &
    5277              :                           s_rr_sqrt, &
    5278              :                           0.0_dp, almo_scf_env%opt_k_t_rr(ispin), &
    5279            0 :                           filter_eps=eps_filter)
    5280            0 :       CALL dbcsr_release(s_rr_sqrt)
    5281            0 :       CALL dbcsr_release(tmp_vr_vr_blk)
    5282              : 
    5283              :       ! copy diagonal elements of the result into cols of a matrix
    5284              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5285              :                           tmp, opt_k_e_rr, &
    5286              :                           0.0_dp, almo_scf_env%opt_k_denom(ispin), &
    5287            0 :                           filter_eps=eps_filter)
    5288            0 :       CALL dbcsr_release(opt_k_e_rr)
    5289            0 :       CALL dbcsr_release(tmp)
    5290              : 
    5291              :       ! form the denominator matrix
    5292              :       CALL dbcsr_add(almo_scf_env%opt_k_denom(ispin), t1, &
    5293            0 :                      -1.0_dp, 1.0_dp)
    5294            0 :       CALL dbcsr_release(t1)
    5295              :       CALL dbcsr_scale(almo_scf_env%opt_k_denom(ispin), &
    5296            0 :                        2.0_dp*spin_factor)
    5297              : 
    5298            0 :       CALL inverse_of_elements(almo_scf_env%opt_k_denom(ispin))
    5299              :       CALL dbcsr_filter(almo_scf_env%opt_k_denom(ispin), &
    5300            0 :                         eps_filter)
    5301              : 
    5302            0 :       CALL timestop(handle)
    5303              : 
    5304            0 :    END SUBROUTINE opt_k_create_preconditioner_blk
    5305              : 
    5306              : ! **************************************************************************************************
    5307              : !> \brief Applies a block-diagonal preconditioner for the optimization of
    5308              : !>        k matrix (preconditioner matrices must be calculated and stored
    5309              : !>        beforehand)
    5310              : !> \param almo_scf_env ...
    5311              : !> \param step ...
    5312              : !> \param grad ...
    5313              : !> \param ispin ...
    5314              : !> \par History
    5315              : !>       2011.10 created [Rustam Z Khaliullin]
    5316              : !> \author Rustam Z Khaliullin
    5317              : ! **************************************************************************************************
    5318            0 :    SUBROUTINE opt_k_apply_preconditioner_blk(almo_scf_env, step, grad, ispin)
    5319              : 
    5320              :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    5321              :       TYPE(dbcsr_type), INTENT(OUT)                      :: step
    5322              :       TYPE(dbcsr_type), INTENT(IN)                       :: grad
    5323              :       INTEGER, INTENT(IN)                                :: ispin
    5324              : 
    5325              :       CHARACTER(len=*), PARAMETER :: routineN = 'opt_k_apply_preconditioner_blk'
    5326              : 
    5327              :       INTEGER                                            :: handle
    5328              :       REAL(KIND=dp)                                      :: eps_filter
    5329              :       TYPE(dbcsr_type)                                   :: tmp_k
    5330              : 
    5331            0 :       CALL timeset(routineN, handle)
    5332              : 
    5333            0 :       eps_filter = almo_scf_env%eps_filter
    5334              : 
    5335            0 :       CALL dbcsr_create(tmp_k, template=almo_scf_env%matrix_k_blk(ispin))
    5336              : 
    5337              :       ! transform gradient to the correct "diagonal" basis
    5338              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5339              :                           grad, almo_scf_env%opt_k_t_rr(ispin), &
    5340            0 :                           0.0_dp, tmp_k, filter_eps=eps_filter)
    5341              :       CALL dbcsr_multiply("T", "N", 1.0_dp, &
    5342              :                           almo_scf_env%opt_k_t_dd(ispin), tmp_k, &
    5343            0 :                           0.0_dp, step, filter_eps=eps_filter)
    5344              : 
    5345              :       ! apply diagonal preconditioner
    5346              :       CALL dbcsr_hadamard_product(step, &
    5347            0 :                                   almo_scf_env%opt_k_denom(ispin), tmp_k)
    5348              : 
    5349              :       ! back-transform the result to the initial basis
    5350              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    5351              :                           almo_scf_env%opt_k_t_dd(ispin), tmp_k, &
    5352            0 :                           0.0_dp, step, filter_eps=eps_filter)
    5353              :       CALL dbcsr_multiply("N", "T", 1.0_dp, &
    5354              :                           step, almo_scf_env%opt_k_t_rr(ispin), &
    5355            0 :                           0.0_dp, tmp_k, filter_eps=eps_filter)
    5356              : 
    5357            0 :       CALL dbcsr_copy(step, tmp_k)
    5358              : 
    5359            0 :       CALL dbcsr_release(tmp_k)
    5360              : 
    5361            0 :       CALL timestop(handle)
    5362              : 
    5363            0 :    END SUBROUTINE opt_k_apply_preconditioner_blk
    5364              : 
    5365              : !! **************************************************************************************************
    5366              : !!> \brief Reduce the number of virtual orbitals by rotating them within
    5367              : !!>        a domain. The rotation is such that minimizes the frobenius norm of
    5368              : !!>        the Fov domain-blocks of the discarded virtuals
    5369              : !!> \par History
    5370              : !!>       2011.08 created [Rustam Z Khaliullin]
    5371              : !!> \author Rustam Z Khaliullin
    5372              : !! **************************************************************************************************
    5373              : !  SUBROUTINE truncate_subspace_v_blk(qs_env,almo_scf_env)
    5374              : !
    5375              : !    TYPE(qs_environment_type), POINTER       :: qs_env
    5376              : !    TYPE(almo_scf_env_type)                  :: almo_scf_env
    5377              : !
    5378              : !    CHARACTER(len=*), PARAMETER :: routineN = 'truncate_subspace_v_blk', &
    5379              : !      routineP = moduleN//':'//routineN
    5380              : !
    5381              : !    INTEGER                                  :: handle, ispin, iblock_row, &
    5382              : !                                                iblock_col, iblock_row_size, &
    5383              : !                                                iblock_col_size, retained_v, &
    5384              : !                                                iteration, line_search_step, &
    5385              : !                                                unit_nr, line_search_step_last
    5386              : !    REAL(KIND=dp)                            :: t1, obj_function, grad_norm,&
    5387              : !                                                c0, b0, a0, obj_function_new,&
    5388              : !                                                t2, alpha, ff1, ff2, step1,&
    5389              : !                                                step2,&
    5390              : !                                                frob_matrix_base,&
    5391              : !                                                frob_matrix
    5392              : !    LOGICAL                                  :: safe_mode, converged, &
    5393              : !                                                prepare_to_exit, failure
    5394              : !    TYPE(cp_logger_type), POINTER            :: logger
    5395              : !    TYPE(dbcsr_type)                      :: Fon, Fov, Fov_filtered, &
    5396              : !                                                temp1_oo, temp2_oo, Fov_original, &
    5397              : !                                                temp0_ov, U_blk_tot, U_blk, &
    5398              : !                                                grad_blk, step_blk, matrix_filter, &
    5399              : !                                                v_full_new,v_full_tmp,&
    5400              : !                                                matrix_sigma_vv_full,&
    5401              : !                                                matrix_sigma_vv_full_sqrt,&
    5402              : !                                                matrix_sigma_vv_full_sqrt_inv,&
    5403              : !                                                matrix_tmp1,&
    5404              : !                                                matrix_tmp2
    5405              : !
    5406              : !    REAL(kind=dp), DIMENSION(:, :), POINTER  :: data_p, p_new_block
    5407              : !    TYPE(dbcsr_iterator_type)                  :: iter
    5408              : !
    5409              : !
    5410              : !REAL(kind=dp), DIMENSION(:), ALLOCATABLE     :: eigenvalues, WORK
    5411              : !REAL(kind=dp), DIMENSION(:,:), ALLOCATABLE   :: data_copy, left_vectors, right_vectors
    5412              : !INTEGER                                      :: LWORK, INFO
    5413              : !TYPE(dbcsr_type)                          :: temp_u_v_full_blk
    5414              : !
    5415              : !    CALL timeset(routineN,handle)
    5416              : !
    5417              : !    safe_mode=.TRUE.
    5418              : !
    5419              : !    ! get a useful output_unit
    5420              : !    logger => cp_get_default_logger()
    5421              : !    IF (logger%para_env%is_source()) THEN
    5422              : !       unit_nr=cp_logger_get_default_unit_nr(logger,local=.TRUE.)
    5423              : !    ELSE
    5424              : !       unit_nr=-1
    5425              : !    ENDIF
    5426              : !
    5427              : !    DO ispin=1,almo_scf_env%nspins
    5428              : !
    5429              : !       t1 = m_walltime()
    5430              : !
    5431              : !       !!!!!!!!!!!!!!!!!
    5432              : !       ! 0. Orthogonalize virtuals
    5433              : !       !    Unfortunately, we have to do it in the FULL V subspace :(
    5434              : !
    5435              : !       CALL dbcsr_init(v_full_new)
    5436              : !       CALL dbcsr_create(v_full_new,&
    5437              : !               template=almo_scf_env%matrix_v_full_blk(ispin),&
    5438              : !               matrix_type=dbcsr_type_no_symmetry)
    5439              : !
    5440              : !       ! project the occupied subspace out
    5441              : !       CALL almo_scf_p_out_from_v(almo_scf_env%matrix_v_full_blk(ispin),&
    5442              : !              v_full_new,almo_scf_env%matrix_ov_full(ispin),&
    5443              : !              ispin,almo_scf_env)
    5444              : !
    5445              : !       ! init overlap and its functions
    5446              : !       CALL dbcsr_init(matrix_sigma_vv_full)
    5447              : !       CALL dbcsr_init(matrix_sigma_vv_full_sqrt)
    5448              : !       CALL dbcsr_init(matrix_sigma_vv_full_sqrt_inv)
    5449              : !       CALL dbcsr_create(matrix_sigma_vv_full,&
    5450              : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5451              : !               matrix_type=dbcsr_type_no_symmetry)
    5452              : !       CALL dbcsr_create(matrix_sigma_vv_full_sqrt,&
    5453              : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5454              : !               matrix_type=dbcsr_type_no_symmetry)
    5455              : !       CALL dbcsr_create(matrix_sigma_vv_full_sqrt_inv,&
    5456              : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5457              : !               matrix_type=dbcsr_type_no_symmetry)
    5458              : !
    5459              : !       ! construct VV overlap
    5460              : !       CALL almo_scf_mo_to_sigma(v_full_new,&
    5461              : !               matrix_sigma_vv_full,&
    5462              : !               almo_scf_env%matrix_s(1),&
    5463              : !               almo_scf_env%eps_filter)
    5464              : !
    5465              : !       IF (unit_nr>0) THEN
    5466              : !          WRITE(unit_nr,*) "sqrt and inv(sqrt) of the FULL virtual MO overlap"
    5467              : !       ENDIF
    5468              : !
    5469              : !       ! construct orthogonalization matrices
    5470              : !       CALL matrix_sqrt_Newton_Schulz(matrix_sigma_vv_full_sqrt,&
    5471              : !                                      matrix_sigma_vv_full_sqrt_inv,&
    5472              : !                                      matrix_sigma_vv_full,&
    5473              : !                                      threshold=almo_scf_env%eps_filter,&
    5474              : !                                      order=almo_scf_env%order_lanczos,&
    5475              : !                                      eps_lanczos=almo_scf_env%eps_lanczos,&
    5476              : !                                      max_iter_lanczos=almo_scf_env%max_iter_lanczos)
    5477              : !       IF (safe_mode) THEN
    5478              : !          CALL dbcsr_init(matrix_tmp1)
    5479              : !          CALL dbcsr_create(matrix_tmp1,template=matrix_sigma_vv_full,&
    5480              : !                               matrix_type=dbcsr_type_no_symmetry)
    5481              : !          CALL dbcsr_init(matrix_tmp2)
    5482              : !          CALL dbcsr_create(matrix_tmp2,template=matrix_sigma_vv_full,&
    5483              : !                               matrix_type=dbcsr_type_no_symmetry)
    5484              : !
    5485              : !          CALL dbcsr_multiply("N","N",1.0_dp,matrix_sigma_vv_full_sqrt_inv,&
    5486              : !                                 matrix_sigma_vv_full,&
    5487              : !                                 0.0_dp,matrix_tmp1,filter_eps=almo_scf_env%eps_filter)
    5488              : !          CALL dbcsr_multiply("N","N",1.0_dp,matrix_tmp1,&
    5489              : !                                 matrix_sigma_vv_full_sqrt_inv,&
    5490              : !                                 0.0_dp,matrix_tmp2,filter_eps=almo_scf_env%eps_filter)
    5491              : !
    5492              : !          frob_matrix_base=dbcsr_frobenius_norm(matrix_tmp2)
    5493              : !          CALL dbcsr_add_on_diag(matrix_tmp2,-1.0_dp)
    5494              : !          frob_matrix=dbcsr_frobenius_norm(matrix_tmp2)
    5495              : !          IF (unit_nr>0) THEN
    5496              : !             WRITE(unit_nr,*) "Error for (inv(sqrt(SIGVV))*SIGVV*inv(sqrt(SIGVV))-I)",frob_matrix/frob_matrix_base
    5497              : !          ENDIF
    5498              : !
    5499              : !          CALL dbcsr_release(matrix_tmp1)
    5500              : !          CALL dbcsr_release(matrix_tmp2)
    5501              : !       ENDIF
    5502              : !
    5503              : !       ! discard unnecessary overlap functions
    5504              : !       CALL dbcsr_release(matrix_sigma_vv_full)
    5505              : !       CALL dbcsr_release(matrix_sigma_vv_full_sqrt)
    5506              : !
    5507              : !! this can be re-written because we have (1-P)|v>
    5508              : !
    5509              : !       !!!!!!!!!!!!!!!!!!!
    5510              : !       ! 1. Compute F_ov
    5511              : !       CALL dbcsr_init(Fon)
    5512              : !       CALL dbcsr_create(Fon,&
    5513              : !               template=almo_scf_env%matrix_v_full_blk(ispin))
    5514              : !       CALL dbcsr_init(Fov)
    5515              : !       CALL dbcsr_create(Fov,&
    5516              : !               template=almo_scf_env%matrix_ov_full(ispin))
    5517              : !       CALL dbcsr_init(Fov_filtered)
    5518              : !       CALL dbcsr_create(Fov_filtered,&
    5519              : !               template=almo_scf_env%matrix_ov_full(ispin))
    5520              : !       CALL dbcsr_init(temp1_oo)
    5521              : !       CALL dbcsr_create(temp1_oo,&
    5522              : !               template=almo_scf_env%matrix_sigma(ispin),&
    5523              : !               !matrix_type=dbcsr_type_no_symmetry)
    5524              : !       CALL dbcsr_init(temp2_oo)
    5525              : !       CALL dbcsr_create(temp2_oo,&
    5526              : !               template=almo_scf_env%matrix_sigma(ispin),&
    5527              : !               matrix_type=dbcsr_type_no_symmetry)
    5528              : !
    5529              : !       CALL dbcsr_multiply("T","N",1.0_dp,almo_scf_env%matrix_t_blk(ispin),&
    5530              : !               almo_scf_env%matrix_ks_0deloc(ispin),&
    5531              : !               0.0_dp,Fon,filter_eps=almo_scf_env%eps_filter)
    5532              : !
    5533              : !       CALL dbcsr_multiply("N","N",1.0_dp,Fon,&
    5534              : !               almo_scf_env%matrix_v_full_blk(ispin),&
    5535              : !               0.0_dp,Fov,filter_eps=almo_scf_env%eps_filter)
    5536              : !
    5537              : !       CALL dbcsr_multiply("N","N",1.0_dp,Fon,&
    5538              : !               almo_scf_env%matrix_t_blk(ispin),&
    5539              : !               0.0_dp,temp1_oo,filter_eps=almo_scf_env%eps_filter)
    5540              : !
    5541              : !       CALL dbcsr_multiply("N","N",1.0_dp,temp1_oo,&
    5542              : !               almo_scf_env%matrix_sigma_inv(ispin),&
    5543              : !               0.0_dp,temp2_oo,filter_eps=almo_scf_env%eps_filter)
    5544              : !       CALL dbcsr_release(temp1_oo)
    5545              : !
    5546              : !       CALL dbcsr_multiply("T","N",1.0_dp,almo_scf_env%matrix_t_blk(ispin),&
    5547              : !               almo_scf_env%matrix_s(1),&
    5548              : !               0.0_dp,Fon,filter_eps=almo_scf_env%eps_filter)
    5549              : !
    5550              : !       CALL dbcsr_multiply("N","N",1.0_dp,Fon,&
    5551              : !               almo_scf_env%matrix_v_full_blk(ispin),&
    5552              : !               0.0_dp,Fov_filtered,filter_eps=almo_scf_env%eps_filter)
    5553              : !       CALL dbcsr_release(Fon)
    5554              : !
    5555              : !       CALL dbcsr_multiply("N","N",-1.0_dp,temp2_oo,&
    5556              : !               Fov_filtered,&
    5557              : !               1.0_dp,Fov,filter_eps=almo_scf_env%eps_filter)
    5558              : !       CALL dbcsr_release(temp2_oo)
    5559              : !
    5560              : !       CALL dbcsr_multiply("N","N",1.0_dp,almo_scf_env%matrix_sigma_inv(ispin),&
    5561              : !               Fov,0.0_dp,Fov_filtered,filter_eps=almo_scf_env%eps_filter)
    5562              : !
    5563              : !       CALL dbcsr_multiply("N","N",1.0_dp,Fov_filtered,&
    5564              : !               matrix_sigma_vv_full_sqrt_inv,&
    5565              : !               0.0_dp,Fov,filter_eps=almo_scf_env%eps_filter)
    5566              : !       !CALL dbcsr_copy(Fov,Fov_filtered)
    5567              : !CALL dbcsr_print(Fov)
    5568              : !
    5569              : !       IF (safe_mode) THEN
    5570              : !          CALL dbcsr_init(Fov_original)
    5571              : !          CALL dbcsr_create(Fov_original,template=Fov)
    5572              : !          CALL dbcsr_copy(Fov_original,Fov)
    5573              : !       ENDIF
    5574              : !
    5575              : !!! remove diagonal blocks
    5576              : !!CALL dbcsr_iterator_start(iter,Fov)
    5577              : !!DO WHILE (dbcsr_iterator_blocks_left(iter))
    5578              : !!
    5579              : !!   CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
    5580              : !!           row_size=iblock_row_size,col_size=iblock_col_size)
    5581              : !!
    5582              : !!   IF (iblock_row.eq.iblock_col) data_p(:,:)=0.0_dp
    5583              : !!
    5584              : !!ENDDO
    5585              : !!CALL dbcsr_iterator_stop(iter)
    5586              : !!CALL dbcsr_finalize(Fov)
    5587              : !
    5588              : !!! perform svd of blocks
    5589              : !!!!! THIS ROUTINE WORKS ONLY ON ONE CPU AND ONLY FOR 2 MOLECULES !!!
    5590              : !!CALL dbcsr_init(temp_u_v_full_blk)
    5591              : !!CALL dbcsr_create(temp_u_v_full_blk,&
    5592              : !!        template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5593              : !!        matrix_type=dbcsr_type_no_symmetry)
    5594              : !!
    5595              : !!CALL dbcsr_work_create(temp_u_v_full_blk,&
    5596              : !!        work_mutable=.TRUE.)
    5597              : !!CALL dbcsr_iterator_start(iter,Fov)
    5598              : !!DO WHILE (dbcsr_iterator_blocks_left(iter))
    5599              : !!
    5600              : !!   CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
    5601              : !!           row_size=iblock_row_size,col_size=iblock_col_size)
    5602              : !!
    5603              : !!   IF (iblock_row.ne.iblock_col) THEN
    5604              : !!
    5605              : !!      ! Prepare data
    5606              : !!      allocate(eigenvalues(min(iblock_row_size,iblock_col_size)))
    5607              : !!      allocate(data_copy(iblock_row_size,iblock_col_size))
    5608              : !!      allocate(left_vectors(iblock_row_size,iblock_row_size))
    5609              : !!      allocate(right_vectors(iblock_col_size,iblock_col_size))
    5610              : !!      data_copy(:,:)=data_p(:,:)
    5611              : !!
    5612              : !!      ! Query the optimal workspace for dgesvd
    5613              : !!      LWORK = -1
    5614              : !!      allocate(WORK(MAX(1,LWORK)))
    5615              : !!      CALL DGESVD('N','A',iblock_row_size,iblock_col_size,data_copy,&
    5616              : !!              iblock_row_size,eigenvalues,left_vectors,iblock_row_size,&
    5617              : !!              right_vectors,iblock_col_size,WORK,LWORK,INFO)
    5618              : !!      LWORK = INT(WORK( 1 ))
    5619              : !!      deallocate(WORK)
    5620              : !!
    5621              : !!      ! Allocate the workspace and perform svd
    5622              : !!      allocate(WORK(MAX(1,LWORK)))
    5623              : !!      CALL DGESVD('N','A',iblock_row_size,iblock_col_size,data_copy,&
    5624              : !!              iblock_row_size,eigenvalues,left_vectors,iblock_row_size,&
    5625              : !!              right_vectors,iblock_col_size,WORK,LWORK,INFO)
    5626              : !!      deallocate(WORK)
    5627              : !!      IF( INFO/=0 ) THEN
    5628              : !!         CPABORT("DGESVD failed")
    5629              : !!      END IF
    5630              : !!
    5631              : !!      ! copy right singular vectors into a unitary matrix
    5632              : !!      CALL dbcsr_put_block(temp_u_v_full_blk,iblock_col,iblock_col,right_vectors)
    5633              : !!
    5634              : !!      deallocate(eigenvalues)
    5635              : !!      deallocate(data_copy)
    5636              : !!      deallocate(left_vectors)
    5637              : !!      deallocate(right_vectors)
    5638              : !!
    5639              : !!   ENDIF
    5640              : !!ENDDO
    5641              : !!CALL dbcsr_iterator_stop(iter)
    5642              : !!CALL dbcsr_finalize(temp_u_v_full_blk)
    5643              : !!!CALL dbcsr_print(temp_u_v_full_blk)
    5644              : !!CALL dbcsr_multiply("N","T",1.0_dp,Fov,temp_u_v_full_blk,&
    5645              : !!        0.0_dp,Fov_filtered,filter_eps=almo_scf_env%eps_filter)
    5646              : !!
    5647              : !!CALL dbcsr_copy(Fov,Fov_filtered)
    5648              : !!CALL dbcsr_print(Fov)
    5649              : !
    5650              : !       !!!!!!!!!!!!!!!!!!!
    5651              : !       ! 2. Initialize variables
    5652              : !
    5653              : !       ! temp space
    5654              : !       CALL dbcsr_init(temp0_ov)
    5655              : !       CALL dbcsr_create(temp0_ov,&
    5656              : !               template=almo_scf_env%matrix_ov_full(ispin))
    5657              : !
    5658              : !       ! current unitary matrix
    5659              : !       CALL dbcsr_init(U_blk)
    5660              : !       CALL dbcsr_create(U_blk,&
    5661              : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5662              : !               matrix_type=dbcsr_type_no_symmetry)
    5663              : !
    5664              : !       ! unitary matrix accumulator
    5665              : !       CALL dbcsr_init(U_blk_tot)
    5666              : !       CALL dbcsr_create(U_blk_tot,&
    5667              : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5668              : !               matrix_type=dbcsr_type_no_symmetry)
    5669              : !       CALL dbcsr_add_on_diag(U_blk_tot,1.0_dp)
    5670              : !
    5671              : !!CALL dbcsr_add_on_diag(U_blk,1.0_dp)
    5672              : !!CALL dbcsr_multiply("N","T",1.0_dp,U_blk,temp_u_v_full_blk,&
    5673              : !!        0.0_dp,U_blk_tot,filter_eps=almo_scf_env%eps_filter)
    5674              : !!
    5675              : !!CALL dbcsr_release(temp_u_v_full_blk)
    5676              : !
    5677              : !       ! init gradient
    5678              : !       CALL dbcsr_init(grad_blk)
    5679              : !       CALL dbcsr_create(grad_blk,&
    5680              : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5681              : !               matrix_type=dbcsr_type_no_symmetry)
    5682              : !
    5683              : !       ! init step matrix
    5684              : !       CALL dbcsr_init(step_blk)
    5685              : !       CALL dbcsr_create(step_blk,&
    5686              : !               template=almo_scf_env%matrix_vv_full_blk(ispin),&
    5687              : !               matrix_type=dbcsr_type_no_symmetry)
    5688              : !
    5689              : !       ! "retain discarded" filter (0.0 - retain, 1.0 - discard)
    5690              : !       CALL dbcsr_init(matrix_filter)
    5691              : !       CALL dbcsr_create(matrix_filter,&
    5692              : !               template=almo_scf_env%matrix_ov_full(ispin))
    5693              : !       ! copy Fov into the filter matrix temporarily
    5694              : !       ! so we know which blocks contain significant elements
    5695              : !       CALL dbcsr_copy(matrix_filter,Fov)
    5696              : !
    5697              : !       ! fill out filter elements block-by-block
    5698              : !       CALL dbcsr_iterator_start(iter,matrix_filter)
    5699              : !       DO WHILE (dbcsr_iterator_blocks_left(iter))
    5700              : !
    5701              : !          CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
    5702              : !                  row_size=iblock_row_size,col_size=iblock_col_size)
    5703              : !
    5704              : !          retained_v=almo_scf_env%nvirt_of_domain(iblock_col,ispin)
    5705              : !
    5706              : !          data_p(:,1:retained_v)=0.0_dp
    5707              : !          data_p(:,(retained_v+1):iblock_col_size)=1.0_dp
    5708              : !
    5709              : !       ENDDO
    5710              : !       CALL dbcsr_iterator_stop(iter)
    5711              : !       CALL dbcsr_finalize(matrix_filter)
    5712              : !
    5713              : !       ! apply the filter
    5714              : !       CALL dbcsr_hadamard_product(Fov,matrix_filter,Fov_filtered)
    5715              : !
    5716              : !       !!!!!!!!!!!!!!!!!!!!!
    5717              : !       ! 3. start iterative minimization of the elements to be discarded
    5718              : !       iteration=0
    5719              : !       converged=.FALSE.
    5720              : !       prepare_to_exit=.FALSE.
    5721              : !       DO
    5722              : !
    5723              : !          iteration=iteration+1
    5724              : !
    5725              : !          !!!!!!!!!!!!!!!!!!!!!!!!!
    5726              : !          ! 4. compute the gradient
    5727              : !          CALL dbcsr_set(grad_blk,0.0_dp)
    5728              : !          ! create the diagonal blocks only
    5729              : !          CALL dbcsr_add_on_diag(grad_blk,1.0_dp)
    5730              : !
    5731              : !          CALL dbcsr_multiply("T","N",2.0_dp,Fov_filtered,Fov,&
    5732              : !                  0.0_dp,grad_blk,retain_sparsity=.TRUE.,&
    5733              : !                  filter_eps=almo_scf_env%eps_filter)
    5734              : !          CALL dbcsr_multiply("T","N",-2.0_dp,Fov,Fov_filtered,&
    5735              : !                  1.0_dp,grad_blk,retain_sparsity=.TRUE.,&
    5736              : !                  filter_eps=almo_scf_env%eps_filter)
    5737              : !
    5738              : !          !!!!!!!!!!!!!!!!!!!!!!!
    5739              : !          ! 5. check convergence
    5740              : !          obj_function = 0.5_dp*(dbcsr_frobenius_norm(Fov_filtered))**2
    5741              : !          grad_norm = dbcsr_frobenius_norm(grad_blk)
    5742              : !          converged=(grad_norm.lt.almo_scf_env%truncate_v_eps_convergence)
    5743              : !          IF (converged.OR.(iteration.ge.almo_scf_env%truncate_v_max_iter)) THEN
    5744              : !             prepare_to_exit=.TRUE.
    5745              : !          ENDIF
    5746              : !
    5747              : !          IF (.NOT.prepare_to_exit) THEN
    5748              : !
    5749              : !             !!!!!!!!!!!!!!!!!!!!!!!
    5750              : !             ! 6. perform steps in the direction of the gradient
    5751              : !             !    a. first, perform a trial step to "see" the parameters
    5752              : !             !       of the parabola along the gradient:
    5753              : !             !       a0 * x^2 + b0 * x + c0
    5754              : !             !    b. then perform the step to the bottom of the parabola
    5755              : !
    5756              : !             ! get c0
    5757              : !             c0 = obj_function
    5758              : !             ! get b0 <= d_f/d_alpha along grad
    5759              : !             !!!CALL dbcsr_multiply("N","N",4.0_dp,Fov,grad_blk,&
    5760              : !             !!!        0.0_dp,temp0_ov,&
    5761              : !             !!!        filter_eps=almo_scf_env%eps_filter)
    5762              : !             !!!CALL dbcsr_dot(Fov_filtered,temp0_ov,b0)
    5763              : !
    5764              : !             alpha=almo_scf_env%truncate_v_trial_step_size
    5765              : !
    5766              : !             line_search_step_last=3
    5767              : !             DO line_search_step=1,line_search_step_last
    5768              : !                CALL dbcsr_copy(step_blk,grad_blk)
    5769              : !                CALL dbcsr_scale(step_blk,-1.0_dp*alpha)
    5770              : !                CALL generator_to_unitary(step_blk,U_blk,&
    5771              : !                        almo_scf_env%eps_filter)
    5772              : !                CALL dbcsr_multiply("N","N",1.0_dp,Fov,U_blk,0.0_dp,temp0_ov,&
    5773              : !                        filter_eps=almo_scf_env%eps_filter)
    5774              : !                CALL dbcsr_hadamard_product(temp0_ov,matrix_filter,&
    5775              : !                        Fov_filtered)
    5776              : !
    5777              : !                obj_function_new = 0.5_dp*(dbcsr_frobenius_norm(Fov_filtered))**2
    5778              : !                IF (line_search_step.eq.1) THEN
    5779              : !                   ff1 = obj_function_new
    5780              : !                   step1 = alpha
    5781              : !                ELSE IF (line_search_step.eq.2) THEN
    5782              : !                   ff2 = obj_function_new
    5783              : !                   step2 = alpha
    5784              : !                ENDIF
    5785              : !
    5786              : !                IF (unit_nr>0.AND.(line_search_step.ne.line_search_step_last)) THEN
    5787              : !                   WRITE(unit_nr,'(T6,A,1X,I3,1X,F10.3,E12.3,E12.3,E12.3)') &
    5788              : !                         "JOINT_SVD_lin",&
    5789              : !                         iteration,&
    5790              : !                         alpha,&
    5791              : !                         obj_function,&
    5792              : !                         obj_function_new,&
    5793              : !                         obj_function_new-obj_function
    5794              : !                ENDIF
    5795              : !
    5796              : !                IF (line_search_step.eq.1) THEN
    5797              : !                   alpha=2.0_dp*alpha
    5798              : !                ENDIF
    5799              : !                IF (line_search_step.eq.2) THEN
    5800              : !                   a0 = ((ff1-c0)/step1 - (ff2-c0)/step2) / (step1 - step2)
    5801              : !                   b0 = (ff1-c0)/step1 - a0*step1
    5802              : !                   ! step size in to the bottom of "the parabola"
    5803              : !                   alpha=-b0/(2.0_dp*a0)
    5804              : !                   ! update the default step size
    5805              : !                   almo_scf_env%truncate_v_trial_step_size=alpha
    5806              : !                ENDIF
    5807              : !                !!!IF (line_search_step.eq.1) THEN
    5808              : !                !!!   a0 = (obj_function_new - b0 * alpha - c0) / (alpha*alpha)
    5809              : !                !!!   ! step size in to the bottom of "the parabola"
    5810              : !                !!!   alpha=-b0/(2.0_dp*a0)
    5811              : !                !!!   !IF (alpha.gt.10.0_dp) alpha=10.0_dp
    5812              : !                !!!ENDIF
    5813              : !
    5814              : !             ENDDO
    5815              : !
    5816              : !             ! update Fov and U_blk_tot (use grad_blk as tmp storage)
    5817              : !             CALL dbcsr_copy(Fov,temp0_ov)
    5818              : !             CALL dbcsr_multiply("N","N",1.0_dp,U_blk_tot,U_blk,&
    5819              : !                     0.0_dp,grad_blk,&
    5820              : !                     filter_eps=almo_scf_env%eps_filter)
    5821              : !             CALL dbcsr_copy(U_blk_tot,grad_blk)
    5822              : !
    5823              : !          ENDIF
    5824              : !
    5825              : !          t2 = m_walltime()
    5826              : !
    5827              : !          IF (unit_nr>0) THEN
    5828              : !             WRITE(unit_nr,'(T6,A,1X,I3,1X,F10.3,E12.3,E12.3,E12.3,E12.3,F10.3)') &
    5829              : !                   "JOINT_SVD_itr",&
    5830              : !                   iteration,&
    5831              : !                   alpha,&
    5832              : !                   obj_function,&
    5833              : !                   obj_function_new,&
    5834              : !                   obj_function_new-obj_function,&
    5835              : !                   grad_norm,&
    5836              : !                   t2-t1
    5837              : !                   !(flop1+flop2)/(1.0E6_dp*(t2-t1))
    5838              : !             CALL m_flush(unit_nr)
    5839              : !          ENDIF
    5840              : !
    5841              : !          t1 = m_walltime()
    5842              : !
    5843              : !          IF (prepare_to_exit) EXIT
    5844              : !
    5845              : !       ENDDO ! stop iterations
    5846              : !
    5847              : !       IF (safe_mode) THEN
    5848              : !          CALL dbcsr_multiply("N","N",1.0_dp,Fov_original,&
    5849              : !                  U_blk_tot,0.0_dp,temp0_ov,&
    5850              : !                  filter_eps=almo_scf_env%eps_filter)
    5851              : !CALL dbcsr_print(temp0_ov)
    5852              : !          CALL dbcsr_hadamard_product(temp0_ov,matrix_filter,&
    5853              : !                  Fov_filtered)
    5854              : !          obj_function_new = 0.5_dp*(dbcsr_frobenius_norm(Fov_filtered))**2
    5855              : !
    5856              : !          IF (unit_nr>0) THEN
    5857              : !             WRITE(unit_nr,'(T6,A,1X,E12.3)') &
    5858              : !                   "SANITY CHECK:",&
    5859              : !                   obj_function_new
    5860              : !             CALL m_flush(unit_nr)
    5861              : !          ENDIF
    5862              : !
    5863              : !          CALL dbcsr_release(Fov_original)
    5864              : !       ENDIF
    5865              : !
    5866              : !       CALL dbcsr_release(temp0_ov)
    5867              : !       CALL dbcsr_release(U_blk)
    5868              : !       CALL dbcsr_release(grad_blk)
    5869              : !       CALL dbcsr_release(step_blk)
    5870              : !       CALL dbcsr_release(matrix_filter)
    5871              : !       CALL dbcsr_release(Fov)
    5872              : !       CALL dbcsr_release(Fov_filtered)
    5873              : !
    5874              : !       ! compute rotated virtual orbitals
    5875              : !       CALL dbcsr_init(v_full_tmp)
    5876              : !       CALL dbcsr_create(v_full_tmp,&
    5877              : !               template=almo_scf_env%matrix_v_full_blk(ispin),&
    5878              : !               matrix_type=dbcsr_type_no_symmetry)
    5879              : !       CALL dbcsr_multiply("N","N",1.0_dp,&
    5880              : !               v_full_new,&
    5881              : !               matrix_sigma_vv_full_sqrt_inv,0.0_dp,v_full_tmp,&
    5882              : !               filter_eps=almo_scf_env%eps_filter)
    5883              : !       CALL dbcsr_multiply("N","N",1.0_dp,&
    5884              : !               v_full_tmp,&
    5885              : !               U_blk_tot,0.0_dp,v_full_new,&
    5886              : !               filter_eps=almo_scf_env%eps_filter)
    5887              : !
    5888              : !       CALL dbcsr_release(matrix_sigma_vv_full_sqrt_inv)
    5889              : !       CALL dbcsr_release(v_full_tmp)
    5890              : !       CALL dbcsr_release(U_blk_tot)
    5891              : !
    5892              : !!!!! orthogonalized virtuals are not blocked
    5893              : !       ! copy new virtuals into the truncated matrix
    5894              : !       !CALL dbcsr_work_create(almo_scf_env%matrix_v_blk(ispin),&
    5895              : !       CALL dbcsr_work_create(almo_scf_env%matrix_v(ispin),&
    5896              : !               work_mutable=.TRUE.)
    5897              : !       CALL dbcsr_iterator_start(iter,v_full_new)
    5898              : !       DO WHILE (dbcsr_iterator_blocks_left(iter))
    5899              : !
    5900              : !          CALL dbcsr_iterator_next_block(iter,iblock_row,iblock_col,data_p,&
    5901              : !                  row_size=iblock_row_size,col_size=iblock_col_size)
    5902              : !
    5903              : !          retained_v=almo_scf_env%nvirt_of_domain(iblock_col,ispin)
    5904              : !
    5905              : !          CALL dbcsr_put_block(almo_scf_env%matrix_v(ispin), iblock_row,iblock_col,data_p(:,1:retained_v))
    5906              : !          CPASSERT(retained_v.gt.0)
    5907              : !
    5908              : !       ENDDO ! iterator
    5909              : !       CALL dbcsr_iterator_stop(iter)
    5910              : !       !!CALL dbcsr_finalize(almo_scf_env%matrix_v_blk(ispin))
    5911              : !       CALL dbcsr_finalize(almo_scf_env%matrix_v(ispin))
    5912              : !
    5913              : !       CALL dbcsr_release(v_full_new)
    5914              : !
    5915              : !    ENDDO ! ispin
    5916              : !
    5917              : !    CALL timestop(handle)
    5918              : !
    5919              : !  END SUBROUTINE truncate_subspace_v_blk
    5920              : 
    5921              : ! **************************************************************************************************
    5922              : !> \brief Compute the gradient wrt the main variable (e.g. Theta, X)
    5923              : !> \param m_grad_out ...
    5924              : !> \param m_ks ...
    5925              : !> \param m_s ...
    5926              : !> \param m_t ...
    5927              : !> \param m_t0 ...
    5928              : !> \param m_siginv ...
    5929              : !> \param m_quench_t ...
    5930              : !> \param m_FTsiginv ...
    5931              : !> \param m_siginvTFTsiginv ...
    5932              : !> \param m_ST ...
    5933              : !> \param m_STsiginv0 ...
    5934              : !> \param m_theta ...
    5935              : !> \param domain_s_inv ...
    5936              : !> \param domain_r_down ...
    5937              : !> \param cpu_of_domain ...
    5938              : !> \param domain_map ...
    5939              : !> \param assume_t0_q0x ...
    5940              : !> \param optimize_theta ...
    5941              : !> \param normalize_orbitals ...
    5942              : !> \param penalty_occ_vol ...
    5943              : !> \param penalty_occ_local ...
    5944              : !> \param penalty_occ_vol_prefactor ...
    5945              : !> \param envelope_amplitude ...
    5946              : !> \param eps_filter ...
    5947              : !> \param spin_factor ...
    5948              : !> \param special_case ...
    5949              : !> \param m_sig_sqrti_ii ...
    5950              : !> \param op_sm_set ...
    5951              : !> \param weights ...
    5952              : !> \param energy_coeff ...
    5953              : !> \param localiz_coeff ...
    5954              : !> \par History
    5955              : !>       2015.03 created [Rustam Z Khaliullin]
    5956              : !> \author Rustam Z Khaliullin
    5957              : ! **************************************************************************************************
    5958         1474 :    SUBROUTINE compute_gradient(m_grad_out, m_ks, m_s, m_t, m_t0, &
    5959              :                                m_siginv, m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, m_STsiginv0, &
    5960         1474 :                                m_theta, domain_s_inv, domain_r_down, &
    5961         1474 :                                cpu_of_domain, domain_map, assume_t0_q0x, optimize_theta, &
    5962              :                                normalize_orbitals, penalty_occ_vol, penalty_occ_local, &
    5963              :                                penalty_occ_vol_prefactor, envelope_amplitude, eps_filter, spin_factor, &
    5964         1474 :                                special_case, m_sig_sqrti_ii, op_sm_set, weights, energy_coeff, &
    5965              :                                localiz_coeff)
    5966              : 
    5967              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_grad_out, m_ks, m_s, m_t, m_t0, &
    5968              :                                                             m_siginv, m_quench_t, m_FTsiginv, &
    5969              :                                                             m_siginvTFTsiginv, m_ST, m_STsiginv0, &
    5970              :                                                             m_theta
    5971              :       TYPE(domain_submatrix_type), DIMENSION(:), &
    5972              :          INTENT(IN)                                      :: domain_s_inv, domain_r_down
    5973              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
    5974              :       TYPE(domain_map_type), INTENT(IN)                  :: domain_map
    5975              :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, optimize_theta, &
    5976              :                                                             normalize_orbitals, penalty_occ_vol
    5977              :       LOGICAL, INTENT(IN), OPTIONAL                      :: penalty_occ_local
    5978              :       REAL(KIND=dp), INTENT(IN)                          :: penalty_occ_vol_prefactor, &
    5979              :                                                             envelope_amplitude, eps_filter, &
    5980              :                                                             spin_factor
    5981              :       INTEGER, INTENT(IN)                                :: special_case
    5982              :       TYPE(dbcsr_type), INTENT(IN), OPTIONAL             :: m_sig_sqrti_ii
    5983              :       TYPE(dbcsr_p_type), DIMENSION(:, :), OPTIONAL, &
    5984              :          POINTER                                         :: op_sm_set
    5985              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN), OPTIONAL  :: weights
    5986              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: energy_coeff, localiz_coeff
    5987              : 
    5988              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'compute_gradient'
    5989              : 
    5990              :       INTEGER                                            :: dim0, handle, idim0, nao, reim
    5991              :       LOGICAL                                            :: my_penalty_local
    5992              :       REAL(KIND=dp)                                      :: coeff, energy_g_norm, my_energy_coeff, &
    5993              :                                                             my_localiz_coeff, &
    5994              :                                                             penalty_occ_vol_g_norm
    5995         1474 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: tg_diagonal
    5996              :       TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_no_2, m_tmp_no_3, &
    5997              :                                                             m_tmp_oo_1, m_tmp_oo_2, temp1, temp2, &
    5998              :                                                             tempNOcc1, tempOccOcc1
    5999              : 
    6000         1474 :       CALL timeset(routineN, handle)
    6001              : 
    6002         1474 :       IF (normalize_orbitals .AND. (.NOT. PRESENT(m_sig_sqrti_ii))) THEN
    6003            0 :          CPABORT("Normalization matrix is required")
    6004              :       END IF
    6005              : 
    6006         1474 :       my_penalty_local = .FALSE.
    6007         1474 :       my_localiz_coeff = 1.0_dp
    6008         1474 :       my_energy_coeff = 0.0_dp
    6009         1474 :       IF (PRESENT(localiz_coeff)) THEN
    6010         1048 :          my_localiz_coeff = localiz_coeff
    6011              :       END IF
    6012         1474 :       IF (PRESENT(energy_coeff)) THEN
    6013         1048 :          my_energy_coeff = energy_coeff
    6014              :       END IF
    6015         1474 :       IF (PRESENT(penalty_occ_local)) THEN
    6016         1048 :          my_penalty_local = penalty_occ_local
    6017              :       END IF
    6018              : 
    6019              :       ! use this otherways unused variables
    6020         1474 :       CALL dbcsr_get_info(matrix=m_ks, nfullrows_total=nao)
    6021         1474 :       CALL dbcsr_get_info(matrix=m_s, nfullrows_total=nao)
    6022         1474 :       CALL dbcsr_get_info(matrix=m_t, nfullrows_total=nao)
    6023              : 
    6024              :       CALL dbcsr_create(m_tmp_no_1, &
    6025              :                         template=m_quench_t, &
    6026         1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6027              :       CALL dbcsr_create(m_tmp_no_2, &
    6028              :                         template=m_quench_t, &
    6029         1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6030              :       CALL dbcsr_create(m_tmp_no_3, &
    6031              :                         template=m_quench_t, &
    6032         1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6033              :       CALL dbcsr_create(m_tmp_oo_1, &
    6034              :                         template=m_siginv, &
    6035         1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6036              :       CALL dbcsr_create(m_tmp_oo_2, &
    6037              :                         template=m_siginv, &
    6038         1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6039              :       CALL dbcsr_create(tempNOcc1, &
    6040              :                         template=m_t, &
    6041         1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6042              :       CALL dbcsr_create(tempOccOcc1, &
    6043              :                         template=m_siginv, &
    6044         1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6045              :       CALL dbcsr_create(temp1, &
    6046              :                         template=m_t, &
    6047         1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6048              :       CALL dbcsr_create(temp2, &
    6049              :                         template=m_t, &
    6050         1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6051              : 
    6052              :       ! do d_E/d_T first
    6053              :       !IF (.NOT.PRESENT(m_FTsiginv)) THEN
    6054              :       !   CALL dbcsr_multiply("N","N",1.0_dp,&
    6055              :       !           m_ks,&
    6056              :       !           m_t,&
    6057              :       !           0.0_dp,m_tmp_no_1,&
    6058              :       !           filter_eps=eps_filter)
    6059              :       !   CALL dbcsr_multiply("N","N",1.0_dp,&
    6060              :       !           m_tmp_no_1,&
    6061              :       !           m_siginv,&
    6062              :       !           0.0_dp,m_FTsiginv,&
    6063              :       !           filter_eps=eps_filter)
    6064              :       !ENDIF
    6065              : 
    6066         1474 :       CALL dbcsr_copy(m_tmp_no_2, m_quench_t)
    6067         1474 :       CALL dbcsr_copy(m_tmp_no_2, m_FTsiginv, keep_sparsity=.TRUE.)
    6068              : 
    6069              :       !IF (.NOT.PRESENT(m_siginvTFTsiginv)) THEN
    6070              :       !   CALL dbcsr_multiply("T","N",1.0_dp,&
    6071              :       !           m_t,&
    6072              :       !           m_FTsiginv,&
    6073              :       !           0.0_dp,m_tmp_oo_1,&
    6074              :       !           filter_eps=eps_filter)
    6075              :       !   CALL dbcsr_multiply("N","N",1.0_dp,&
    6076              :       !           m_siginv,&
    6077              :       !           m_tmp_oo_1,&
    6078              :       !           0.0_dp,m_siginvTFTsiginv,&
    6079              :       !           filter_eps=eps_filter)
    6080              :       !ENDIF
    6081              : 
    6082              :       !IF (.NOT.PRESENT(m_ST)) THEN
    6083              :       !   CALL dbcsr_multiply("N","N",1.0_dp,&
    6084              :       !           m_s,&
    6085              :       !           m_t,&
    6086              :       !           0.0_dp,m_ST,&
    6087              :       !           filter_eps=eps_filter)
    6088              :       !ENDIF
    6089              : 
    6090              :       CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6091              :                           m_ST, &
    6092              :                           m_siginvTFTsiginv, &
    6093              :                           1.0_dp, m_tmp_no_2, &
    6094         1474 :                           retain_sparsity=.TRUE.)
    6095         1474 :       CALL dbcsr_scale(m_tmp_no_2, 2.0_dp*spin_factor)
    6096              : 
    6097              :       ! LzL Add gradient for Localization
    6098         1474 :       IF (my_penalty_local) THEN
    6099              : 
    6100            0 :          CALL dbcsr_set(temp2, 0.0_dp) ! accumulate the localization gradient here
    6101              : 
    6102            0 :          DO idim0 = 1, SIZE(op_sm_set, 2) ! this loop is over miller ind
    6103              : 
    6104            0 :             DO reim = 1, SIZE(op_sm_set, 1) ! this loop is over Re/Im
    6105              : 
    6106              :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6107              :                                    op_sm_set(reim, idim0)%matrix, &
    6108              :                                    m_t, &
    6109              :                                    0.0_dp, tempNOcc1, &
    6110            0 :                                    filter_eps=eps_filter)
    6111              : 
    6112              :                ! warning - save time by computing only the diagonal elements
    6113              :                CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6114              :                                    m_t, &
    6115              :                                    tempNOcc1, &
    6116              :                                    0.0_dp, tempOccOcc1, &
    6117            0 :                                    filter_eps=eps_filter)
    6118              : 
    6119            0 :                CALL dbcsr_get_info(tempOccOcc1, nfullrows_total=dim0)
    6120            0 :                ALLOCATE (tg_diagonal(dim0))
    6121            0 :                CALL dbcsr_get_diag(tempOccOcc1, tg_diagonal)
    6122            0 :                CALL dbcsr_set(tempOccOcc1, 0.0_dp)
    6123            0 :                CALL dbcsr_set_diag(tempOccOcc1, tg_diagonal)
    6124            0 :                DEALLOCATE (tg_diagonal)
    6125              : 
    6126              :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6127              :                                    tempNOcc1, &
    6128              :                                    tempOccOcc1, &
    6129              :                                    0.0_dp, temp1, &
    6130            0 :                                    filter_eps=eps_filter)
    6131              : 
    6132              :             END DO
    6133              : 
    6134              :             SELECT CASE (2) ! allows for selection of different spread functionals
    6135              :             CASE (1) ! functional =  -W_I * log( |z_I|^2 )
    6136            0 :                CPABORT("Localization function is not implemented")
    6137              :                !coeff = -(weights(idim0)/z2(ielem))
    6138              :             CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
    6139            0 :                coeff = -weights(idim0)
    6140              :             CASE (3) ! functional =  W_I * ( 1 - |z_I| )
    6141              :                CPABORT("Localization function is not implemented")
    6142              :                !coeff = -(weights(idim0)/(2.0_dp*z2(ielem)))
    6143              :             END SELECT
    6144            0 :             CALL dbcsr_add(temp2, temp1, 1.0_dp, coeff)
    6145              :             !CALL dbcsr_add(grad_loc, temp1, 1.0_dp, 1.0_dp)
    6146              : 
    6147              :          END DO ! end loop over idim0
    6148            0 :          CALL dbcsr_add(m_tmp_no_2, temp2, my_energy_coeff, my_localiz_coeff*4.0_dp)
    6149              :       END IF
    6150              : 
    6151              :       ! add penalty on the occupied volume: det(sigma)
    6152         1474 :       IF (penalty_occ_vol) THEN
    6153              :          !RZK-warning CALL dbcsr_multiply("N","N",&
    6154              :          !RZK-warning         penalty_occ_vol_prefactor,&
    6155              :          !RZK-warning         m_ST,&
    6156              :          !RZK-warning         m_siginv,&
    6157              :          !RZK-warning         1.0_dp,m_tmp_no_2,&
    6158              :          !RZK-warning         retain_sparsity=.TRUE.,&
    6159              :          !RZK-warning         )
    6160            0 :          CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
    6161              :          CALL dbcsr_multiply("N", "N", &
    6162              :                              penalty_occ_vol_prefactor, &
    6163              :                              m_ST, &
    6164              :                              m_siginv, &
    6165              :                              0.0_dp, m_tmp_no_1, &
    6166            0 :                              retain_sparsity=.TRUE.)
    6167              :          ! this norm does not contain the normalization factors
    6168            0 :          penalty_occ_vol_g_norm = dbcsr_maxabs(m_tmp_no_1)
    6169            0 :          energy_g_norm = dbcsr_maxabs(m_tmp_no_2)
    6170              :          !WRITE (*, "(A30,2F20.10)") "Energy/penalty g norms (no norm): ", energy_g_norm, penalty_occ_vol_g_norm
    6171            0 :          CALL dbcsr_add(m_tmp_no_2, m_tmp_no_1, 1.0_dp, 1.0_dp)
    6172              :       END IF
    6173              : 
    6174              :       ! take into account the factor from the normalization constraint
    6175         1474 :       IF (normalize_orbitals) THEN
    6176              : 
    6177              :          ! G = ( G - ST.[tr(T).G]_ii ) . [sig_sqrti]_ii
    6178              :          ! this expression can be simplified to
    6179              :          ! G = ( G - c0*ST ) . [sig_sqrti]_ii
    6180              :          ! where c0 = penalty_occ_vol_prefactor
    6181              :          ! This is because tr(T).G_Energy = 0 and
    6182              :          !                 tr(T).G_Penalty = c0*I
    6183              : 
    6184              :          !! faster way to take the norm into account (tested for vol penalty olny)
    6185              :          !!CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
    6186              :          !!CALL dbcsr_copy(m_tmp_no_1, m_ST, keep_sparsity=.TRUE.)
    6187              :          !!CALL dbcsr_add(m_tmp_no_2, m_tmp_no_1, 1.0_dp, -penalty_occ_vol_prefactor)
    6188              :          !!CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
    6189              :          !!CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6190              :          !!                    m_tmp_no_2, &
    6191              :          !!                    m_sig_sqrti_ii, &
    6192              :          !!                    0.0_dp, m_tmp_no_1, &
    6193              :          !!                    retain_sparsity=.TRUE.)
    6194              : 
    6195              :          ! slower way of taking the norm into account
    6196            0 :          CALL dbcsr_copy(m_tmp_no_1, m_quench_t)
    6197              :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6198              :                              m_tmp_no_2, &
    6199              :                              m_sig_sqrti_ii, &
    6200              :                              0.0_dp, m_tmp_no_1, &
    6201            0 :                              retain_sparsity=.TRUE.)
    6202              : 
    6203              :          ! get [tr(T).G]_ii
    6204            0 :          CALL dbcsr_copy(m_tmp_oo_1, m_sig_sqrti_ii)
    6205              :          CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6206              :                              m_t, &
    6207              :                              m_tmp_no_2, &
    6208              :                              0.0_dp, m_tmp_oo_1, &
    6209            0 :                              retain_sparsity=.TRUE.)
    6210              : 
    6211            0 :          CALL dbcsr_get_info(m_sig_sqrti_ii, nfullrows_total=dim0)
    6212            0 :          ALLOCATE (tg_diagonal(dim0))
    6213            0 :          CALL dbcsr_get_diag(m_tmp_oo_1, tg_diagonal)
    6214            0 :          CALL dbcsr_set(m_tmp_oo_1, 0.0_dp)
    6215            0 :          CALL dbcsr_set_diag(m_tmp_oo_1, tg_diagonal)
    6216            0 :          DEALLOCATE (tg_diagonal)
    6217              : 
    6218              :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6219              :                              m_sig_sqrti_ii, &
    6220              :                              m_tmp_oo_1, &
    6221              :                              0.0_dp, m_tmp_oo_2, &
    6222            0 :                              filter_eps=eps_filter)
    6223              :          CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6224              :                              m_ST, &
    6225              :                              m_tmp_oo_2, &
    6226              :                              1.0_dp, m_tmp_no_1, &
    6227            0 :                              retain_sparsity=.TRUE.)
    6228              : 
    6229              :       ELSE
    6230              : 
    6231         1474 :          CALL dbcsr_copy(m_tmp_no_1, m_tmp_no_2)
    6232              : 
    6233              :       END IF ! normalize_orbitals
    6234              : 
    6235              :       ! project out the occupied space from the gradient
    6236         1474 :       IF (assume_t0_q0x) THEN
    6237          466 :          IF (special_case == xalmo_case_fully_deloc) THEN
    6238          160 :             CALL dbcsr_copy(m_grad_out, m_tmp_no_1)
    6239              :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6240              :                                 m_t0, &
    6241              :                                 m_grad_out, &
    6242              :                                 0.0_dp, m_tmp_oo_1, &
    6243          160 :                                 filter_eps=eps_filter)
    6244              :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6245              :                                 m_STsiginv0, &
    6246              :                                 m_tmp_oo_1, &
    6247              :                                 1.0_dp, m_grad_out, &
    6248          160 :                                 filter_eps=eps_filter)
    6249          306 :          ELSE IF (special_case == xalmo_case_block_diag) THEN
    6250            0 :             CPABORT("Cannot project the zero-order space from itself")
    6251              :          ELSE
    6252              :             ! no special case: normal xALMOs
    6253              :             CALL apply_domain_operators( &
    6254              :                matrix_in=m_tmp_no_1, &
    6255              :                matrix_out=m_grad_out, &
    6256              :                operator2=domain_r_down(:), &
    6257              :                operator1=domain_s_inv(:), &
    6258              :                dpattern=m_quench_t, &
    6259              :                map=domain_map, &
    6260              :                node_of_domain=cpu_of_domain, &
    6261              :                my_action=1, &
    6262              :                filter_eps=eps_filter, &
    6263              :                !matrix_trimmer=,&
    6264          306 :                use_trimmer=.FALSE.)
    6265              :          END IF ! my_special_case
    6266          466 :          CALL dbcsr_copy(m_tmp_no_1, m_grad_out)
    6267              :       END IF
    6268              : 
    6269              :       !! check whether the gradient lies entirely in R or Q
    6270              :       !CALL dbcsr_multiply("T","N",1.0_dp,&
    6271              :       !        m_t,&
    6272              :       !        m_tmp_no_1,&
    6273              :       !        0.0_dp,m_tmp_oo_1,&
    6274              :       !        filter_eps=eps_filter,&
    6275              :       !        )
    6276              :       !CALL dbcsr_multiply("N","N",1.0_dp,&
    6277              :       !        m_siginv,&
    6278              :       !        m_tmp_oo_1,&
    6279              :       !        0.0_dp,m_tmp_oo_2,&
    6280              :       !        filter_eps=eps_filter,&
    6281              :       !        )
    6282              :       !CALL dbcsr_copy(m_tmp_no_2,m_tmp_no_1)
    6283              :       !CALL dbcsr_multiply("N","N",-1.0_dp,&
    6284              :       !        m_ST,&
    6285              :       !        m_tmp_oo_2,&
    6286              :       !        1.0_dp,m_tmp_no_2,&
    6287              :       !        retain_sparsity=.TRUE.,&
    6288              :       !        )
    6289              :       !penalty_occ_vol_g_norm = dbcsr_maxabs(m_tmp_no_2)
    6290              :       !WRITE(*,"(A50,2F20.10)") "Virtual-space projection of the gradient", penalty_occ_vol_g_norm
    6291              :       !CALL dbcsr_add(m_tmp_no_2,m_tmp_no_1,1.0_dp,-1.0_dp)
    6292              :       !penalty_occ_vol_g_norm = dbcsr_maxabs(m_tmp_no_2)
    6293              :       !WRITE(*,"(A50,2F20.10)") "Occupied-space projection of the gradient", penalty_occ_vol_g_norm
    6294              :       !penalty_occ_vol_g_norm = dbcsr_maxabs(m_tmp_no_1)
    6295              :       !WRITE(*,"(A50,2F20.10)") "Full gradient", penalty_occ_vol_g_norm
    6296              : 
    6297              :       ! transform d_E/d_T to d_E/d_theta
    6298         1474 :       IF (optimize_theta) THEN
    6299            0 :          CALL dbcsr_copy(m_tmp_no_2, m_theta)
    6300            0 :          CALL dtanh_of_elements(m_tmp_no_2, alpha=1.0_dp/envelope_amplitude)
    6301            0 :          CALL dbcsr_scale(m_tmp_no_2, envelope_amplitude)
    6302            0 :          CALL dbcsr_set(m_tmp_no_3, 0.0_dp)
    6303            0 :          CALL dbcsr_filter(m_tmp_no_3, eps_filter)
    6304              :          CALL dbcsr_hadamard_product(m_tmp_no_1, &
    6305              :                                      m_tmp_no_2, &
    6306            0 :                                      m_tmp_no_3)
    6307              :          CALL dbcsr_hadamard_product(m_tmp_no_3, &
    6308              :                                      m_quench_t, &
    6309            0 :                                      m_grad_out)
    6310              :       ELSE ! simply copy
    6311              :          CALL dbcsr_hadamard_product(m_tmp_no_1, &
    6312              :                                      m_quench_t, &
    6313         1474 :                                      m_grad_out)
    6314              :       END IF
    6315         1474 :       CALL dbcsr_filter(m_grad_out, eps_filter)
    6316              : 
    6317         1474 :       CALL dbcsr_release(m_tmp_no_1)
    6318         1474 :       CALL dbcsr_release(m_tmp_no_2)
    6319         1474 :       CALL dbcsr_release(m_tmp_no_3)
    6320         1474 :       CALL dbcsr_release(m_tmp_oo_1)
    6321         1474 :       CALL dbcsr_release(m_tmp_oo_2)
    6322         1474 :       CALL dbcsr_release(tempNOcc1)
    6323         1474 :       CALL dbcsr_release(tempOccOcc1)
    6324         1474 :       CALL dbcsr_release(temp1)
    6325         1474 :       CALL dbcsr_release(temp2)
    6326              : 
    6327         1474 :       CALL timestop(handle)
    6328              : 
    6329         2948 :    END SUBROUTINE compute_gradient
    6330              : 
    6331              : ! **************************************************************************************************
    6332              : !> \brief Serial code that prints matrices readable by Mathematica
    6333              : !> \param matrix - matrix to print
    6334              : !> \param filename ...
    6335              : !> \par History
    6336              : !>       2015.05 created [Rustam Z. Khaliullin]
    6337              : !> \author Rustam Z. Khaliullin
    6338              : ! **************************************************************************************************
    6339            0 :    SUBROUTINE print_mathematica_matrix(matrix, filename)
    6340              : 
    6341              :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix
    6342              :       CHARACTER(len=*), INTENT(IN)                       :: filename
    6343              : 
    6344              :       CHARACTER(len=*), PARAMETER :: routineN = 'print_mathematica_matrix'
    6345              : 
    6346              :       CHARACTER(LEN=20)                                  :: formatstr, Scols
    6347              :       INTEGER                                            :: col, fiunit, handle, hori_offset, jj, &
    6348              :                                                             nblkcols_tot, nblkrows_tot, Ncols, &
    6349              :                                                             ncores, Nrows, row, unit_nr, &
    6350              :                                                             vert_offset
    6351            0 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ao_block_sizes, mo_block_sizes
    6352            0 :       INTEGER, DIMENSION(:), POINTER                     :: ao_blk_sizes, mo_blk_sizes
    6353              :       LOGICAL                                            :: found
    6354            0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: H
    6355            0 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block_p
    6356              :       TYPE(cp_logger_type), POINTER                      :: logger
    6357              :       TYPE(dbcsr_distribution_type)                      :: dist
    6358              :       TYPE(dbcsr_type)                                   :: matrix_asym
    6359              : 
    6360            0 :       CALL timeset(routineN, handle)
    6361              : 
    6362              :       ! get a useful output_unit
    6363            0 :       logger => cp_get_default_logger()
    6364            0 :       IF (logger%para_env%is_source()) THEN
    6365            0 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    6366              :       ELSE
    6367              :          unit_nr = -1
    6368              :       END IF
    6369              : 
    6370              :       ! serial code only
    6371            0 :       CALL dbcsr_get_info(matrix, distribution=dist)
    6372            0 :       CALL dbcsr_distribution_get(dist, numnodes=ncores)
    6373            0 :       IF (ncores > 1) THEN
    6374            0 :          CPABORT("mathematica files: serial code only")
    6375              :       END IF
    6376              : 
    6377              :       CALL dbcsr_get_info(matrix, row_blk_size=ao_blk_sizes, col_blk_size=mo_blk_sizes, &
    6378            0 :                           nblkrows_total=nblkrows_tot, nblkcols_total=nblkcols_tot)
    6379            0 :       CPASSERT(nblkrows_tot == nblkcols_tot)
    6380            0 :       ALLOCATE (mo_block_sizes(nblkcols_tot), ao_block_sizes(nblkcols_tot))
    6381            0 :       mo_block_sizes(:) = mo_blk_sizes(:)
    6382            0 :       ao_block_sizes(:) = ao_blk_sizes(:)
    6383              : 
    6384              :       CALL dbcsr_create(matrix_asym, &
    6385              :                         template=matrix, &
    6386            0 :                         matrix_type=dbcsr_type_no_symmetry)
    6387            0 :       CALL dbcsr_desymmetrize(matrix, matrix_asym)
    6388              : 
    6389            0 :       Ncols = SUM(mo_block_sizes)
    6390            0 :       Nrows = SUM(ao_block_sizes)
    6391            0 :       ALLOCATE (H(Nrows, Ncols))
    6392            0 :       H(:, :) = 0.0_dp
    6393              : 
    6394            0 :       hori_offset = 0
    6395            0 :       DO col = 1, nblkcols_tot
    6396              : 
    6397            0 :          vert_offset = 0
    6398            0 :          DO row = 1, nblkrows_tot
    6399              : 
    6400            0 :             CALL dbcsr_get_block_p(matrix_asym, row, col, block_p, found)
    6401            0 :             IF (found) THEN
    6402              : 
    6403              :                H(vert_offset + 1:vert_offset + ao_block_sizes(row), &
    6404              :                  hori_offset + 1:hori_offset + mo_block_sizes(col)) &
    6405            0 :                   = block_p(:, :)
    6406              : 
    6407              :             END IF
    6408              : 
    6409            0 :             vert_offset = vert_offset + ao_block_sizes(row)
    6410              : 
    6411              :          END DO
    6412              : 
    6413            0 :          hori_offset = hori_offset + mo_block_sizes(col)
    6414              : 
    6415              :       END DO ! loop over electron blocks
    6416              : 
    6417            0 :       CALL dbcsr_release(matrix_asym)
    6418              : 
    6419            0 :       IF (unit_nr > 0) THEN
    6420            0 :          CALL open_file(filename, unit_number=fiunit, file_status='REPLACE')
    6421            0 :          WRITE (Scols, "(I10)") Ncols
    6422            0 :          formatstr = "("//TRIM(Scols)//"E27.17)"
    6423            0 :          DO jj = 1, Nrows
    6424            0 :             WRITE (fiunit, formatstr) H(jj, :)
    6425              :          END DO
    6426            0 :          CALL close_file(fiunit)
    6427              :       END IF
    6428              : 
    6429            0 :       DEALLOCATE (mo_block_sizes)
    6430            0 :       DEALLOCATE (ao_block_sizes)
    6431            0 :       DEALLOCATE (H)
    6432              : 
    6433            0 :       CALL timestop(handle)
    6434              : 
    6435            0 :    END SUBROUTINE print_mathematica_matrix
    6436              : 
    6437              : ! **************************************************************************************************
    6438              : !> \brief Compute the objective functional of NLMOs
    6439              : !> \param localization_obj_function_ispin ...
    6440              : !> \param penalty_func_ispin ...
    6441              : !> \param penalty_vol_prefactor ...
    6442              : !> \param overlap_determinant ...
    6443              : !> \param m_sigma ...
    6444              : !> \param nocc ...
    6445              : !> \param m_B0 ...
    6446              : !> \param m_theta_normalized ...
    6447              : !> \param template_matrix_mo ...
    6448              : !> \param weights ...
    6449              : !> \param m_S0 ...
    6450              : !> \param just_started ...
    6451              : !> \param penalty_amplitude ...
    6452              : !> \param eps_filter ...
    6453              : !> \par History
    6454              : !>       2020.01 created [Ziling Luo]
    6455              : !> \author Ziling Luo
    6456              : ! **************************************************************************************************
    6457           82 :    SUBROUTINE compute_obj_nlmos(localization_obj_function_ispin, penalty_func_ispin, &
    6458           82 :                                 penalty_vol_prefactor, overlap_determinant, m_sigma, nocc, m_B0, &
    6459           82 :                                 m_theta_normalized, template_matrix_mo, weights, m_S0, just_started, &
    6460              :                                 penalty_amplitude, eps_filter)
    6461              : 
    6462              :       REAL(KIND=dp), INTENT(INOUT) :: localization_obj_function_ispin, penalty_func_ispin, &
    6463              :          penalty_vol_prefactor, overlap_determinant
    6464              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_sigma
    6465              :       INTEGER, INTENT(IN)                                :: nocc
    6466              :       TYPE(dbcsr_type), DIMENSION(:, :), INTENT(IN)      :: m_B0
    6467              :       TYPE(dbcsr_type), INTENT(IN)                       :: m_theta_normalized, template_matrix_mo
    6468              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: weights
    6469              :       TYPE(dbcsr_type), INTENT(IN)                       :: m_S0
    6470              :       LOGICAL, INTENT(IN)                                :: just_started
    6471              :       REAL(KIND=dp), INTENT(IN)                          :: penalty_amplitude, eps_filter
    6472              : 
    6473              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'compute_obj_nlmos'
    6474              : 
    6475              :       INTEGER                                            :: handle, idim0, ielem, reim
    6476              :       REAL(KIND=dp)                                      :: det1, fval
    6477           82 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: reim_diag, z2
    6478              :       TYPE(dbcsr_type)                                   :: tempNOcc1, tempOccOcc1, tempOccOcc2
    6479              :       TYPE(mp_comm_type)                                 :: group
    6480              : 
    6481           82 :       CALL timeset(routineN, handle)
    6482              : 
    6483              :       CALL dbcsr_create(tempNOcc1, &
    6484              :                         template=template_matrix_mo, &
    6485           82 :                         matrix_type=dbcsr_type_no_symmetry)
    6486              :       CALL dbcsr_create(tempOccOcc1, &
    6487              :                         template=m_theta_normalized, &
    6488           82 :                         matrix_type=dbcsr_type_no_symmetry)
    6489              :       CALL dbcsr_create(tempOccOcc2, &
    6490              :                         template=m_theta_normalized, &
    6491           82 :                         matrix_type=dbcsr_type_no_symmetry)
    6492              : 
    6493           82 :       localization_obj_function_ispin = 0.0_dp
    6494           82 :       penalty_func_ispin = 0.0_dp
    6495          246 :       ALLOCATE (z2(nocc))
    6496          164 :       ALLOCATE (reim_diag(nocc))
    6497              : 
    6498           82 :       CALL dbcsr_get_info(tempOccOcc2, group=group)
    6499              : 
    6500          842 :       DO idim0 = 1, SIZE(m_B0, 2) ! this loop is over miller ind
    6501              : 
    6502        12608 :          z2(:) = 0.0_dp
    6503              : 
    6504         1520 :          DO reim = 1, SIZE(m_B0, 1) ! this loop is over Re/Im
    6505              : 
    6506              :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6507              :                                 m_B0(reim, idim0), &
    6508              :                                 m_theta_normalized, &
    6509              :                                 0.0_dp, tempOccOcc1, &
    6510          760 :                                 filter_eps=eps_filter)
    6511          760 :             CALL dbcsr_set(tempOccOcc2, 0.0_dp)
    6512          760 :             CALL dbcsr_add_on_diag(tempOccOcc2, 1.0_dp)
    6513              :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6514              :                                 m_theta_normalized, &
    6515              :                                 tempOccOcc1, &
    6516              :                                 0.0_dp, tempOccOcc2, &
    6517          760 :                                 retain_sparsity=.TRUE.)
    6518              : 
    6519        12608 :             reim_diag = 0.0_dp
    6520          760 :             CALL dbcsr_get_diag(tempOccOcc2, reim_diag)
    6521          760 :             CALL group%sum(reim_diag)
    6522        13368 :             z2(:) = z2(:) + reim_diag(:)*reim_diag(:)
    6523              : 
    6524              :          END DO
    6525              : 
    6526        12690 :          DO ielem = 1, nocc
    6527              :             SELECT CASE (2) ! allows for selection of different spread functionals
    6528              :             CASE (1) ! functional =  -W_I * log( |z_I|^2 )
    6529        11848 :                fval = -weights(idim0)*LOG(ABS(z2(ielem)))
    6530              :             CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
    6531        11848 :                fval = weights(idim0) - weights(idim0)*ABS(z2(ielem))
    6532              :             CASE (3) ! functional =  W_I * ( 1 - |z_I| )
    6533              :                fval = weights(idim0) - weights(idim0)*SQRT(ABS(z2(ielem)))
    6534              :             END SELECT
    6535        12608 :             localization_obj_function_ispin = localization_obj_function_ispin + fval
    6536              :          END DO
    6537              : 
    6538              :       END DO ! end loop over idim0
    6539              : 
    6540           82 :       DEALLOCATE (z2)
    6541           82 :       DEALLOCATE (reim_diag)
    6542              : 
    6543              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6544              :                           m_S0, &
    6545              :                           m_theta_normalized, &
    6546              :                           0.0_dp, tempOccOcc1, &
    6547           82 :                           filter_eps=eps_filter)
    6548              :       ! compute current sigma
    6549              :       CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6550              :                           m_theta_normalized, &
    6551              :                           tempOccOcc1, &
    6552              :                           0.0_dp, m_sigma, &
    6553           82 :                           filter_eps=eps_filter)
    6554              : 
    6555              :       CALL determinant(m_sigma, det1, &
    6556           82 :                        eps_filter)
    6557              :       ! save the current determinant
    6558           82 :       overlap_determinant = det1
    6559              : 
    6560           82 :       IF (just_started .AND. penalty_amplitude < 0.0_dp) THEN
    6561            4 :          penalty_vol_prefactor = -(-penalty_amplitude)*localization_obj_function_ispin
    6562              :       END IF
    6563           82 :       penalty_func_ispin = penalty_func_ispin + penalty_vol_prefactor*LOG(det1)
    6564              : 
    6565           82 :       CALL dbcsr_release(tempNOcc1)
    6566           82 :       CALL dbcsr_release(tempOccOcc1)
    6567           82 :       CALL dbcsr_release(tempOccOcc2)
    6568              : 
    6569           82 :       CALL timestop(handle)
    6570              : 
    6571          164 :    END SUBROUTINE compute_obj_nlmos
    6572              : 
    6573              : ! **************************************************************************************************
    6574              : !> \brief Compute the gradient wrt the main variable
    6575              : !> \param m_grad_out ...
    6576              : !> \param m_B0 ...
    6577              : !> \param weights ...
    6578              : !> \param m_S0 ...
    6579              : !> \param m_theta_normalized ...
    6580              : !> \param m_siginv ...
    6581              : !> \param m_sig_sqrti_ii ...
    6582              : !> \param penalty_vol_prefactor ...
    6583              : !> \param eps_filter ...
    6584              : !> \param suggested_vol_penalty ...
    6585              : !> \par History
    6586              : !>       2018.10 created [Ziling Luo]
    6587              : !> \author Ziling Luo
    6588              : ! **************************************************************************************************
    6589           82 :    SUBROUTINE compute_gradient_nlmos(m_grad_out, m_B0, weights, &
    6590              :                                      m_S0, m_theta_normalized, m_siginv, m_sig_sqrti_ii, &
    6591              :                                      penalty_vol_prefactor, eps_filter, suggested_vol_penalty)
    6592              : 
    6593              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_grad_out
    6594              :       TYPE(dbcsr_type), DIMENSION(:, :), INTENT(IN)      :: m_B0
    6595              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: weights
    6596              :       TYPE(dbcsr_type), INTENT(IN)                       :: m_S0, m_theta_normalized, m_siginv, &
    6597              :                                                             m_sig_sqrti_ii
    6598              :       REAL(KIND=dp), INTENT(IN)                          :: penalty_vol_prefactor, eps_filter
    6599              :       REAL(KIND=dp), INTENT(INOUT)                       :: suggested_vol_penalty
    6600              : 
    6601              :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_gradient_nlmos'
    6602              : 
    6603              :       INTEGER                                            :: dim0, handle, idim0, reim
    6604              :       REAL(KIND=dp)                                      :: norm_loc, norm_vol
    6605              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: tg_diagonal, z2
    6606              :       TYPE(dbcsr_type)                                   :: m_temp_oo_1, m_temp_oo_2, m_temp_oo_3, &
    6607              :                                                             m_temp_oo_4
    6608              : 
    6609           82 :       CALL timeset(routineN, handle)
    6610              : 
    6611              :       CALL dbcsr_create(m_temp_oo_1, &
    6612              :                         template=m_theta_normalized, &
    6613           82 :                         matrix_type=dbcsr_type_no_symmetry)
    6614              :       CALL dbcsr_create(m_temp_oo_2, &
    6615              :                         template=m_theta_normalized, &
    6616           82 :                         matrix_type=dbcsr_type_no_symmetry)
    6617              :       CALL dbcsr_create(m_temp_oo_3, &
    6618              :                         template=m_theta_normalized, &
    6619           82 :                         matrix_type=dbcsr_type_no_symmetry)
    6620              :       CALL dbcsr_create(m_temp_oo_4, &
    6621              :                         template=m_theta_normalized, &
    6622           82 :                         matrix_type=dbcsr_type_no_symmetry)
    6623              : 
    6624           82 :       CALL dbcsr_get_info(m_siginv, nfullrows_total=dim0)
    6625          246 :       ALLOCATE (tg_diagonal(dim0))
    6626          164 :       ALLOCATE (z2(dim0))
    6627           82 :       CALL dbcsr_set(m_temp_oo_1, 0.0_dp) ! accumulate the gradient wrt a_norm here
    6628              : 
    6629              :       ! do d_Omega/d_a_normalized first
    6630          842 :       DO idim0 = 1, SIZE(m_B0, 2) ! this loop is over miller ind
    6631              : 
    6632        12608 :          z2(:) = 0.0_dp
    6633          760 :          CALL dbcsr_set(m_temp_oo_2, 0.0_dp) ! accumulate index gradient here
    6634         1520 :          DO reim = 1, SIZE(m_B0, 1) ! this loop is over Re/Im
    6635              : 
    6636              :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6637              :                                 m_B0(reim, idim0), &
    6638              :                                 m_theta_normalized, &
    6639              :                                 0.0_dp, m_temp_oo_3, &
    6640          760 :                                 filter_eps=eps_filter)
    6641              : 
    6642              :             ! result contain Re/Im part of Z for the current Miller index
    6643              :             ! warning - save time by computing only the diagonal elements
    6644              :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6645              :                                 m_theta_normalized, &
    6646              :                                 m_temp_oo_3, &
    6647              :                                 0.0_dp, m_temp_oo_4, &
    6648          760 :                                 filter_eps=eps_filter)
    6649              : 
    6650        12608 :             tg_diagonal(:) = 0.0_dp
    6651          760 :             CALL dbcsr_get_diag(m_temp_oo_4, tg_diagonal)
    6652          760 :             CALL dbcsr_set(m_temp_oo_4, 0.0_dp)
    6653          760 :             CALL dbcsr_set_diag(m_temp_oo_4, tg_diagonal)
    6654              :             !CALL para_group%sum(tg_diagonal)
    6655        12608 :             z2(:) = z2(:) + tg_diagonal(:)*tg_diagonal(:)
    6656              : 
    6657              :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6658              :                                 m_temp_oo_3, &
    6659              :                                 m_temp_oo_4, &
    6660              :                                 1.0_dp, m_temp_oo_2, &
    6661         1520 :                                 filter_eps=eps_filter)
    6662              : 
    6663              :          END DO
    6664              : 
    6665              :          ! TODO: because some elements are zeros on some MPI tasks the
    6666              :          ! gradient evaluation will fail for CASE 1 and 3
    6667              :          SELECT CASE (2) ! allows for selection of different spread functionals
    6668              :          CASE (1) ! functional =  -W_I * log( |z_I|^2 )
    6669              :             z2(:) = -weights(idim0)/z2(:)
    6670              :          CASE (2) ! functional =  W_I * ( 1 - |z_I|^2 )
    6671        12608 :             z2(:) = -weights(idim0)
    6672              :          CASE (3) ! functional =  W_I * ( 1 - |z_I| )
    6673              :             z2(:) = -weights(idim0)/(2*SQRT(z2(:)))
    6674              :          END SELECT
    6675          760 :          CALL dbcsr_set(m_temp_oo_3, 0.0_dp)
    6676          760 :          CALL dbcsr_set_diag(m_temp_oo_3, z2)
    6677              :          ! TODO: print this matrix to make sure its block structure is fine
    6678              :          ! and there are no unecessary elements
    6679              : 
    6680              :          CALL dbcsr_multiply("N", "N", 4.0_dp, &
    6681              :                              m_temp_oo_2, &
    6682              :                              m_temp_oo_3, &
    6683              :                              1.0_dp, m_temp_oo_1, &
    6684          842 :                              filter_eps=eps_filter)
    6685              : 
    6686              :       END DO ! end loop over idim0
    6687           82 :       DEALLOCATE (z2)
    6688              : 
    6689              :       ! sigma0.a_norm is necessary for the volume penalty and normalization
    6690              :       CALL dbcsr_multiply("N", "N", &
    6691              :                           1.0_dp, &
    6692              :                           m_S0, &
    6693              :                           m_theta_normalized, &
    6694              :                           0.0_dp, m_temp_oo_2, &
    6695           82 :                           filter_eps=eps_filter)
    6696              : 
    6697              :       ! add gradient of the penalty functional log[det(sigma)]
    6698              :       ! G = 2*prefactor*sigma0.a_norm.sigma_inv
    6699              :       CALL dbcsr_multiply("N", "N", &
    6700              :                           1.0_dp, &
    6701              :                           m_temp_oo_2, &
    6702              :                           m_siginv, &
    6703              :                           0.0_dp, m_temp_oo_3, &
    6704           82 :                           filter_eps=eps_filter)
    6705           82 :       norm_vol = dbcsr_maxabs(m_temp_oo_3)
    6706           82 :       norm_loc = dbcsr_maxabs(m_temp_oo_1)
    6707           82 :       suggested_vol_penalty = norm_loc/norm_vol
    6708              :       CALL dbcsr_add(m_temp_oo_1, m_temp_oo_3, &
    6709           82 :                      1.0_dp, 2.0_dp*penalty_vol_prefactor)
    6710              : 
    6711              :       ! take into account the factor from the normalization constraint
    6712              :       ! G = ( G - sigma0.a_norm.[tr(a_norm).G]_ii ) . [sig_sqrti]_ii
    6713              :       ! 1. get G.[sig_sqrti]_ii
    6714              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6715              :                           m_temp_oo_1, &
    6716              :                           m_sig_sqrti_ii, &
    6717              :                           0.0_dp, m_grad_out, &
    6718           82 :                           filter_eps=eps_filter)
    6719              : 
    6720              :       ! 2. get [tr(a_norm).G]_ii
    6721              :       ! it is possible to save time by computing only the diagonal elements
    6722              :       CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6723              :                           m_theta_normalized, &
    6724              :                           m_temp_oo_1, &
    6725              :                           0.0_dp, m_temp_oo_3, &
    6726           82 :                           filter_eps=eps_filter)
    6727           82 :       CALL dbcsr_get_diag(m_temp_oo_3, tg_diagonal)
    6728           82 :       CALL dbcsr_set(m_temp_oo_3, 0.0_dp)
    6729           82 :       CALL dbcsr_set_diag(m_temp_oo_3, tg_diagonal)
    6730              : 
    6731              :       ! 3. [X]_ii . [sig_sqrti]_ii
    6732              :       ! it is possible to save time by computing only the diagonal elements
    6733              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    6734              :                           m_sig_sqrti_ii, &
    6735              :                           m_temp_oo_3, &
    6736              :                           0.0_dp, m_temp_oo_1, &
    6737           82 :                           filter_eps=eps_filter)
    6738              :       ! 4. (sigma0*a_norm) .[X]_ii
    6739              :       CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6740              :                           m_temp_oo_2, &
    6741              :                           m_temp_oo_1, &
    6742              :                           1.0_dp, m_grad_out, &
    6743           82 :                           filter_eps=eps_filter)
    6744              : 
    6745           82 :       DEALLOCATE (tg_diagonal)
    6746           82 :       CALL dbcsr_release(m_temp_oo_1)
    6747           82 :       CALL dbcsr_release(m_temp_oo_2)
    6748           82 :       CALL dbcsr_release(m_temp_oo_3)
    6749           82 :       CALL dbcsr_release(m_temp_oo_4)
    6750              : 
    6751           82 :       CALL timestop(handle)
    6752              : 
    6753          164 :    END SUBROUTINE compute_gradient_nlmos
    6754              : 
    6755              : ! **************************************************************************************************
    6756              : !> \brief Compute MO coeffs from the main optimized variable (e.g. Theta, X)
    6757              : !> \param m_var_in ...
    6758              : !> \param m_t_out ...
    6759              : !> \param m_quench_t ...
    6760              : !> \param m_t0 ...
    6761              : !> \param m_oo_template ...
    6762              : !> \param m_STsiginv0 ...
    6763              : !> \param m_s ...
    6764              : !> \param m_sig_sqrti_ii_out ...
    6765              : !> \param domain_r_down ...
    6766              : !> \param domain_s_inv ...
    6767              : !> \param domain_map ...
    6768              : !> \param cpu_of_domain ...
    6769              : !> \param assume_t0_q0x ...
    6770              : !> \param just_started ...
    6771              : !> \param optimize_theta ...
    6772              : !> \param normalize_orbitals ...
    6773              : !> \param envelope_amplitude ...
    6774              : !> \param eps_filter ...
    6775              : !> \param special_case ...
    6776              : !> \param nocc_of_domain ...
    6777              : !> \param order_lanczos ...
    6778              : !> \param eps_lanczos ...
    6779              : !> \param max_iter_lanczos ...
    6780              : !> \par History
    6781              : !>       2015.03 created [Rustam Z Khaliullin]
    6782              : !> \author Rustam Z Khaliullin
    6783              : ! **************************************************************************************************
    6784         2948 :    SUBROUTINE compute_xalmos_from_main_var(m_var_in, m_t_out, m_quench_t, &
    6785         1474 :                                            m_t0, m_oo_template, m_STsiginv0, m_s, m_sig_sqrti_ii_out, domain_r_down, &
    6786         1474 :                                            domain_s_inv, domain_map, cpu_of_domain, assume_t0_q0x, just_started, &
    6787              :                                            optimize_theta, normalize_orbitals, envelope_amplitude, eps_filter, &
    6788         1474 :                                            special_case, nocc_of_domain, order_lanczos, eps_lanczos, max_iter_lanczos)
    6789              : 
    6790              :       TYPE(dbcsr_type), INTENT(IN)                       :: m_var_in
    6791              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_t_out, m_quench_t, m_t0, &
    6792              :                                                             m_oo_template, m_STsiginv0, m_s, &
    6793              :                                                             m_sig_sqrti_ii_out
    6794              :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6795              :          INTENT(IN)                                      :: domain_r_down, domain_s_inv
    6796              :       TYPE(domain_map_type), INTENT(IN)                  :: domain_map
    6797              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
    6798              :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, just_started, &
    6799              :                                                             optimize_theta, normalize_orbitals
    6800              :       REAL(KIND=dp), INTENT(IN)                          :: envelope_amplitude, eps_filter
    6801              :       INTEGER, INTENT(IN)                                :: special_case
    6802              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: nocc_of_domain
    6803              :       INTEGER, INTENT(IN)                                :: order_lanczos
    6804              :       REAL(KIND=dp), INTENT(IN)                          :: eps_lanczos
    6805              :       INTEGER, INTENT(IN)                                :: max_iter_lanczos
    6806              : 
    6807              :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_xalmos_from_main_var'
    6808              : 
    6809              :       INTEGER                                            :: handle, unit_nr
    6810              :       REAL(KIND=dp)                                      :: t_norm
    6811              :       TYPE(cp_logger_type), POINTER                      :: logger
    6812              :       TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_oo_1
    6813              : 
    6814         1474 :       CALL timeset(routineN, handle)
    6815              : 
    6816              :       ! get a useful output_unit
    6817         1474 :       logger => cp_get_default_logger()
    6818         1474 :       IF (logger%para_env%is_source()) THEN
    6819          737 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    6820              :       ELSE
    6821              :          unit_nr = -1
    6822              :       END IF
    6823              : 
    6824              :       CALL dbcsr_create(m_tmp_no_1, &
    6825              :                         template=m_quench_t, &
    6826         1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6827              :       CALL dbcsr_create(m_tmp_oo_1, &
    6828              :                         template=m_oo_template, &
    6829         1474 :                         matrix_type=dbcsr_type_no_symmetry)
    6830              : 
    6831         1474 :       CALL dbcsr_copy(m_tmp_no_1, m_var_in)
    6832         1474 :       IF (optimize_theta) THEN
    6833              :          ! check that all MO coefficients of the guess are less
    6834              :          ! than the maximum allowed amplitude
    6835            0 :          t_norm = dbcsr_maxabs(m_tmp_no_1)
    6836            0 :          IF (unit_nr > 0) THEN
    6837            0 :             WRITE (unit_nr, *) "Maximum norm of the initial guess: ", t_norm
    6838            0 :             WRITE (unit_nr, *) "Maximum allowed amplitude: ", &
    6839            0 :                envelope_amplitude
    6840              :          END IF
    6841            0 :          IF (t_norm > envelope_amplitude .AND. just_started) THEN
    6842            0 :             CPABORT("Max norm of the initial guess is too large")
    6843              :          END IF
    6844              :          ! use artanh to tame MOs
    6845            0 :          CALL tanh_of_elements(m_tmp_no_1, alpha=1.0_dp/envelope_amplitude)
    6846            0 :          CALL dbcsr_scale(m_tmp_no_1, envelope_amplitude)
    6847              :       END IF
    6848              :       CALL dbcsr_hadamard_product(m_tmp_no_1, m_quench_t, &
    6849         1474 :                                   m_t_out)
    6850              : 
    6851              :       ! project out R_0
    6852         1474 :       IF (assume_t0_q0x) THEN
    6853          466 :          IF (special_case == xalmo_case_fully_deloc) THEN
    6854              :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    6855              :                                 m_STsiginv0, &
    6856              :                                 m_t_out, &
    6857              :                                 0.0_dp, m_tmp_oo_1, &
    6858          160 :                                 filter_eps=eps_filter)
    6859              :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    6860              :                                 m_t0, &
    6861              :                                 m_tmp_oo_1, &
    6862              :                                 1.0_dp, m_t_out, &
    6863          160 :                                 filter_eps=eps_filter)
    6864          306 :          ELSE IF (special_case == xalmo_case_block_diag) THEN
    6865            0 :             CPABORT("cannot use projector with block-daigonal ALMOs")
    6866              :          ELSE
    6867              :             ! no special case
    6868              :             CALL apply_domain_operators( &
    6869              :                matrix_in=m_t_out, &
    6870              :                matrix_out=m_tmp_no_1, &
    6871              :                operator1=domain_r_down, &
    6872              :                operator2=domain_s_inv, &
    6873              :                dpattern=m_quench_t, &
    6874              :                map=domain_map, &
    6875              :                node_of_domain=cpu_of_domain, &
    6876              :                my_action=1, &
    6877              :                filter_eps=eps_filter, &
    6878          306 :                use_trimmer=.FALSE.)
    6879              :             CALL dbcsr_copy(m_t_out, &
    6880          306 :                             m_tmp_no_1)
    6881              :          END IF ! special case
    6882              :          CALL dbcsr_add(m_t_out, &
    6883          466 :                         m_t0, 1.0_dp, 1.0_dp)
    6884              :       END IF
    6885              : 
    6886         1474 :       IF (normalize_orbitals) THEN
    6887              :          CALL orthogonalize_mos( &
    6888              :             ket=m_t_out, &
    6889              :             overlap=m_tmp_oo_1, &
    6890              :             metric=m_s, &
    6891              :             retain_locality=.TRUE., &
    6892              :             only_normalize=.TRUE., &
    6893              :             nocc_of_domain=nocc_of_domain(:), &
    6894              :             eps_filter=eps_filter, &
    6895              :             order_lanczos=order_lanczos, &
    6896              :             eps_lanczos=eps_lanczos, &
    6897              :             max_iter_lanczos=max_iter_lanczos, &
    6898            0 :             overlap_sqrti=m_sig_sqrti_ii_out)
    6899              :       END IF
    6900              : 
    6901         1474 :       CALL dbcsr_filter(m_t_out, eps_filter)
    6902              : 
    6903         1474 :       CALL dbcsr_release(m_tmp_no_1)
    6904         1474 :       CALL dbcsr_release(m_tmp_oo_1)
    6905              : 
    6906         1474 :       CALL timestop(handle)
    6907              : 
    6908         1474 :    END SUBROUTINE compute_xalmos_from_main_var
    6909              : 
    6910              : ! **************************************************************************************************
    6911              : !> \brief Compute the preconditioner matrices and invert them if necessary
    6912              : !> \param domain_prec_out ...
    6913              : !> \param m_prec_out ...
    6914              : !> \param m_ks ...
    6915              : !> \param m_s ...
    6916              : !> \param m_siginv ...
    6917              : !> \param m_quench_t ...
    6918              : !> \param m_FTsiginv ...
    6919              : !> \param m_siginvTFTsiginv ...
    6920              : !> \param m_ST ...
    6921              : !> \param m_STsiginv_out ...
    6922              : !> \param m_s_vv_out ...
    6923              : !> \param m_f_vv_out ...
    6924              : !> \param para_env ...
    6925              : !> \param blacs_env ...
    6926              : !> \param nocc_of_domain ...
    6927              : !> \param domain_s_inv ...
    6928              : !> \param domain_s_inv_half ...
    6929              : !> \param domain_s_half ...
    6930              : !> \param domain_r_down ...
    6931              : !> \param cpu_of_domain ...
    6932              : !> \param domain_map ...
    6933              : !> \param assume_t0_q0x ...
    6934              : !> \param penalty_occ_vol ...
    6935              : !> \param penalty_occ_vol_prefactor ...
    6936              : !> \param eps_filter ...
    6937              : !> \param neg_thr ...
    6938              : !> \param spin_factor ...
    6939              : !> \param special_case ...
    6940              : !> \param bad_modes_projector_down_out ...
    6941              : !> \param skip_inversion ...
    6942              : !> \par History
    6943              : !>       2015.03 created [Rustam Z Khaliullin]
    6944              : !> \author Rustam Z Khaliullin
    6945              : ! **************************************************************************************************
    6946         1500 :    SUBROUTINE compute_preconditioner(domain_prec_out, m_prec_out, m_ks, m_s, &
    6947              :                                      m_siginv, m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, &
    6948              :                                      m_STsiginv_out, m_s_vv_out, m_f_vv_out, para_env, &
    6949         1000 :                                      blacs_env, nocc_of_domain, domain_s_inv, domain_s_inv_half, domain_s_half, &
    6950          500 :                                      domain_r_down, cpu_of_domain, &
    6951              :                                      domain_map, assume_t0_q0x, penalty_occ_vol, penalty_occ_vol_prefactor, &
    6952          500 :                                      eps_filter, neg_thr, spin_factor, special_case, bad_modes_projector_down_out, &
    6953              :                                      skip_inversion)
    6954              : 
    6955              :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6956              :          INTENT(INOUT)                                   :: domain_prec_out
    6957              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: m_prec_out, m_ks, m_s
    6958              :       TYPE(dbcsr_type), INTENT(IN)                       :: m_siginv, m_quench_t, m_FTsiginv, &
    6959              :                                                             m_siginvTFTsiginv, m_ST
    6960              :       TYPE(dbcsr_type), INTENT(INOUT), OPTIONAL          :: m_STsiginv_out, m_s_vv_out, m_f_vv_out
    6961              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    6962              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    6963              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: nocc_of_domain
    6964              :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6965              :          INTENT(IN)                                      :: domain_s_inv
    6966              :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6967              :          INTENT(IN), OPTIONAL                            :: domain_s_inv_half, domain_s_half
    6968              :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6969              :          INTENT(IN)                                      :: domain_r_down
    6970              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
    6971              :       TYPE(domain_map_type), INTENT(IN)                  :: domain_map
    6972              :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, penalty_occ_vol
    6973              :       REAL(KIND=dp), INTENT(IN)                          :: penalty_occ_vol_prefactor, eps_filter, &
    6974              :                                                             neg_thr, spin_factor
    6975              :       INTEGER, INTENT(IN)                                :: special_case
    6976              :       TYPE(domain_submatrix_type), DIMENSION(:), &
    6977              :          INTENT(INOUT), OPTIONAL                         :: bad_modes_projector_down_out
    6978              :       LOGICAL, INTENT(IN)                                :: skip_inversion
    6979              : 
    6980              :       CHARACTER(len=*), PARAMETER :: routineN = 'compute_preconditioner'
    6981              : 
    6982              :       INTEGER                                            :: handle, ndim, precond_domain_projector
    6983          500 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: nn_diagonal
    6984              :       TYPE(dbcsr_type)                                   :: m_tmp_nn_1, m_tmp_no_3
    6985              : 
    6986          500 :       CALL timeset(routineN, handle)
    6987              : 
    6988              :       CALL dbcsr_create(m_tmp_nn_1, &
    6989              :                         template=m_s, &
    6990          500 :                         matrix_type=dbcsr_type_no_symmetry)
    6991              :       CALL dbcsr_create(m_tmp_no_3, &
    6992              :                         template=m_quench_t, &
    6993          500 :                         matrix_type=dbcsr_type_no_symmetry)
    6994              : 
    6995              :       ! calculate (1-R)F(1-R) and S-SRS
    6996              :       ! RZK-warning take advantage: some elements will be removed by the quencher
    6997              :       ! RZK-warning S operations can be performed outside the spin loop to save time
    6998              :       ! IT IS REQUIRED THAT PRECONDITIONER DOES NOT BREAK THE LOCALITY!!!!
    6999              :       ! RZK-warning: further optimization is ABSOLUTELY NECESSARY
    7000              : 
    7001              :       ! First S-SRS
    7002              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7003              :                           m_ST, &
    7004              :                           m_siginv, &
    7005              :                           0.0_dp, m_tmp_no_3, &
    7006          500 :                           filter_eps=eps_filter)
    7007          500 :       CALL dbcsr_desymmetrize(m_s, m_tmp_nn_1)
    7008              :       ! return STsiginv if necessary
    7009          500 :       IF (PRESENT(m_STsiginv_out)) THEN
    7010            0 :          CALL dbcsr_copy(m_STsiginv_out, m_tmp_no_3)
    7011              :       END IF
    7012          500 :       IF (special_case == xalmo_case_fully_deloc) THEN
    7013              :          ! use S instead of S-SRS
    7014              :       ELSE
    7015              :          CALL dbcsr_multiply("N", "T", -1.0_dp, &
    7016              :                              m_ST, &
    7017              :                              m_tmp_no_3, &
    7018              :                              1.0_dp, m_tmp_nn_1, &
    7019          456 :                              filter_eps=eps_filter)
    7020              :       END IF
    7021              :       ! return S_vv = (S or S-SRS) if necessary
    7022          500 :       IF (PRESENT(m_s_vv_out)) THEN
    7023            0 :          CALL dbcsr_copy(m_s_vv_out, m_tmp_nn_1)
    7024              :       END IF
    7025              : 
    7026              :       ! Second (1-R)F(1-R)
    7027              :       ! re-create matrix because desymmetrize is buggy -
    7028              :       ! it will create multiple copies of blocks
    7029          500 :       CALL dbcsr_desymmetrize(m_ks, m_prec_out)
    7030              :       CALL dbcsr_multiply("N", "T", -1.0_dp, &
    7031              :                           m_FTsiginv, &
    7032              :                           m_ST, &
    7033              :                           1.0_dp, m_prec_out, &
    7034          500 :                           filter_eps=eps_filter)
    7035              :       CALL dbcsr_multiply("N", "T", -1.0_dp, &
    7036              :                           m_ST, &
    7037              :                           m_FTsiginv, &
    7038              :                           1.0_dp, m_prec_out, &
    7039          500 :                           filter_eps=eps_filter)
    7040              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7041              :                           m_ST, &
    7042              :                           m_siginvTFTsiginv, &
    7043              :                           0.0_dp, m_tmp_no_3, &
    7044          500 :                           filter_eps=eps_filter)
    7045              :       CALL dbcsr_multiply("N", "T", 1.0_dp, &
    7046              :                           m_tmp_no_3, &
    7047              :                           m_ST, &
    7048              :                           1.0_dp, m_prec_out, &
    7049          500 :                           filter_eps=eps_filter)
    7050              :       ! return F_vv = (I-SR)F(I-RS) if necessary
    7051          500 :       IF (PRESENT(m_f_vv_out)) THEN
    7052            0 :          CALL dbcsr_copy(m_f_vv_out, m_prec_out)
    7053              :       END IF
    7054              : 
    7055              : #if 0
    7056              : !penalty_only=.TRUE.
    7057              :       WRITE (unit_nr, *) "prefactor0:", penalty_occ_vol_prefactor
    7058              :       !IF (penalty_occ_vol) THEN
    7059              :       CALL dbcsr_desymmetrize(m_s, &
    7060              :                               m_prec_out)
    7061              :       !CALL dbcsr_scale(m_prec_out,-penalty_occ_vol_prefactor)
    7062              :       !ENDIF
    7063              : #else
    7064              :       ! sum up the F_vv and S_vv terms
    7065              :       CALL dbcsr_add(m_prec_out, m_tmp_nn_1, &
    7066          500 :                      1.0_dp, 1.0_dp)
    7067              :       ! Scale to obtain unit step length
    7068          500 :       CALL dbcsr_scale(m_prec_out, 2.0_dp*spin_factor)
    7069              : 
    7070              :       ! add the contribution from the penalty on the occupied volume
    7071          500 :       IF (penalty_occ_vol) THEN
    7072              :          CALL dbcsr_add(m_prec_out, m_tmp_nn_1, &
    7073            0 :                         1.0_dp, penalty_occ_vol_prefactor)
    7074              :       END IF
    7075              : #endif
    7076              : 
    7077          500 :       CALL dbcsr_copy(m_tmp_nn_1, m_prec_out)
    7078              : 
    7079              :       ! invert using various algorithms
    7080          500 :       IF (special_case == xalmo_case_block_diag) THEN ! non-overlapping diagonal blocks
    7081              : 
    7082           96 :          IF (skip_inversion) THEN
    7083              : 
    7084              :             ! impose block-diagonal structure
    7085           92 :             CALL dbcsr_get_info(m_s, nfullrows_total=ndim)
    7086          276 :             ALLOCATE (nn_diagonal(ndim))
    7087           92 :             CALL dbcsr_get_diag(m_s, nn_diagonal)
    7088           92 :             CALL dbcsr_set(m_prec_out, 0.0_dp)
    7089           92 :             CALL dbcsr_set_diag(m_prec_out, nn_diagonal)
    7090           92 :             CALL dbcsr_filter(m_prec_out, eps_filter)
    7091           92 :             DEALLOCATE (nn_diagonal)
    7092              : 
    7093          184 :             CALL dbcsr_copy(m_prec_out, m_tmp_nn_1, keep_sparsity=.TRUE.)
    7094              : 
    7095              :          ELSE
    7096              : 
    7097              :             CALL pseudo_invert_diagonal_blk( &
    7098              :                matrix_in=m_tmp_nn_1, &
    7099              :                matrix_out=m_prec_out, &
    7100              :                nocc=nocc_of_domain(:) &
    7101            4 :                )
    7102              : 
    7103              :          END IF
    7104              : 
    7105          404 :       ELSE IF (special_case == xalmo_case_fully_deloc) THEN ! the entire system is a block
    7106              : 
    7107           44 :          IF (skip_inversion) THEN
    7108            0 :             CALL dbcsr_copy(m_prec_out, m_tmp_nn_1)
    7109              :          ELSE
    7110              : 
    7111              :             ! invert using cholesky (works with S matrix, will not work with S-SRS matrix)
    7112              :             CALL cp_dbcsr_cholesky_decompose(m_prec_out, &
    7113              :                                              para_env=para_env, &
    7114           44 :                                              blacs_env=blacs_env)
    7115              :             CALL cp_dbcsr_cholesky_invert(m_prec_out, &
    7116              :                                           para_env=para_env, &
    7117              :                                           blacs_env=blacs_env, &
    7118           44 :                                           uplo_to_full=.TRUE.)
    7119              :          END IF !skip_inversion
    7120              : 
    7121           44 :          CALL dbcsr_filter(m_prec_out, eps_filter)
    7122              : 
    7123              :       ELSE
    7124              : 
    7125              :          !!! use a true domain preconditioner with overlapping domains
    7126          360 :          IF (assume_t0_q0x) THEN
    7127           26 :             precond_domain_projector = -1
    7128              :          ELSE
    7129          334 :             precond_domain_projector = 0
    7130              :          END IF
    7131              :          !! RZK-warning: use PRESENT to make two nearly-identical calls
    7132              :          !! this is done because intel compiler does not seem to conform
    7133              :          !! to the FORTRAN standard for passing through optional arguments
    7134          360 :          IF (PRESENT(bad_modes_projector_down_out)) THEN
    7135              :             CALL construct_domain_preconditioner( &
    7136              :                matrix_main=m_tmp_nn_1, &
    7137              :                subm_s_inv=domain_s_inv(:), &
    7138              :                subm_s_inv_half=domain_s_inv_half(:), &
    7139              :                subm_s_half=domain_s_half(:), &
    7140              :                subm_r_down=domain_r_down(:), &
    7141              :                matrix_trimmer=m_quench_t, &
    7142              :                dpattern=m_quench_t, &
    7143              :                map=domain_map, &
    7144              :                node_of_domain=cpu_of_domain, &
    7145              :                preconditioner=domain_prec_out(:), &
    7146              :                use_trimmer=.FALSE., &
    7147              :                bad_modes_projector_down=bad_modes_projector_down_out(:), &
    7148              :                eps_zero_eigenvalues=neg_thr, &
    7149              :                my_action=precond_domain_projector, &
    7150              :                skip_inversion=skip_inversion &
    7151           18 :                )
    7152              :          ELSE
    7153              :             CALL construct_domain_preconditioner( &
    7154              :                matrix_main=m_tmp_nn_1, &
    7155              :                subm_s_inv=domain_s_inv(:), &
    7156              :                subm_r_down=domain_r_down(:), &
    7157              :                matrix_trimmer=m_quench_t, &
    7158              :                dpattern=m_quench_t, &
    7159              :                map=domain_map, &
    7160              :                node_of_domain=cpu_of_domain, &
    7161              :                preconditioner=domain_prec_out(:), &
    7162              :                use_trimmer=.FALSE., &
    7163              :                !eps_zero_eigenvalues=neg_thr,&
    7164              :                my_action=precond_domain_projector, &
    7165              :                skip_inversion=skip_inversion &
    7166          342 :                )
    7167              :          END IF
    7168              : 
    7169              :       END IF ! special_case
    7170              : 
    7171              :       ! invert using cholesky (works with S matrix, will not work with S-SRS matrix)
    7172              :       !!!CALL cp_dbcsr_cholesky_decompose(prec_vv,&
    7173              :       !!!        para_env=almo_scf_env%para_env,&
    7174              :       !!!        blacs_env=almo_scf_env%blacs_env)
    7175              :       !!!CALL cp_dbcsr_cholesky_invert(prec_vv,&
    7176              :       !!!        para_env=almo_scf_env%para_env,&
    7177              :       !!!        blacs_env=almo_scf_env%blacs_env,&
    7178              :       !!!        uplo_to_full=.TRUE.)
    7179              :       !!!CALL dbcsr_filter(prec_vv,&
    7180              :       !!!        almo_scf_env%eps_filter)
    7181              :       !!!
    7182              : 
    7183              :       ! re-create the matrix because desymmetrize is buggy -
    7184              :       ! it will create multiple copies of blocks
    7185              :       !!!DESYM!CALL dbcsr_create(prec_vv,&
    7186              :       !!!DESYM!        template=almo_scf_env%matrix_s(1),&
    7187              :       !!!DESYM!        matrix_type=dbcsr_type_no_symmetry)
    7188              :       !!!DESYM!CALL dbcsr_desymmetrize(almo_scf_env%matrix_s(1),&
    7189              :       !!!DESYM!        prec_vv)
    7190              :       !CALL dbcsr_multiply("N","N",1.0_dp,&
    7191              :       !        almo_scf_env%matrix_s(1),&
    7192              :       !        matrix_t_out(ispin),&
    7193              :       !        0.0_dp,m_tmp_no_1,&
    7194              :       !        filter_eps=almo_scf_env%eps_filter)
    7195              :       !CALL dbcsr_multiply("N","N",1.0_dp,&
    7196              :       !        m_tmp_no_1,&
    7197              :       !        almo_scf_env%matrix_sigma_inv(ispin),&
    7198              :       !        0.0_dp,m_tmp_no_3,&
    7199              :       !        filter_eps=almo_scf_env%eps_filter)
    7200              :       !CALL dbcsr_multiply("N","T",-1.0_dp,&
    7201              :       !        m_tmp_no_3,&
    7202              :       !        m_tmp_no_1,&
    7203              :       !        1.0_dp,prec_vv,&
    7204              :       !        filter_eps=almo_scf_env%eps_filter)
    7205              :       !CALL dbcsr_add_on_diag(prec_vv,&
    7206              :       !        prec_sf_mixing_s)
    7207              : 
    7208              :       !CALL dbcsr_create(prec_oo,&
    7209              :       !        template=almo_scf_env%matrix_sigma(ispin),&
    7210              :       !        matrix_type=dbcsr_type_no_symmetry)
    7211              :       !CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma(ispin),&
    7212              :       !        matrix_type=dbcsr_type_no_symmetry)
    7213              :       !CALL dbcsr_desymmetrize(almo_scf_env%matrix_sigma(ispin),&
    7214              :       !        prec_oo)
    7215              :       !CALL dbcsr_filter(prec_oo,&
    7216              :       !        almo_scf_env%eps_filter)
    7217              : 
    7218              :       !! invert using cholesky
    7219              :       !CALL dbcsr_create(prec_oo_inv,&
    7220              :       !        template=prec_oo,&
    7221              :       !        matrix_type=dbcsr_type_no_symmetry)
    7222              :       !CALL dbcsr_desymmetrize(prec_oo,&
    7223              :       !        prec_oo_inv)
    7224              :       !CALL cp_dbcsr_cholesky_decompose(prec_oo_inv,&
    7225              :       !        para_env=almo_scf_env%para_env,&
    7226              :       !        blacs_env=almo_scf_env%blacs_env)
    7227              :       !CALL cp_dbcsr_cholesky_invert(prec_oo_inv,&
    7228              :       !        para_env=almo_scf_env%para_env,&
    7229              :       !        blacs_env=almo_scf_env%blacs_env,&
    7230              :       !        uplo_to_full=.TRUE.)
    7231              : 
    7232          500 :       CALL dbcsr_release(m_tmp_nn_1)
    7233          500 :       CALL dbcsr_release(m_tmp_no_3)
    7234              : 
    7235          500 :       CALL timestop(handle)
    7236              : 
    7237         1000 :    END SUBROUTINE compute_preconditioner
    7238              : 
    7239              : ! **************************************************************************************************
    7240              : !> \brief Compute beta for conjugate gradient algorithms
    7241              : !> \param beta ...
    7242              : !> \param numer ...
    7243              : !> \param denom ...
    7244              : !> \param reset_conjugator ...
    7245              : !> \param conjugator ...
    7246              : !> \param grad ...
    7247              : !> \param prev_grad ...
    7248              : !> \param step ...
    7249              : !> \param prev_step ...
    7250              : !> \param prev_minus_prec_grad ...
    7251              : !> \par History
    7252              : !>       2015.04 created [Rustam Z Khaliullin]
    7253              : !> \author Rustam Z Khaliullin
    7254              : ! **************************************************************************************************
    7255         1016 :    SUBROUTINE compute_cg_beta(beta, numer, denom, reset_conjugator, conjugator, &
    7256          508 :                               grad, prev_grad, step, prev_step, prev_minus_prec_grad)
    7257              : 
    7258              :       REAL(KIND=dp), INTENT(INOUT)                       :: beta
    7259              :       REAL(KIND=dp), INTENT(INOUT), OPTIONAL             :: numer, denom
    7260              :       LOGICAL, INTENT(INOUT)                             :: reset_conjugator
    7261              :       INTEGER, INTENT(IN)                                :: conjugator
    7262              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: grad, prev_grad, step, prev_step
    7263              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT), &
    7264              :          OPTIONAL                                        :: prev_minus_prec_grad
    7265              : 
    7266              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'compute_cg_beta'
    7267              : 
    7268              :       INTEGER                                            :: handle, i, nsize, unit_nr
    7269              :       REAL(KIND=dp)                                      :: den, kappa, my_denom, my_numer, &
    7270              :                                                             my_numer2, my_numer3, num, num2, num3, &
    7271              :                                                             tau
    7272              :       TYPE(cp_logger_type), POINTER                      :: logger
    7273              :       TYPE(dbcsr_type)                                   :: m_tmp_no_1
    7274              : 
    7275          508 :       CALL timeset(routineN, handle)
    7276              : 
    7277              :       ! get a useful output_unit
    7278          508 :       logger => cp_get_default_logger()
    7279          508 :       IF (logger%para_env%is_source()) THEN
    7280          254 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    7281              :       ELSE
    7282              :          unit_nr = -1
    7283              :       END IF
    7284              : 
    7285          508 :       IF (.NOT. PRESENT(prev_minus_prec_grad)) THEN
    7286              :          IF (conjugator == cg_fletcher_reeves .OR. &
    7287           82 :              conjugator == cg_polak_ribiere .OR. &
    7288              :              conjugator == cg_hager_zhang) THEN
    7289            0 :             CPABORT("conjugator needs more input")
    7290              :          END IF
    7291              :       END IF
    7292              : 
    7293              :       ! return num denom so beta can be calculated spin-by-spin
    7294          508 :       IF (PRESENT(numer) .OR. PRESENT(denom)) THEN
    7295              :          IF (conjugator == cg_hestenes_stiefel .OR. &
    7296            0 :              conjugator == cg_dai_yuan .OR. &
    7297              :              conjugator == cg_hager_zhang) THEN
    7298            0 :             CPABORT("cannot return numer/denom")
    7299              :          END IF
    7300              :       END IF
    7301              : 
    7302          508 :       nsize = SIZE(grad)
    7303              : 
    7304          508 :       my_numer = 0.0_dp
    7305          508 :       my_numer2 = 0.0_dp
    7306          508 :       my_numer3 = 0.0_dp
    7307          508 :       my_denom = 0.0_dp
    7308              : 
    7309         1016 :       DO i = 1, nsize
    7310              : 
    7311              :          CALL dbcsr_create(m_tmp_no_1, &
    7312              :                            template=grad(i), &
    7313          508 :                            matrix_type=dbcsr_type_no_symmetry)
    7314              : 
    7315          570 :          SELECT CASE (conjugator)
    7316              :          CASE (cg_hestenes_stiefel)
    7317           62 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7318              :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), &
    7319           62 :                            1.0_dp, -1.0_dp)
    7320           62 :             CALL dbcsr_dot(m_tmp_no_1, step(i), num)
    7321          156 :             CALL dbcsr_dot(m_tmp_no_1, prev_step(i), den)
    7322              :          CASE (cg_fletcher_reeves)
    7323           94 :             CALL dbcsr_dot(grad(i), step(i), num)
    7324          124 :             CALL dbcsr_dot(prev_grad(i), prev_minus_prec_grad(i), den)
    7325              :          CASE (cg_polak_ribiere)
    7326           30 :             CALL dbcsr_dot(prev_grad(i), prev_minus_prec_grad(i), den)
    7327           30 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7328           30 :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
    7329          202 :             CALL dbcsr_dot(m_tmp_no_1, step(i), num)
    7330              :          CASE (cg_fletcher)
    7331          172 :             CALL dbcsr_dot(grad(i), step(i), num)
    7332          192 :             CALL dbcsr_dot(prev_grad(i), prev_step(i), den)
    7333              :          CASE (cg_liu_storey)
    7334           20 :             CALL dbcsr_dot(prev_grad(i), prev_step(i), den)
    7335           20 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7336           20 :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
    7337           54 :             CALL dbcsr_dot(m_tmp_no_1, step(i), num)
    7338              :          CASE (cg_dai_yuan)
    7339           34 :             CALL dbcsr_dot(grad(i), step(i), num)
    7340           34 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7341           34 :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
    7342          106 :             CALL dbcsr_dot(m_tmp_no_1, prev_step(i), den)
    7343              :          CASE (cg_hager_zhang)
    7344           72 :             CALL dbcsr_copy(m_tmp_no_1, grad(i))
    7345           72 :             CALL dbcsr_add(m_tmp_no_1, prev_grad(i), 1.0_dp, -1.0_dp)
    7346           72 :             CALL dbcsr_dot(m_tmp_no_1, prev_step(i), den)
    7347           72 :             CALL dbcsr_dot(m_tmp_no_1, prev_minus_prec_grad(i), num)
    7348           72 :             CALL dbcsr_dot(m_tmp_no_1, step(i), num2)
    7349           72 :             CALL dbcsr_dot(prev_step(i), grad(i), num3)
    7350           72 :             my_numer2 = my_numer2 + num2
    7351           72 :             my_numer3 = my_numer3 + num3
    7352              :          CASE (cg_zero)
    7353           24 :             num = 0.0_dp
    7354           24 :             den = 1.0_dp
    7355              :          CASE DEFAULT
    7356          726 :             CPABORT("illegal conjugator")
    7357              :          END SELECT
    7358          508 :          my_numer = my_numer + num
    7359          508 :          my_denom = my_denom + den
    7360              : 
    7361         1016 :          CALL dbcsr_release(m_tmp_no_1)
    7362              : 
    7363              :       END DO ! i - nsize
    7364              : 
    7365         1016 :       DO i = 1, nsize
    7366              : 
    7367          508 :          SELECT CASE (conjugator)
    7368              :          CASE (cg_hestenes_stiefel, cg_dai_yuan)
    7369           96 :             beta = -1.0_dp*my_numer/my_denom
    7370              :          CASE (cg_fletcher_reeves, cg_polak_ribiere, cg_fletcher, cg_liu_storey)
    7371          316 :             beta = my_numer/my_denom
    7372              :          CASE (cg_hager_zhang)
    7373           72 :             kappa = -2.0_dp*my_numer/my_denom
    7374           72 :             tau = -1.0_dp*my_numer2/my_denom
    7375           72 :             beta = tau - kappa*my_numer3/my_denom
    7376              :          CASE (cg_zero)
    7377           24 :             beta = 0.0_dp
    7378              :          CASE DEFAULT
    7379          508 :             CPABORT("illegal conjugator")
    7380              :          END SELECT
    7381              : 
    7382              :       END DO ! i - nsize
    7383              : 
    7384          508 :       IF (beta < 0.0_dp) THEN
    7385            0 :          IF (unit_nr > 0) THEN
    7386            0 :             WRITE (unit_nr, *) " Resetting conjugator because beta is negative: ", beta
    7387              :          END IF
    7388            0 :          reset_conjugator = .TRUE.
    7389              :       END IF
    7390              : 
    7391          508 :       IF (PRESENT(numer)) THEN
    7392            0 :          numer = my_numer
    7393              :       END IF
    7394          508 :       IF (PRESENT(denom)) THEN
    7395            0 :          denom = my_denom
    7396              :       END IF
    7397              : 
    7398          508 :       CALL timestop(handle)
    7399              : 
    7400          508 :    END SUBROUTINE compute_cg_beta
    7401              : 
    7402              : ! **************************************************************************************************
    7403              : !> \brief computes the step matrix from the gradient and Hessian using the Newton-Raphson method
    7404              : !> \param optimizer ...
    7405              : !> \param m_grad ...
    7406              : !> \param m_delta ...
    7407              : !> \param m_s ...
    7408              : !> \param m_ks ...
    7409              : !> \param m_siginv ...
    7410              : !> \param m_quench_t ...
    7411              : !> \param m_FTsiginv ...
    7412              : !> \param m_siginvTFTsiginv ...
    7413              : !> \param m_ST ...
    7414              : !> \param m_t ...
    7415              : !> \param m_sig_sqrti_ii ...
    7416              : !> \param domain_s_inv ...
    7417              : !> \param domain_r_down ...
    7418              : !> \param domain_map ...
    7419              : !> \param cpu_of_domain ...
    7420              : !> \param nocc_of_domain ...
    7421              : !> \param para_env ...
    7422              : !> \param blacs_env ...
    7423              : !> \param eps_filter ...
    7424              : !> \param optimize_theta ...
    7425              : !> \param penalty_occ_vol ...
    7426              : !> \param normalize_orbitals ...
    7427              : !> \param penalty_occ_vol_prefactor ...
    7428              : !> \param penalty_occ_vol_pf2 ...
    7429              : !> \param special_case ...
    7430              : !> \par History
    7431              : !>       2015.04 created [Rustam Z. Khaliullin]
    7432              : !> \author Rustam Z. Khaliullin
    7433              : ! **************************************************************************************************
    7434            0 :    SUBROUTINE newton_grad_to_step(optimizer, m_grad, m_delta, m_s, m_ks, &
    7435            0 :                                   m_siginv, m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, m_t, &
    7436            0 :                                   m_sig_sqrti_ii, domain_s_inv, domain_r_down, domain_map, cpu_of_domain, &
    7437            0 :                                   nocc_of_domain, para_env, blacs_env, eps_filter, optimize_theta, &
    7438            0 :                                   penalty_occ_vol, normalize_orbitals, penalty_occ_vol_prefactor, &
    7439            0 :                                   penalty_occ_vol_pf2, special_case)
    7440              : 
    7441              :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
    7442              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_grad
    7443              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_delta, m_s, m_ks, m_siginv, m_quench_t
    7444              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_FTsiginv, m_siginvTFTsiginv, m_ST, &
    7445              :                                                             m_t, m_sig_sqrti_ii
    7446              :       TYPE(domain_submatrix_type), DIMENSION(:, :), &
    7447              :          INTENT(IN)                                      :: domain_s_inv, domain_r_down
    7448              :       TYPE(domain_map_type), DIMENSION(:), INTENT(IN)    :: domain_map
    7449              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
    7450              :       INTEGER, DIMENSION(:, :), INTENT(IN)               :: nocc_of_domain
    7451              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    7452              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    7453              :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    7454              :       LOGICAL, INTENT(IN)                                :: optimize_theta, penalty_occ_vol, &
    7455              :                                                             normalize_orbitals
    7456              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: penalty_occ_vol_prefactor, &
    7457              :                                                             penalty_occ_vol_pf2
    7458              :       INTEGER, INTENT(IN)                                :: special_case
    7459              : 
    7460              :       CHARACTER(len=*), PARAMETER :: routineN = 'newton_grad_to_step'
    7461              : 
    7462              :       CHARACTER(LEN=20)                                  :: iter_type
    7463              :       INTEGER                                            :: handle, ispin, iteration, max_iter, &
    7464              :                                                             ndomains, nspins, outer_iteration, &
    7465              :                                                             outer_max_iter, unit_nr
    7466              :       LOGICAL :: converged, do_exact_inversion, outer_prepare_to_exit, prepare_to_exit, &
    7467              :          reset_conjugator, use_preconditioner
    7468              :       REAL(KIND=dp)                                      :: alpha, beta, denom, denom_ispin, &
    7469              :                                                             eps_error_target, numer, numer_ispin, &
    7470              :                                                             residue_norm, spin_factor, t1, t2
    7471            0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: residue_max_norm
    7472              :       TYPE(cp_logger_type), POINTER                      :: logger
    7473              :       TYPE(dbcsr_type)                                   :: m_tmp_oo_1, m_tmp_oo_2
    7474            0 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_f_vo, m_f_vv, m_Hstep, m_prec, &
    7475            0 :                                                             m_residue, m_residue_prev, m_s_vv, &
    7476            0 :                                                             m_step, m_STsiginv, m_zet, m_zet_prev
    7477              :       TYPE(domain_submatrix_type), ALLOCATABLE, &
    7478            0 :          DIMENSION(:, :)                                 :: domain_prec
    7479              : 
    7480            0 :       CALL timeset(routineN, handle)
    7481              : 
    7482              :       ! get a useful output_unit
    7483            0 :       logger => cp_get_default_logger()
    7484            0 :       IF (logger%para_env%is_source()) THEN
    7485            0 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    7486              :       ELSE
    7487              :          unit_nr = -1
    7488              :       END IF
    7489              : 
    7490              :       !!! Currently for non-theta only
    7491            0 :       IF (optimize_theta) THEN
    7492            0 :          CPABORT("theta is NYI")
    7493              :       END IF
    7494              : 
    7495              :       ! set optimizer options
    7496            0 :       use_preconditioner = (optimizer%preconditioner /= xalmo_prec_zero)
    7497            0 :       outer_max_iter = optimizer%max_iter_outer_loop
    7498            0 :       max_iter = optimizer%max_iter
    7499            0 :       eps_error_target = optimizer%eps_error
    7500              : 
    7501              :       ! set key dimensions
    7502            0 :       nspins = SIZE(m_ks)
    7503            0 :       ndomains = SIZE(domain_s_inv, 1)
    7504              : 
    7505            0 :       IF (nspins == 1) THEN
    7506            0 :          spin_factor = 2.0_dp
    7507              :       ELSE
    7508            0 :          spin_factor = 1.0_dp
    7509              :       END IF
    7510              : 
    7511            0 :       ALLOCATE (domain_prec(ndomains, nspins))
    7512            0 :       CALL init_submatrices(domain_prec)
    7513              : 
    7514              :       ! allocate matrices
    7515            0 :       ALLOCATE (m_residue(nspins))
    7516            0 :       ALLOCATE (m_residue_prev(nspins))
    7517            0 :       ALLOCATE (m_step(nspins))
    7518            0 :       ALLOCATE (m_zet(nspins))
    7519            0 :       ALLOCATE (m_zet_prev(nspins))
    7520            0 :       ALLOCATE (m_Hstep(nspins))
    7521            0 :       ALLOCATE (m_prec(nspins))
    7522            0 :       ALLOCATE (m_s_vv(nspins))
    7523            0 :       ALLOCATE (m_f_vv(nspins))
    7524            0 :       ALLOCATE (m_f_vo(nspins))
    7525            0 :       ALLOCATE (m_STsiginv(nspins))
    7526              : 
    7527            0 :       ALLOCATE (residue_max_norm(nspins))
    7528              : 
    7529              :       ! initiate objects before iterations
    7530            0 :       DO ispin = 1, nspins
    7531              : 
    7532              :          ! init matrices
    7533              :          CALL dbcsr_create(m_residue(ispin), &
    7534              :                            template=m_quench_t(ispin), &
    7535            0 :                            matrix_type=dbcsr_type_no_symmetry)
    7536              :          CALL dbcsr_create(m_residue_prev(ispin), &
    7537              :                            template=m_quench_t(ispin), &
    7538            0 :                            matrix_type=dbcsr_type_no_symmetry)
    7539              :          CALL dbcsr_create(m_step(ispin), &
    7540              :                            template=m_quench_t(ispin), &
    7541            0 :                            matrix_type=dbcsr_type_no_symmetry)
    7542              :          CALL dbcsr_create(m_zet_prev(ispin), &
    7543              :                            template=m_quench_t(ispin), &
    7544            0 :                            matrix_type=dbcsr_type_no_symmetry)
    7545              :          CALL dbcsr_create(m_zet(ispin), &
    7546              :                            template=m_quench_t(ispin), &
    7547            0 :                            matrix_type=dbcsr_type_no_symmetry)
    7548              :          CALL dbcsr_create(m_Hstep(ispin), &
    7549              :                            template=m_quench_t(ispin), &
    7550            0 :                            matrix_type=dbcsr_type_no_symmetry)
    7551              :          CALL dbcsr_create(m_f_vo(ispin), &
    7552              :                            template=m_quench_t(ispin), &
    7553            0 :                            matrix_type=dbcsr_type_no_symmetry)
    7554              :          CALL dbcsr_create(m_STsiginv(ispin), &
    7555              :                            template=m_quench_t(ispin), &
    7556            0 :                            matrix_type=dbcsr_type_no_symmetry)
    7557              :          CALL dbcsr_create(m_f_vv(ispin), &
    7558              :                            template=m_ks(ispin), &
    7559            0 :                            matrix_type=dbcsr_type_no_symmetry)
    7560              :          CALL dbcsr_create(m_s_vv(ispin), &
    7561              :                            template=m_s(1), &
    7562            0 :                            matrix_type=dbcsr_type_no_symmetry)
    7563              :          CALL dbcsr_create(m_prec(ispin), &
    7564              :                            template=m_ks(ispin), &
    7565            0 :                            matrix_type=dbcsr_type_no_symmetry)
    7566              : 
    7567              :          ! compute the full "gradient" - it is necessary to
    7568              :          ! evaluate Hessian.X
    7569            0 :          CALL dbcsr_copy(m_f_vo(ispin), m_FTsiginv(ispin))
    7570              :          CALL dbcsr_multiply("N", "N", -1.0_dp, &
    7571              :                              m_ST(ispin), &
    7572              :                              m_siginvTFTsiginv(ispin), &
    7573              :                              1.0_dp, m_f_vo(ispin), &
    7574            0 :                              filter_eps=eps_filter)
    7575              : 
    7576              : ! RZK-warning
    7577              : ! compute preconditioner even if we do not use it
    7578              : ! this is for debugging because compute_preconditioner includes
    7579              : ! computing F_vv and S_vv necessary for
    7580              : !       IF ( use_preconditioner ) THEN
    7581              : 
    7582              : ! domain_s_inv and domain_r_down are never used with assume_t0_q0x=FALSE
    7583              :          CALL compute_preconditioner( &
    7584              :             domain_prec_out=domain_prec(:, ispin), &
    7585              :             m_prec_out=m_prec(ispin), &
    7586              :             m_ks=m_ks(ispin), &
    7587              :             m_s=m_s(1), &
    7588              :             m_siginv=m_siginv(ispin), &
    7589              :             m_quench_t=m_quench_t(ispin), &
    7590              :             m_FTsiginv=m_FTsiginv(ispin), &
    7591              :             m_siginvTFTsiginv=m_siginvTFTsiginv(ispin), &
    7592              :             m_ST=m_ST(ispin), &
    7593              :             m_STsiginv_out=m_STsiginv(ispin), &
    7594              :             m_s_vv_out=m_s_vv(ispin), &
    7595              :             m_f_vv_out=m_f_vv(ispin), &
    7596              :             para_env=para_env, &
    7597              :             blacs_env=blacs_env, &
    7598              :             nocc_of_domain=nocc_of_domain(:, ispin), &
    7599              :             domain_s_inv=domain_s_inv(:, ispin), &
    7600              :             domain_r_down=domain_r_down(:, ispin), &
    7601              :             cpu_of_domain=cpu_of_domain(:), &
    7602              :             domain_map=domain_map(ispin), &
    7603              :             assume_t0_q0x=.FALSE., &
    7604              :             penalty_occ_vol=penalty_occ_vol, &
    7605              :             penalty_occ_vol_prefactor=penalty_occ_vol_prefactor(ispin), &
    7606              :             eps_filter=eps_filter, &
    7607              :             neg_thr=0.5_dp, &
    7608              :             spin_factor=spin_factor, &
    7609              :             special_case=special_case, &
    7610              :             skip_inversion=.FALSE. &
    7611            0 :             )
    7612              : 
    7613              : !       ENDIF ! use_preconditioner
    7614              : 
    7615              :          ! initial guess
    7616            0 :          CALL dbcsr_copy(m_delta(ispin), m_quench_t(ispin))
    7617              :          ! in order to use dbcsr_set matrix blocks must exist
    7618            0 :          CALL dbcsr_set(m_delta(ispin), 0.0_dp)
    7619            0 :          CALL dbcsr_copy(m_residue(ispin), m_grad(ispin))
    7620            0 :          CALL dbcsr_scale(m_residue(ispin), -1.0_dp)
    7621              : 
    7622            0 :          do_exact_inversion = .FALSE.
    7623              :          IF (do_exact_inversion) THEN
    7624              : 
    7625              :             ! copy grad to m_step temporarily
    7626              :             ! use m_step as input to the inversion routine
    7627              :             CALL dbcsr_copy(m_step(ispin), m_grad(ispin))
    7628              : 
    7629              :             ! expensive "exact" inversion of the "nearly-exact" Hessian
    7630              :             ! hopefully returns Z=-H^(-1).G
    7631              :             CALL hessian_diag_apply( &
    7632              :                matrix_grad=m_step(ispin), &
    7633              :                matrix_step=m_zet(ispin), &
    7634              :                matrix_S_ao=m_s_vv(ispin), &
    7635              :                matrix_F_ao=m_f_vv(ispin), &
    7636              :                !matrix_S_ao=m_s(ispin),&
    7637              :                !matrix_F_ao=m_ks(ispin),&
    7638              :                matrix_S_mo=m_siginv(ispin), &
    7639              :                matrix_F_mo=m_siginvTFTsiginv(ispin), &
    7640              :                matrix_S_vo=m_STsiginv(ispin), &
    7641              :                matrix_F_vo=m_f_vo(ispin), &
    7642              :                quench_t=m_quench_t(ispin), &
    7643              :                spin_factor=spin_factor, &
    7644              :                eps_zero=eps_filter*10.0_dp, &
    7645              :                penalty_occ_vol=penalty_occ_vol, &
    7646              :                penalty_occ_vol_prefactor=penalty_occ_vol_prefactor(ispin), &
    7647              :                penalty_occ_vol_pf2=penalty_occ_vol_pf2(ispin), &
    7648              :                m_s=m_s(1), &
    7649              :                para_env=para_env, &
    7650              :                blacs_env=blacs_env &
    7651              :                )
    7652              :             ! correct solution by the spin factor
    7653              :             !CALL dbcsr_scale(m_zet(ispin),1.0_dp/(2.0_dp*spin_factor))
    7654              : 
    7655              :          ELSE ! use PCG to solve H.D=-G
    7656              : 
    7657            0 :             IF (use_preconditioner) THEN
    7658              : 
    7659            0 :                IF (special_case == xalmo_case_block_diag .OR. &
    7660              :                    special_case == xalmo_case_fully_deloc) THEN
    7661              : 
    7662              :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7663              :                                       m_prec(ispin), &
    7664              :                                       m_residue(ispin), &
    7665              :                                       0.0_dp, m_zet(ispin), &
    7666            0 :                                       filter_eps=eps_filter)
    7667              : 
    7668              :                ELSE
    7669              : 
    7670              :                   CALL apply_domain_operators( &
    7671              :                      matrix_in=m_residue(ispin), &
    7672              :                      matrix_out=m_zet(ispin), &
    7673              :                      operator1=domain_prec(:, ispin), &
    7674              :                      dpattern=m_quench_t(ispin), &
    7675              :                      map=domain_map(ispin), &
    7676              :                      node_of_domain=cpu_of_domain(:), &
    7677              :                      my_action=0, &
    7678              :                      filter_eps=eps_filter &
    7679              :                      !matrix_trimmer=,&
    7680              :                      !use_trimmer=.FALSE.,&
    7681            0 :                      )
    7682              : 
    7683              :                END IF ! special_case
    7684              : 
    7685              :             ELSE ! do not use preconditioner
    7686              : 
    7687            0 :                CALL dbcsr_copy(m_zet(ispin), m_residue(ispin))
    7688              : 
    7689              :             END IF ! use_preconditioner
    7690              : 
    7691              :          END IF ! do_exact_inversion
    7692              : 
    7693            0 :          CALL dbcsr_copy(m_step(ispin), m_zet(ispin))
    7694              : 
    7695              :       END DO !ispin
    7696              : 
    7697              :       ! start the outer SCF loop
    7698            0 :       outer_prepare_to_exit = .FALSE.
    7699            0 :       outer_iteration = 0
    7700            0 :       residue_norm = 0.0_dp
    7701              : 
    7702              :       DO
    7703              : 
    7704              :          ! start the inner SCF loop
    7705            0 :          prepare_to_exit = .FALSE.
    7706            0 :          converged = .FALSE.
    7707            0 :          iteration = 0
    7708            0 :          t1 = m_walltime()
    7709              : 
    7710              :          DO
    7711              : 
    7712              :             ! apply hessian to the step matrix
    7713              :             CALL apply_hessian( &
    7714              :                m_x_in=m_step, &
    7715              :                m_x_out=m_Hstep, &
    7716              :                m_ks=m_ks, &
    7717              :                m_s=m_s, &
    7718              :                m_siginv=m_siginv, &
    7719              :                m_quench_t=m_quench_t, &
    7720              :                m_FTsiginv=m_FTsiginv, &
    7721              :                m_siginvTFTsiginv=m_siginvTFTsiginv, &
    7722              :                m_ST=m_ST, &
    7723              :                m_STsiginv=m_STsiginv, &
    7724              :                m_s_vv=m_s_vv, &
    7725              :                m_ks_vv=m_f_vv, &
    7726              :                !m_s_vv=m_s,&
    7727              :                !m_ks_vv=m_ks,&
    7728              :                m_g_full=m_f_vo, &
    7729              :                m_t=m_t, &
    7730              :                m_sig_sqrti_ii=m_sig_sqrti_ii, &
    7731              :                penalty_occ_vol=penalty_occ_vol, &
    7732              :                normalize_orbitals=normalize_orbitals, &
    7733              :                penalty_occ_vol_prefactor=penalty_occ_vol_prefactor, &
    7734              :                eps_filter=eps_filter, &
    7735              :                path_num=hessian_path_reuse &
    7736            0 :                )
    7737              : 
    7738              :             ! alpha is computed outside the spin loop
    7739            0 :             numer = 0.0_dp
    7740            0 :             denom = 0.0_dp
    7741            0 :             DO ispin = 1, nspins
    7742              : 
    7743            0 :                CALL dbcsr_dot(m_residue(ispin), m_zet(ispin), numer_ispin)
    7744            0 :                CALL dbcsr_dot(m_step(ispin), m_Hstep(ispin), denom_ispin)
    7745              : 
    7746            0 :                numer = numer + numer_ispin
    7747            0 :                denom = denom + denom_ispin
    7748              : 
    7749              :             END DO !ispin
    7750              : 
    7751            0 :             alpha = numer/denom
    7752              : 
    7753            0 :             DO ispin = 1, nspins
    7754              : 
    7755              :                ! update the variable
    7756            0 :                CALL dbcsr_add(m_delta(ispin), m_step(ispin), 1.0_dp, alpha)
    7757            0 :                CALL dbcsr_copy(m_residue_prev(ispin), m_residue(ispin))
    7758              :                CALL dbcsr_add(m_residue(ispin), m_Hstep(ispin), &
    7759            0 :                               1.0_dp, -1.0_dp*alpha)
    7760            0 :                residue_max_norm(ispin) = dbcsr_maxabs(m_residue(ispin))
    7761              : 
    7762              :             END DO ! ispin
    7763              : 
    7764              :             ! check convergence and other exit criteria
    7765            0 :             residue_norm = MAXVAL(residue_max_norm)
    7766            0 :             converged = (residue_norm < eps_error_target)
    7767            0 :             IF (converged .OR. (iteration >= max_iter)) THEN
    7768              :                prepare_to_exit = .TRUE.
    7769              :             END IF
    7770              : 
    7771            0 :             IF (.NOT. prepare_to_exit) THEN
    7772              : 
    7773            0 :                DO ispin = 1, nspins
    7774              : 
    7775              :                   ! save current z before the update
    7776            0 :                   CALL dbcsr_copy(m_zet_prev(ispin), m_zet(ispin))
    7777              : 
    7778              :                   ! compute the new step (apply preconditioner if available)
    7779            0 :                   IF (use_preconditioner) THEN
    7780              : 
    7781              :                      !IF (unit_nr>0) THEN
    7782              :                      !   WRITE(unit_nr,*) "....applying preconditioner...."
    7783              :                      !ENDIF
    7784              : 
    7785            0 :                      IF (special_case == xalmo_case_block_diag .OR. &
    7786              :                          special_case == xalmo_case_fully_deloc) THEN
    7787              : 
    7788              :                         CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7789              :                                             m_prec(ispin), &
    7790              :                                             m_residue(ispin), &
    7791              :                                             0.0_dp, m_zet(ispin), &
    7792            0 :                                             filter_eps=eps_filter)
    7793              : 
    7794              :                      ELSE
    7795              : 
    7796              :                         CALL apply_domain_operators( &
    7797              :                            matrix_in=m_residue(ispin), &
    7798              :                            matrix_out=m_zet(ispin), &
    7799              :                            operator1=domain_prec(:, ispin), &
    7800              :                            dpattern=m_quench_t(ispin), &
    7801              :                            map=domain_map(ispin), &
    7802              :                            node_of_domain=cpu_of_domain(:), &
    7803              :                            my_action=0, &
    7804              :                            filter_eps=eps_filter &
    7805              :                            !matrix_trimmer=,&
    7806              :                            !use_trimmer=.FALSE.,&
    7807            0 :                            )
    7808              : 
    7809              :                      END IF ! special case
    7810              : 
    7811              :                   ELSE
    7812              : 
    7813            0 :                      CALL dbcsr_copy(m_zet(ispin), m_residue(ispin))
    7814              : 
    7815              :                   END IF
    7816              : 
    7817              :                END DO !ispin
    7818              : 
    7819              :                ! compute the conjugation coefficient - beta
    7820              :                CALL compute_cg_beta( &
    7821              :                   beta=beta, &
    7822              :                   reset_conjugator=reset_conjugator, &
    7823              :                   conjugator=cg_fletcher, &
    7824              :                   grad=m_residue, &
    7825              :                   prev_grad=m_residue_prev, &
    7826              :                   step=m_zet, &
    7827            0 :                   prev_step=m_zet_prev)
    7828              : 
    7829            0 :                DO ispin = 1, nspins
    7830              : 
    7831              :                   ! conjugate the step direction
    7832            0 :                   CALL dbcsr_add(m_step(ispin), m_zet(ispin), beta, 1.0_dp)
    7833              : 
    7834              :                END DO !ispin
    7835              : 
    7836              :             END IF ! not.prepare_to_exit
    7837              : 
    7838            0 :             t2 = m_walltime()
    7839            0 :             IF (unit_nr > 0) THEN
    7840              :                !iter_type=TRIM("ALMO SCF "//iter_type)
    7841            0 :                iter_type = TRIM("NR STEP")
    7842              :                WRITE (unit_nr, '(T6,A9,I6,F14.5,F14.5,F15.10,F9.2)') &
    7843            0 :                   iter_type, iteration, &
    7844            0 :                   alpha, beta, residue_norm, &
    7845            0 :                   t2 - t1
    7846              :             END IF
    7847            0 :             t1 = m_walltime()
    7848              : 
    7849            0 :             iteration = iteration + 1
    7850            0 :             IF (prepare_to_exit) EXIT
    7851              : 
    7852              :          END DO ! inner loop
    7853              : 
    7854            0 :          IF (converged .OR. (outer_iteration >= outer_max_iter)) THEN
    7855            0 :             outer_prepare_to_exit = .TRUE.
    7856              :          END IF
    7857              : 
    7858            0 :          outer_iteration = outer_iteration + 1
    7859            0 :          IF (outer_prepare_to_exit) EXIT
    7860              : 
    7861              :       END DO ! outer loop
    7862              : 
    7863              : ! is not necessary if penalty_occ_vol_pf2=0.0
    7864              : #if 0
    7865              : 
    7866              :       IF (penalty_occ_vol) THEN
    7867              : 
    7868              :          DO ispin = 1, nspins
    7869              : 
    7870              :             CALL dbcsr_copy(m_zet(ispin), m_grad(ispin))
    7871              :             CALL dbcsr_dot(m_delta(ispin), m_zet(ispin), alpha)
    7872              :             WRITE (unit_nr, *) "trace(grad.delta): ", alpha
    7873              :             alpha = -1.0_dp/(penalty_occ_vol_pf2(ispin)*alpha - 1.0_dp)
    7874              :             WRITE (unit_nr, *) "correction alpha: ", alpha
    7875              :             CALL dbcsr_scale(m_delta(ispin), alpha)
    7876              : 
    7877              :          END DO
    7878              : 
    7879              :       END IF
    7880              : 
    7881              : #endif
    7882              : 
    7883            0 :       DO ispin = 1, nspins
    7884              : 
    7885              :          ! check whether the step lies entirely in R or Q
    7886              :          CALL dbcsr_create(m_tmp_oo_1, &
    7887              :                            template=m_siginv(ispin), &
    7888            0 :                            matrix_type=dbcsr_type_no_symmetry)
    7889              :          CALL dbcsr_create(m_tmp_oo_2, &
    7890              :                            template=m_siginv(ispin), &
    7891            0 :                            matrix_type=dbcsr_type_no_symmetry)
    7892              :          CALL dbcsr_multiply("T", "N", 1.0_dp, &
    7893              :                              m_ST(ispin), &
    7894              :                              m_delta(ispin), &
    7895              :                              0.0_dp, m_tmp_oo_1, &
    7896            0 :                              filter_eps=eps_filter)
    7897              :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7898              :                              m_siginv(ispin), &
    7899              :                              m_tmp_oo_1, &
    7900              :                              0.0_dp, m_tmp_oo_2, &
    7901            0 :                              filter_eps=eps_filter)
    7902            0 :          CALL dbcsr_copy(m_zet(ispin), m_quench_t(ispin))
    7903              :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
    7904              :                              m_t(ispin), &
    7905              :                              m_tmp_oo_2, &
    7906              :                              0.0_dp, m_zet(ispin), &
    7907            0 :                              retain_sparsity=.TRUE.)
    7908            0 :          alpha = dbcsr_maxabs(m_zet(ispin))
    7909            0 :          WRITE (unit_nr, "(A50,2F20.10)") "Occupied-space projection of the step", alpha
    7910            0 :          CALL dbcsr_add(m_zet(ispin), m_delta(ispin), -1.0_dp, 1.0_dp)
    7911            0 :          alpha = dbcsr_maxabs(m_zet(ispin))
    7912            0 :          WRITE (unit_nr, "(A50,2F20.10)") "Virtual-space projection of the step", alpha
    7913            0 :          alpha = dbcsr_maxabs(m_delta(ispin))
    7914            0 :          WRITE (unit_nr, "(A50,2F20.10)") "Full step", alpha
    7915            0 :          CALL dbcsr_release(m_tmp_oo_1)
    7916            0 :          CALL dbcsr_release(m_tmp_oo_2)
    7917              : 
    7918              :       END DO
    7919              : 
    7920              :       ! clean up
    7921            0 :       DO ispin = 1, nspins
    7922            0 :          CALL release_submatrices(domain_prec(:, ispin))
    7923            0 :          CALL dbcsr_release(m_residue(ispin))
    7924            0 :          CALL dbcsr_release(m_residue_prev(ispin))
    7925            0 :          CALL dbcsr_release(m_step(ispin))
    7926            0 :          CALL dbcsr_release(m_zet(ispin))
    7927            0 :          CALL dbcsr_release(m_zet_prev(ispin))
    7928            0 :          CALL dbcsr_release(m_Hstep(ispin))
    7929            0 :          CALL dbcsr_release(m_f_vo(ispin))
    7930            0 :          CALL dbcsr_release(m_f_vv(ispin))
    7931            0 :          CALL dbcsr_release(m_s_vv(ispin))
    7932            0 :          CALL dbcsr_release(m_prec(ispin))
    7933            0 :          CALL dbcsr_release(m_STsiginv(ispin))
    7934              :       END DO !ispin
    7935            0 :       DEALLOCATE (domain_prec)
    7936            0 :       DEALLOCATE (m_residue)
    7937            0 :       DEALLOCATE (m_residue_prev)
    7938            0 :       DEALLOCATE (m_step)
    7939            0 :       DEALLOCATE (m_zet)
    7940            0 :       DEALLOCATE (m_zet_prev)
    7941            0 :       DEALLOCATE (m_prec)
    7942            0 :       DEALLOCATE (m_Hstep)
    7943            0 :       DEALLOCATE (m_s_vv)
    7944            0 :       DEALLOCATE (m_f_vv)
    7945            0 :       DEALLOCATE (m_f_vo)
    7946            0 :       DEALLOCATE (m_STsiginv)
    7947            0 :       DEALLOCATE (residue_max_norm)
    7948              : 
    7949            0 :       IF (.NOT. converged) THEN
    7950            0 :          CPABORT("Optimization not converged!")
    7951              :       END IF
    7952              : 
    7953              :       ! check that the step satisfies H.step=-grad
    7954              : 
    7955            0 :       CALL timestop(handle)
    7956              : 
    7957            0 :    END SUBROUTINE newton_grad_to_step
    7958              : 
    7959              : ! *****************************************************************************
    7960              : !> \brief Computes Hessian.X
    7961              : !> \param m_x_in ...
    7962              : !> \param m_x_out ...
    7963              : !> \param m_ks ...
    7964              : !> \param m_s ...
    7965              : !> \param m_siginv ...
    7966              : !> \param m_quench_t ...
    7967              : !> \param m_FTsiginv ...
    7968              : !> \param m_siginvTFTsiginv ...
    7969              : !> \param m_ST ...
    7970              : !> \param m_STsiginv ...
    7971              : !> \param m_s_vv ...
    7972              : !> \param m_ks_vv ...
    7973              : !> \param m_g_full ...
    7974              : !> \param m_t ...
    7975              : !> \param m_sig_sqrti_ii ...
    7976              : !> \param penalty_occ_vol ...
    7977              : !> \param normalize_orbitals ...
    7978              : !> \param penalty_occ_vol_prefactor ...
    7979              : !> \param eps_filter ...
    7980              : !> \param path_num ...
    7981              : !> \par History
    7982              : !>       2015.04 created [Rustam Z Khaliullin]
    7983              : !> \author Rustam Z Khaliullin
    7984              : ! **************************************************************************************************
    7985            0 :    SUBROUTINE apply_hessian(m_x_in, m_x_out, m_ks, m_s, m_siginv, &
    7986            0 :                             m_quench_t, m_FTsiginv, m_siginvTFTsiginv, m_ST, m_STsiginv, m_s_vv, &
    7987            0 :                             m_ks_vv, m_g_full, m_t, m_sig_sqrti_ii, penalty_occ_vol, &
    7988            0 :                             normalize_orbitals, penalty_occ_vol_prefactor, eps_filter, path_num)
    7989              : 
    7990              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_x_in, m_x_out, m_ks, m_s
    7991              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_siginv, m_quench_t, m_FTsiginv, &
    7992              :                                                             m_siginvTFTsiginv, m_ST, m_STsiginv
    7993              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_s_vv, m_ks_vv, m_g_full
    7994              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_t, m_sig_sqrti_ii
    7995              :       LOGICAL, INTENT(IN)                                :: penalty_occ_vol, normalize_orbitals
    7996              :       REAL(KIND=dp), DIMENSION(:), INTENT(IN)            :: penalty_occ_vol_prefactor
    7997              :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
    7998              :       INTEGER, INTENT(IN)                                :: path_num
    7999              : 
    8000              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'apply_hessian'
    8001              : 
    8002              :       INTEGER                                            :: dim0, handle, ispin, nspins
    8003              :       REAL(KIND=dp)                                      :: penalty_prefactor_local, spin_factor
    8004            0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: tg_diagonal
    8005              :       TYPE(dbcsr_type)                                   :: m_tmp_no_1, m_tmp_no_2, m_tmp_oo_1, &
    8006              :                                                             m_tmp_x_in
    8007              : 
    8008            0 :       CALL timeset(routineN, handle)
    8009              : 
    8010              :       !JHU: test and use for unused debug variables
    8011            0 :       IF (penalty_occ_vol) penalty_prefactor_local = 1._dp
    8012            0 :       CPASSERT(SIZE(m_STsiginv) >= 0)
    8013            0 :       CPASSERT(SIZE(m_siginvTFTsiginv) >= 0)
    8014            0 :       CPASSERT(SIZE(m_s) >= 0)
    8015            0 :       CPASSERT(SIZE(m_g_full) >= 0)
    8016            0 :       CPASSERT(SIZE(m_FTsiginv) >= 0)
    8017              :       MARK_USED(m_siginvTFTsiginv)
    8018              :       MARK_USED(m_STsiginv)
    8019              :       MARK_USED(m_FTsiginv)
    8020              :       MARK_USED(m_g_full)
    8021              :       MARK_USED(m_s)
    8022              : 
    8023            0 :       nspins = SIZE(m_ks)
    8024              : 
    8025            0 :       IF (nspins == 1) THEN
    8026              :          spin_factor = 2.0_dp
    8027              :       ELSE
    8028            0 :          spin_factor = 1.0_dp
    8029              :       END IF
    8030              : 
    8031            0 :       DO ispin = 1, nspins
    8032              : 
    8033            0 :          penalty_prefactor_local = penalty_occ_vol_prefactor(ispin)/(2.0_dp*spin_factor)
    8034              : 
    8035              :          CALL dbcsr_create(m_tmp_oo_1, &
    8036              :                            template=m_siginv(ispin), &
    8037            0 :                            matrix_type=dbcsr_type_no_symmetry)
    8038              :          CALL dbcsr_create(m_tmp_no_1, &
    8039              :                            template=m_quench_t(ispin), &
    8040            0 :                            matrix_type=dbcsr_type_no_symmetry)
    8041              :          CALL dbcsr_create(m_tmp_no_2, &
    8042              :                            template=m_quench_t(ispin), &
    8043            0 :                            matrix_type=dbcsr_type_no_symmetry)
    8044              :          CALL dbcsr_create(m_tmp_x_in, &
    8045              :                            template=m_quench_t(ispin), &
    8046            0 :                            matrix_type=dbcsr_type_no_symmetry)
    8047              : 
    8048              :          ! transform the input X to take into account the normalization constraint
    8049            0 :          IF (normalize_orbitals) THEN
    8050              : 
    8051              :             ! H.D = ( (H.D) - ST.[tr(T).(H.D)]_ii ) . [sig_sqrti]_ii
    8052              : 
    8053              :             ! get [tr(T).HD]_ii
    8054            0 :             CALL dbcsr_copy(m_tmp_oo_1, m_sig_sqrti_ii(ispin))
    8055              :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    8056              :                                 m_x_in(ispin), &
    8057              :                                 m_ST(ispin), &
    8058              :                                 0.0_dp, m_tmp_oo_1, &
    8059            0 :                                 retain_sparsity=.TRUE.)
    8060            0 :             CALL dbcsr_get_info(m_sig_sqrti_ii(ispin), nfullrows_total=dim0)
    8061            0 :             ALLOCATE (tg_diagonal(dim0))
    8062            0 :             CALL dbcsr_get_diag(m_tmp_oo_1, tg_diagonal)
    8063            0 :             CALL dbcsr_set(m_tmp_oo_1, 0.0_dp)
    8064            0 :             CALL dbcsr_set_diag(m_tmp_oo_1, tg_diagonal)
    8065            0 :             DEALLOCATE (tg_diagonal)
    8066              : 
    8067            0 :             CALL dbcsr_copy(m_tmp_no_1, m_x_in(ispin))
    8068              :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    8069              :                                 m_t(ispin), &
    8070              :                                 m_tmp_oo_1, &
    8071              :                                 1.0_dp, m_tmp_no_1, &
    8072            0 :                                 filter_eps=eps_filter)
    8073              :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8074              :                                 m_tmp_no_1, &
    8075              :                                 m_sig_sqrti_ii(ispin), &
    8076              :                                 0.0_dp, m_tmp_x_in, &
    8077            0 :                                 filter_eps=eps_filter)
    8078              : 
    8079              :          ELSE
    8080              : 
    8081            0 :             CALL dbcsr_copy(m_tmp_x_in, m_x_in(ispin))
    8082              : 
    8083              :          END IF ! normalize_orbitals
    8084              : 
    8085            0 :          IF (path_num == hessian_path_reuse) THEN
    8086              : 
    8087              :             ! apply pre-computed F_vv and S_vv to X
    8088              : 
    8089              : #if 0
    8090              : ! RZK-warning: negative sign at penalty_prefactor_local is that
    8091              : ! magical fix for the negative definite problem
    8092              : ! (since penalty_prefactor_local<0 the coeff before S_vv must
    8093              : ! be multiplied by -1 to take the step in the right direction)
    8094              : !CALL dbcsr_multiply("N","N",-4.0_dp*penalty_prefactor_local,&
    8095              : !        m_s_vv(ispin),&
    8096              : !        m_tmp_x_in,&
    8097              : !        0.0_dp,m_tmp_no_1,&
    8098              : !        filter_eps=eps_filter)
    8099              : !CALL dbcsr_copy(m_x_out(ispin),m_quench_t(ispin))
    8100              : !CALL dbcsr_multiply("N","N",1.0_dp,&
    8101              : !        m_tmp_no_1,&
    8102              : !        m_siginv(ispin),&
    8103              : !        0.0_dp,m_x_out(ispin),&
    8104              : !        retain_sparsity=.TRUE.)
    8105              : 
    8106              :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8107              :                                 m_s(1), &
    8108              :                                 m_tmp_x_in, &
    8109              :                                 0.0_dp, m_tmp_no_1, &
    8110              :                                 filter_eps=eps_filter)
    8111              :             CALL dbcsr_copy(m_x_out(ispin), m_quench_t(ispin))
    8112              :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8113              :                                 m_tmp_no_1, &
    8114              :                                 m_siginv(ispin), &
    8115              :                                 0.0_dp, m_x_out(ispin), &
    8116              :                                 retain_sparsity=.TRUE.)
    8117              : 
    8118              : !CALL dbcsr_copy(m_x_out(ispin),m_quench_t(ispin))
    8119              : !CALL dbcsr_multiply("N","N",1.0_dp,&
    8120              : !        m_s(1),&
    8121              : !        m_tmp_x_in,&
    8122              : !        0.0_dp,m_x_out(ispin),&
    8123              : !        retain_sparsity=.TRUE.)
    8124              : 
    8125              : #else
    8126              : 
    8127              :             ! debugging: only vv matrices, oo matrices are kronecker
    8128            0 :             CALL dbcsr_copy(m_x_out(ispin), m_quench_t(ispin))
    8129              :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8130              :                                 m_ks_vv(ispin), &
    8131              :                                 m_tmp_x_in, &
    8132              :                                 0.0_dp, m_x_out(ispin), &
    8133            0 :                                 retain_sparsity=.TRUE.)
    8134              : 
    8135            0 :             CALL dbcsr_copy(m_tmp_no_2, m_quench_t(ispin))
    8136              :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8137              :                                 m_s_vv(ispin), &
    8138              :                                 m_tmp_x_in, &
    8139              :                                 0.0_dp, m_tmp_no_2, &
    8140            0 :                                 retain_sparsity=.TRUE.)
    8141              :             CALL dbcsr_add(m_x_out(ispin), m_tmp_no_2, &
    8142            0 :                            1.0_dp, -4.0_dp*penalty_prefactor_local + 1.0_dp)
    8143              : #endif
    8144              : 
    8145              : !          ! F_vv.X.S_oo
    8146              : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8147              : !                  m_ks_vv(ispin),&
    8148              : !                  m_tmp_x_in,&
    8149              : !                  0.0_dp,m_tmp_no_1,&
    8150              : !                  filter_eps=eps_filter,&
    8151              : !                  )
    8152              : !          CALL dbcsr_copy(m_x_out(ispin),m_quench_t(ispin))
    8153              : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8154              : !                  m_tmp_no_1,&
    8155              : !                  m_siginv(ispin),&
    8156              : !                  0.0_dp,m_x_out(ispin),&
    8157              : !                  retain_sparsity=.TRUE.,&
    8158              : !                  )
    8159              : !
    8160              : !          ! S_vv.X.F_oo
    8161              : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8162              : !                  m_s_vv(ispin),&
    8163              : !                  m_tmp_x_in,&
    8164              : !                  0.0_dp,m_tmp_no_1,&
    8165              : !                  filter_eps=eps_filter,&
    8166              : !                  )
    8167              : !          CALL dbcsr_copy(m_tmp_no_2,m_quench_t(ispin))
    8168              : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8169              : !                  m_tmp_no_1,&
    8170              : !                  m_siginvTFTsiginv(ispin),&
    8171              : !                  0.0_dp,m_tmp_no_2,&
    8172              : !                  retain_sparsity=.TRUE.,&
    8173              : !                  )
    8174              : !          CALL dbcsr_add(m_x_out(ispin),m_tmp_no_2,&
    8175              : !               1.0_dp,-1.0_dp)
    8176              : !! we have to add occ voll penalty here (the Svv termi (i.e. both Svv.D.Soo)
    8177              : !!  and STsiginv terms)
    8178              : !
    8179              : !         ! S_vo.X^t.F_vo
    8180              : !          CALL dbcsr_multiply("T","N",1.0_dp,&
    8181              : !                  m_tmp_x_in,&
    8182              : !                  m_g_full(ispin),&
    8183              : !                  0.0_dp,m_tmp_oo_1,&
    8184              : !                  filter_eps=eps_filter,&
    8185              : !                  )
    8186              : !          CALL dbcsr_copy(m_tmp_no_2,m_quench_t(ispin))
    8187              : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8188              : !                  m_STsiginv(ispin),&
    8189              : !                  m_tmp_oo_1,&
    8190              : !                  0.0_dp,m_tmp_no_2,&
    8191              : !                  retain_sparsity=.TRUE.,&
    8192              : !                  )
    8193              : !          CALL dbcsr_add(m_x_out(ispin),m_tmp_no_2,&
    8194              : !                  1.0_dp,-1.0_dp)
    8195              : !
    8196              : !          ! S_vo.X^t.F_vo
    8197              : !          CALL dbcsr_multiply("T","N",1.0_dp,&
    8198              : !                  m_tmp_x_in,&
    8199              : !                  m_STsiginv(ispin),&
    8200              : !                  0.0_dp,m_tmp_oo_1,&
    8201              : !                  filter_eps=eps_filter,&
    8202              : !                  )
    8203              : !          CALL dbcsr_copy(m_tmp_no_2,m_quench_t(ispin))
    8204              : !          CALL dbcsr_multiply("N","N",1.0_dp,&
    8205              : !                  m_g_full(ispin),&
    8206              : !                  m_tmp_oo_1,&
    8207              : !                  0.0_dp,m_tmp_no_2,&
    8208              : !                  retain_sparsity=.TRUE.,&
    8209              : !                  )
    8210              : !          CALL dbcsr_add(m_x_out(ispin),m_tmp_no_2,&
    8211              : !                  1.0_dp,-1.0_dp)
    8212              : 
    8213            0 :          ELSE IF (path_num == hessian_path_assemble) THEN
    8214              : 
    8215              :             ! compute F_vv.X and S_vv.X directly
    8216              :             ! this path will be advantageous if the number
    8217              :             ! of PCG iterations is small
    8218            0 :             CPABORT("path is NYI")
    8219              : 
    8220              :          ELSE
    8221            0 :             CPABORT("illegal path")
    8222              :          END IF ! path
    8223              : 
    8224              :          ! transform the output to take into account the normalization constraint
    8225            0 :          IF (normalize_orbitals) THEN
    8226              : 
    8227              :             ! H.D = ( (H.D) - ST.[tr(T).(H.D)]_ii ) . [sig_sqrti]_ii
    8228              : 
    8229              :             ! get [tr(T).HD]_ii
    8230            0 :             CALL dbcsr_copy(m_tmp_oo_1, m_sig_sqrti_ii(ispin))
    8231              :             CALL dbcsr_multiply("T", "N", 1.0_dp, &
    8232              :                                 m_t(ispin), &
    8233              :                                 m_x_out(ispin), &
    8234              :                                 0.0_dp, m_tmp_oo_1, &
    8235            0 :                                 retain_sparsity=.TRUE.)
    8236            0 :             CALL dbcsr_get_info(m_sig_sqrti_ii(ispin), nfullrows_total=dim0)
    8237            0 :             ALLOCATE (tg_diagonal(dim0))
    8238            0 :             CALL dbcsr_get_diag(m_tmp_oo_1, tg_diagonal)
    8239            0 :             CALL dbcsr_set(m_tmp_oo_1, 0.0_dp)
    8240            0 :             CALL dbcsr_set_diag(m_tmp_oo_1, tg_diagonal)
    8241            0 :             DEALLOCATE (tg_diagonal)
    8242              : 
    8243              :             CALL dbcsr_multiply("N", "N", -1.0_dp, &
    8244              :                                 m_ST(ispin), &
    8245              :                                 m_tmp_oo_1, &
    8246              :                                 1.0_dp, m_x_out(ispin), &
    8247            0 :                                 retain_sparsity=.TRUE.)
    8248            0 :             CALL dbcsr_copy(m_tmp_no_1, m_x_out(ispin))
    8249              :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
    8250              :                                 m_tmp_no_1, &
    8251              :                                 m_sig_sqrti_ii(ispin), &
    8252              :                                 0.0_dp, m_x_out(ispin), &
    8253            0 :                                 retain_sparsity=.TRUE.)
    8254              : 
    8255              :          END IF ! normalize_orbitals
    8256              : 
    8257              :          CALL dbcsr_scale(m_x_out(ispin), &
    8258            0 :                           2.0_dp*spin_factor)
    8259              : 
    8260            0 :          CALL dbcsr_release(m_tmp_oo_1)
    8261            0 :          CALL dbcsr_release(m_tmp_no_1)
    8262            0 :          CALL dbcsr_release(m_tmp_no_2)
    8263            0 :          CALL dbcsr_release(m_tmp_x_in)
    8264              : 
    8265              :       END DO !ispin
    8266              : 
    8267              :       ! there is one more part of the hessian that comes
    8268              :       ! from T-dependence of the KS matrix
    8269              :       ! it is neglected here
    8270              : 
    8271            0 :       CALL timestop(handle)
    8272              : 
    8273            0 :    END SUBROUTINE apply_hessian
    8274              : 
    8275              : ! *****************************************************************************
    8276              : !> \brief Serial code that constructs an approximate Hessian
    8277              : !> \param matrix_grad ...
    8278              : !> \param matrix_step ...
    8279              : !> \param matrix_S_ao ...
    8280              : !> \param matrix_F_ao ...
    8281              : !> \param matrix_S_mo ...
    8282              : !> \param matrix_F_mo ...
    8283              : !> \param matrix_S_vo ...
    8284              : !> \param matrix_F_vo ...
    8285              : !> \param quench_t ...
    8286              : !> \param penalty_occ_vol ...
    8287              : !> \param penalty_occ_vol_prefactor ...
    8288              : !> \param penalty_occ_vol_pf2 ...
    8289              : !> \param spin_factor ...
    8290              : !> \param eps_zero ...
    8291              : !> \param m_s ...
    8292              : !> \param para_env ...
    8293              : !> \param blacs_env ...
    8294              : !> \par History
    8295              : !>       2012.02 created [Rustam Z. Khaliullin]
    8296              : !> \author Rustam Z. Khaliullin
    8297              : ! **************************************************************************************************
    8298            0 :    SUBROUTINE hessian_diag_apply(matrix_grad, matrix_step, matrix_S_ao, &
    8299              :                                  matrix_F_ao, matrix_S_mo, matrix_F_mo, matrix_S_vo, matrix_F_vo, quench_t, &
    8300              :                                  penalty_occ_vol, penalty_occ_vol_prefactor, penalty_occ_vol_pf2, &
    8301              :                                  spin_factor, eps_zero, m_s, para_env, blacs_env)
    8302              : 
    8303              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_grad, matrix_step, matrix_S_ao, &
    8304              :                                                             matrix_F_ao, matrix_S_mo
    8305              :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix_F_mo
    8306              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_S_vo, matrix_F_vo, quench_t
    8307              :       LOGICAL, INTENT(IN)                                :: penalty_occ_vol
    8308              :       REAL(KIND=dp), INTENT(IN)                          :: penalty_occ_vol_prefactor, &
    8309              :                                                             penalty_occ_vol_pf2, spin_factor, &
    8310              :                                                             eps_zero
    8311              :       TYPE(dbcsr_type), INTENT(IN)                       :: m_s
    8312              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    8313              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    8314              : 
    8315              :       CHARACTER(len=*), PARAMETER :: routineN = 'hessian_diag_apply'
    8316              : 
    8317              :       INTEGER :: ao_hori_offset, ao_vert_offset, block_col, block_row, col, H_size, handle, ii, &
    8318              :          INFO, jj, lev1_hori_offset, lev1_vert_offset, lev2_hori_offset, lev2_vert_offset, LWORK, &
    8319              :          nblkcols_tot, nblkrows_tot, ncores, orb_i, orb_j, row, unit_nr, zero_neg_eiv
    8320            0 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: ao_block_sizes, ao_domain_sizes, &
    8321            0 :                                                             mo_block_sizes
    8322            0 :       INTEGER, DIMENSION(:), POINTER                     :: ao_blk_sizes, mo_blk_sizes
    8323              :       LOGICAL                                            :: found, found_col, found_row
    8324              :       REAL(KIND=dp)                                      :: penalty_prefactor_local, test_error
    8325            0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: eigenvalues, Grad_vec, Step_vec, tmp, &
    8326            0 :                                                             tmpr, work
    8327            0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: F_ao_block, F_mo_block, H, Hinv, &
    8328            0 :                                                             new_block, S_ao_block, S_mo_block, &
    8329            0 :                                                             test, test2
    8330            0 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block_p
    8331              :       TYPE(cp_logger_type), POINTER                      :: logger
    8332              :       TYPE(dbcsr_distribution_type)                      :: main_dist
    8333              :       TYPE(dbcsr_type)                                   :: matrix_F_ao_sym, matrix_F_mo_sym, &
    8334              :                                                             matrix_S_ao_sym, matrix_S_mo_sym
    8335              : 
    8336            0 :       CALL timeset(routineN, handle)
    8337              : 
    8338              :       ! get a useful output_unit
    8339            0 :       logger => cp_get_default_logger()
    8340            0 :       IF (logger%para_env%is_source()) THEN
    8341            0 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    8342              :       ELSE
    8343              :          unit_nr = -1
    8344              :       END IF
    8345              : 
    8346              :       !JHU use and test for unused debug variables
    8347            0 :       CPASSERT(ASSOCIATED(blacs_env))
    8348            0 :       CPASSERT(ASSOCIATED(para_env))
    8349              :       MARK_USED(blacs_env)
    8350              :       MARK_USED(para_env)
    8351              : 
    8352            0 :       CALL dbcsr_get_info(m_s, row_blk_size=ao_blk_sizes)
    8353            0 :       CALL dbcsr_get_info(matrix_S_vo, row_blk_size=ao_blk_sizes)
    8354            0 :       CALL dbcsr_get_info(matrix_F_vo, row_blk_size=ao_blk_sizes)
    8355              : 
    8356              :       ! serial code only
    8357            0 :       CALL dbcsr_get_info(matrix=matrix_S_ao, distribution=main_dist)
    8358            0 :       CALL dbcsr_distribution_get(main_dist, numnodes=ncores)
    8359            0 :       IF (ncores > 1) THEN
    8360            0 :          CPABORT("serial code only")
    8361              :       END IF
    8362              : 
    8363              :       CALL dbcsr_get_info(quench_t, row_blk_size=ao_blk_sizes, col_blk_size=mo_blk_sizes, &
    8364            0 :                           nblkrows_total=nblkrows_tot, nblkcols_total=nblkcols_tot)
    8365            0 :       CPASSERT(nblkrows_tot == nblkcols_tot)
    8366            0 :       ALLOCATE (mo_block_sizes(nblkcols_tot), ao_block_sizes(nblkcols_tot))
    8367            0 :       ALLOCATE (ao_domain_sizes(nblkcols_tot))
    8368            0 :       mo_block_sizes(:) = mo_blk_sizes(:)
    8369            0 :       ao_block_sizes(:) = ao_blk_sizes(:)
    8370            0 :       ao_domain_sizes(:) = 0
    8371              : 
    8372              :       CALL dbcsr_create(matrix_S_ao_sym, &
    8373              :                         template=matrix_S_ao, &
    8374            0 :                         matrix_type=dbcsr_type_no_symmetry)
    8375            0 :       CALL dbcsr_desymmetrize(matrix_S_ao, matrix_S_ao_sym)
    8376            0 :       CALL dbcsr_scale(matrix_S_ao_sym, 2.0_dp*spin_factor)
    8377              : 
    8378              :       CALL dbcsr_create(matrix_F_ao_sym, &
    8379              :                         template=matrix_F_ao, &
    8380            0 :                         matrix_type=dbcsr_type_no_symmetry)
    8381            0 :       CALL dbcsr_desymmetrize(matrix_F_ao, matrix_F_ao_sym)
    8382            0 :       CALL dbcsr_scale(matrix_F_ao_sym, 2.0_dp*spin_factor)
    8383              : 
    8384              :       CALL dbcsr_create(matrix_S_mo_sym, &
    8385              :                         template=matrix_S_mo, &
    8386            0 :                         matrix_type=dbcsr_type_no_symmetry)
    8387            0 :       CALL dbcsr_desymmetrize(matrix_S_mo, matrix_S_mo_sym)
    8388              : 
    8389              :       CALL dbcsr_create(matrix_F_mo_sym, &
    8390              :                         template=matrix_F_mo, &
    8391            0 :                         matrix_type=dbcsr_type_no_symmetry)
    8392            0 :       CALL dbcsr_desymmetrize(matrix_F_mo, matrix_F_mo_sym)
    8393              : 
    8394            0 :       IF (penalty_occ_vol) THEN
    8395            0 :          penalty_prefactor_local = penalty_occ_vol_prefactor/(2.0_dp*spin_factor)
    8396              :       ELSE
    8397            0 :          penalty_prefactor_local = 0.0_dp
    8398              :       END IF
    8399              : 
    8400            0 :       WRITE (unit_nr, *) "penalty_prefactor_local: ", penalty_prefactor_local
    8401            0 :       WRITE (unit_nr, *) "penalty_prefactor_2: ", penalty_occ_vol_pf2
    8402              : 
    8403              :       !CALL dbcsr_print(matrix_grad)
    8404              :       !CALL dbcsr_print(matrix_F_ao_sym)
    8405              :       !CALL dbcsr_print(matrix_S_ao_sym)
    8406              :       !CALL dbcsr_print(matrix_F_mo_sym)
    8407              :       !CALL dbcsr_print(matrix_S_mo_sym)
    8408              : 
    8409              :       ! loop over domains to find the size of the Hessian
    8410            0 :       H_size = 0
    8411            0 :       DO col = 1, nblkcols_tot
    8412              : 
    8413              :          ! find sizes of AO submatrices
    8414            0 :          DO row = 1, nblkrows_tot
    8415              : 
    8416              :             CALL dbcsr_get_block_p(quench_t, &
    8417            0 :                                    row, col, block_p, found)
    8418            0 :             IF (found) THEN
    8419            0 :                ao_domain_sizes(col) = ao_domain_sizes(col) + ao_blk_sizes(row)
    8420              :             END IF
    8421              : 
    8422              :          END DO
    8423              : 
    8424            0 :          H_size = H_size + ao_domain_sizes(col)*mo_block_sizes(col)
    8425              : 
    8426              :       END DO
    8427              : 
    8428            0 :       ALLOCATE (H(H_size, H_size))
    8429            0 :       H(:, :) = 0.0_dp
    8430              : 
    8431              :       ! fill the Hessian matrix
    8432            0 :       lev1_vert_offset = 0
    8433              :       ! loop over all pairs of fragments
    8434            0 :       DO row = 1, nblkcols_tot
    8435              : 
    8436            0 :          lev1_hori_offset = 0
    8437            0 :          DO col = 1, nblkcols_tot
    8438              : 
    8439              :             ! prepare blocks for the current row-column fragment pair
    8440            0 :             ALLOCATE (F_ao_block(ao_domain_sizes(row), ao_domain_sizes(col)))
    8441            0 :             ALLOCATE (S_ao_block(ao_domain_sizes(row), ao_domain_sizes(col)))
    8442            0 :             ALLOCATE (F_mo_block(mo_block_sizes(row), mo_block_sizes(col)))
    8443            0 :             ALLOCATE (S_mo_block(mo_block_sizes(row), mo_block_sizes(col)))
    8444              : 
    8445            0 :             F_ao_block(:, :) = 0.0_dp
    8446            0 :             S_ao_block(:, :) = 0.0_dp
    8447            0 :             F_mo_block(:, :) = 0.0_dp
    8448            0 :             S_mo_block(:, :) = 0.0_dp
    8449              : 
    8450              :             ! fill AO submatrices
    8451              :             ! loop over all blocks of the AO dbcsr matrix
    8452            0 :             ao_vert_offset = 0
    8453            0 :             DO block_row = 1, nblkcols_tot
    8454              : 
    8455              :                CALL dbcsr_get_block_p(quench_t, &
    8456            0 :                                       block_row, row, block_p, found_row)
    8457            0 :                IF (found_row) THEN
    8458              : 
    8459            0 :                   ao_hori_offset = 0
    8460            0 :                   DO block_col = 1, nblkcols_tot
    8461              : 
    8462              :                      CALL dbcsr_get_block_p(quench_t, &
    8463            0 :                                             block_col, col, block_p, found_col)
    8464            0 :                      IF (found_col) THEN
    8465              : 
    8466              :                         CALL dbcsr_get_block_p(matrix_F_ao_sym, &
    8467            0 :                                                block_row, block_col, block_p, found)
    8468            0 :                         IF (found) THEN
    8469              :                            ! copy the block into the submatrix
    8470              :                            F_ao_block(ao_vert_offset + 1:ao_vert_offset + ao_block_sizes(block_row), &
    8471              :                                       ao_hori_offset + 1:ao_hori_offset + ao_block_sizes(block_col)) &
    8472            0 :                               = block_p(:, :)
    8473              :                         END IF
    8474              : 
    8475              :                         CALL dbcsr_get_block_p(matrix_S_ao_sym, &
    8476            0 :                                                block_row, block_col, block_p, found)
    8477            0 :                         IF (found) THEN
    8478              :                            ! copy the block into the submatrix
    8479              :                            S_ao_block(ao_vert_offset + 1:ao_vert_offset + ao_block_sizes(block_row), &
    8480              :                                       ao_hori_offset + 1:ao_hori_offset + ao_block_sizes(block_col)) &
    8481            0 :                               = block_p(:, :)
    8482              :                         END IF
    8483              : 
    8484            0 :                         ao_hori_offset = ao_hori_offset + ao_block_sizes(block_col)
    8485              : 
    8486              :                      END IF
    8487              : 
    8488              :                   END DO
    8489              : 
    8490            0 :                   ao_vert_offset = ao_vert_offset + ao_block_sizes(block_row)
    8491              : 
    8492              :                END IF
    8493              : 
    8494              :             END DO
    8495              : 
    8496              :             ! fill MO submatrices
    8497            0 :             CALL dbcsr_get_block_p(matrix_F_mo_sym, row, col, block_p, found)
    8498            0 :             IF (found) THEN
    8499              :                ! copy the block into the submatrix
    8500            0 :                F_mo_block(1:mo_block_sizes(row), 1:mo_block_sizes(col)) = block_p(:, :)
    8501              :             END IF
    8502            0 :             CALL dbcsr_get_block_p(matrix_S_mo_sym, row, col, block_p, found)
    8503            0 :             IF (found) THEN
    8504              :                ! copy the block into the submatrix
    8505            0 :                S_mo_block(1:mo_block_sizes(row), 1:mo_block_sizes(col)) = block_p(:, :)
    8506              :             END IF
    8507              : 
    8508              :             !WRITE(*,*) "F_AO_BLOCK", row, col, ao_domain_sizes(row), ao_domain_sizes(col)
    8509              :             !DO ii=1,ao_domain_sizes(row)
    8510              :             !  WRITE(*,'(100F13.9)') F_ao_block(ii,:)
    8511              :             !ENDDO
    8512              :             !WRITE(*,*) "S_AO_BLOCK", row, col
    8513              :             !DO ii=1,ao_domain_sizes(row)
    8514              :             !  WRITE(*,'(100F13.9)') S_ao_block(ii,:)
    8515              :             !ENDDO
    8516              :             !WRITE(*,*) "F_MO_BLOCK", row, col
    8517              :             !DO ii=1,mo_block_sizes(row)
    8518              :             !  WRITE(*,'(100F13.9)') F_mo_block(ii,:)
    8519              :             !ENDDO
    8520              :             !WRITE(*,*) "S_MO_BLOCK", row, col, mo_block_sizes(row), mo_block_sizes(col)
    8521              :             !DO ii=1,mo_block_sizes(row)
    8522              :             !  WRITE(*,'(100F13.9)') S_mo_block(ii,:)
    8523              :             !ENDDO
    8524              : 
    8525              :             ! construct tensor products for the current row-column fragment pair
    8526              :             lev2_vert_offset = 0
    8527            0 :             DO orb_j = 1, mo_block_sizes(row)
    8528              : 
    8529              :                lev2_hori_offset = 0
    8530            0 :                DO orb_i = 1, mo_block_sizes(col)
    8531            0 :                   IF (orb_i == orb_j .AND. row == col) THEN
    8532              :                      H(lev1_vert_offset + lev2_vert_offset + 1:lev1_vert_offset + lev2_vert_offset + ao_domain_sizes(row), &
    8533              :                        lev1_hori_offset + lev2_hori_offset + 1:lev1_hori_offset + lev2_hori_offset + ao_domain_sizes(col)) &
    8534              :                         != -penalty_prefactor_local*S_ao_block(:,:)
    8535            0 :                         = F_ao_block(:, :) + S_ao_block(:, :)
    8536              : !=S_ao_block(:,:)
    8537              : !RZK-warning               =F_ao_block(:,:)+( 1.0_dp + penalty_prefactor_local )*S_ao_block(:,:)
    8538              : !               =S_mo_block(orb_j,orb_i)*F_ao_block(:,:)&
    8539              : !               -F_mo_block(orb_j,orb_i)*S_ao_block(:,:)&
    8540              : !               +penalty_prefactor_local*S_mo_block(orb_j,orb_i)*S_ao_block(:,:)
    8541              :                   END IF
    8542              :                   !WRITE(*,*) row, col, orb_j, orb_i, lev1_vert_offset+lev2_vert_offset+1, ao_domain_sizes(row),&
    8543              :                   !   lev1_hori_offset+lev2_hori_offset+1, ao_domain_sizes(col), S_mo_block(orb_j,orb_i)
    8544              : 
    8545            0 :                   lev2_hori_offset = lev2_hori_offset + ao_domain_sizes(col)
    8546              : 
    8547              :                END DO
    8548              : 
    8549            0 :                lev2_vert_offset = lev2_vert_offset + ao_domain_sizes(row)
    8550              : 
    8551              :             END DO
    8552              : 
    8553            0 :             lev1_hori_offset = lev1_hori_offset + ao_domain_sizes(col)*mo_block_sizes(col)
    8554              : 
    8555            0 :             DEALLOCATE (F_ao_block)
    8556            0 :             DEALLOCATE (S_ao_block)
    8557            0 :             DEALLOCATE (F_mo_block)
    8558            0 :             DEALLOCATE (S_mo_block)
    8559              : 
    8560              :          END DO ! col fragment
    8561              : 
    8562            0 :          lev1_vert_offset = lev1_vert_offset + ao_domain_sizes(row)*mo_block_sizes(row)
    8563              : 
    8564              :       END DO ! row fragment
    8565              : 
    8566            0 :       CALL dbcsr_release(matrix_S_ao_sym)
    8567            0 :       CALL dbcsr_release(matrix_F_ao_sym)
    8568            0 :       CALL dbcsr_release(matrix_S_mo_sym)
    8569            0 :       CALL dbcsr_release(matrix_F_mo_sym)
    8570              : 
    8571              : !!    ! Two more terms of the Hessian: S_vo.D.F_vo and F_vo.D.S_vo
    8572              : !!    ! It seems that these terms break positive definite property of the Hessian
    8573              : !!    ALLOCATE(H1(H_size,H_size))
    8574              : !!    ALLOCATE(H2(H_size,H_size))
    8575              : !!    H1=0.0_dp
    8576              : !!    H2=0.0_dp
    8577              : !!    DO row = 1, nblkcols_tot
    8578              : !!
    8579              : !!       lev1_hori_offset=0
    8580              : !!       DO col = 1, nblkcols_tot
    8581              : !!
    8582              : !!          CALL dbcsr_get_block_p(matrix_F_vo,&
    8583              : !!                  row, col, block_p, found)
    8584              : !!          CALL dbcsr_get_block_p(matrix_S_vo,&
    8585              : !!                  row, col, block_p2, found2)
    8586              : !!
    8587              : !!          lev1_vert_offset=0
    8588              : !!          DO block_col = 1, nblkcols_tot
    8589              : !!
    8590              : !!             CALL dbcsr_get_block_p(quench_t,&
    8591              : !!                     row, block_col, p_new_block, found_row)
    8592              : !!
    8593              : !!             IF (found_row) THEN
    8594              : !!
    8595              : !!                ! determine offset in this short loop
    8596              : !!                lev2_vert_offset=0
    8597              : !!                DO block_row=1,row-1
    8598              : !!                   CALL dbcsr_get_block_p(quench_t,&
    8599              : !!                           block_row, block_col, p_new_block, found_col)
    8600              : !!                   IF (found_col) lev2_vert_offset=lev2_vert_offset+ao_block_sizes(block_row)
    8601              : !!                ENDDO
    8602              : !!                !!!!!!!! short loop
    8603              : !!
    8604              : !!                ! over all electrons of the block
    8605              : !!                DO orb_i=1, mo_block_sizes(col)
    8606              : !!
    8607              : !!                   ! into all possible locations
    8608              : !!                   DO orb_j=1, mo_block_sizes(block_col)
    8609              : !!
    8610              : !!                      ! column is copied several times
    8611              : !!                      DO copy=1, ao_domain_sizes(col)
    8612              : !!
    8613              : !!                         IF (found) THEN
    8614              : !!
    8615              : !!                            !WRITE(*,*) row, col, block_col, orb_i, orb_j, copy,&
    8616              : !!                            ! lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+1,&
    8617              : !!                            ! lev1_hori_offset+(orb_i-1)*ao_domain_sizes(col)+copy
    8618              : !!
    8619              : !!                            H1( lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+1:&
    8620              : !!                                lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+ao_block_sizes(row),&
    8621              : !!                                lev1_hori_offset+(orb_i-1)*ao_domain_sizes(col)+copy )&
    8622              : !!                              =block_p(:,orb_i)
    8623              : !!
    8624              : !!                         ENDIF ! found block in the data matrix
    8625              : !!
    8626              : !!                         IF (found2) THEN
    8627              : !!
    8628              : !!                            H2( lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+1:&
    8629              : !!                                lev1_vert_offset+(orb_j-1)*ao_domain_sizes(block_col)+lev2_vert_offset+ao_block_sizes(row),&
    8630              : !!                                lev1_hori_offset+(orb_i-1)*ao_domain_sizes(col)+copy )&
    8631              : !!                              =block_p2(:,orb_i)
    8632              : !!
    8633              : !!                         ENDIF ! found block in the data matrix
    8634              : !!
    8635              : !!                      ENDDO
    8636              : !!
    8637              : !!                   ENDDO
    8638              : !!
    8639              : !!                ENDDO
    8640              : !!
    8641              : !!                !lev2_vert_offset=lev2_vert_offset+ao_block_sizes(row)
    8642              : !!
    8643              : !!             ENDIF ! found block in the quench matrix
    8644              : !!
    8645              : !!             lev1_vert_offset=lev1_vert_offset+&
    8646              : !!                ao_domain_sizes(block_col)*mo_block_sizes(block_col)
    8647              : !!
    8648              : !!          ENDDO
    8649              : !!
    8650              : !!          lev1_hori_offset=lev1_hori_offset+&
    8651              : !!             ao_domain_sizes(col)*mo_block_sizes(col)
    8652              : !!
    8653              : !!       ENDDO
    8654              : !!
    8655              : !!       !lev2_vert_offset=lev2_vert_offset+ao_block_sizes(row)
    8656              : !!
    8657              : !!    ENDDO
    8658              : !!    H1(:,:)=H1(:,:)*2.0_dp*spin_factor
    8659              : !!    !!!WRITE(*,*) "F_vo"
    8660              : !!    !!!DO ii=1,H_size
    8661              : !!    !!! WRITE(*,'(100F13.9)') H1(ii,:)
    8662              : !!    !!!ENDDO
    8663              : !!    !!!WRITE(*,*) "S_vo"
    8664              : !!    !!!DO ii=1,H_size
    8665              : !!    !!! WRITE(*,'(100F13.9)') H2(ii,:)
    8666              : !!    !!!ENDDO
    8667              : !!    !!!!! add terms to the hessian
    8668              : !!    DO ii=1,H_size
    8669              : !!       DO jj=1,H_size
    8670              : !!! add penalty_occ_vol term
    8671              : !!          H(ii,jj)=H(ii,jj)-H1(ii,jj)*H2(jj,ii)-H1(jj,ii)*H2(ii,jj)
    8672              : !!       ENDDO
    8673              : !!    ENDDO
    8674              : !!    DEALLOCATE(H1)
    8675              : !!    DEALLOCATE(H2)
    8676              : 
    8677              : !!    ! S_vo.S_vo diagonal component due to determiant constraint
    8678              : !!    ! use grad vector temporarily
    8679              : !!    IF (penalty_occ_vol) THEN
    8680              : !!       ALLOCATE(Grad_vec(H_size))
    8681              : !!       Grad_vec(:)=0.0_dp
    8682              : !!       lev1_vert_offset=0
    8683              : !!       ! loop over all electron blocks
    8684              : !!       DO col = 1, nblkcols_tot
    8685              : !!
    8686              : !!          ! loop over AO-rows of the dbcsr matrix
    8687              : !!          lev2_vert_offset=0
    8688              : !!          DO row = 1, nblkrows_tot
    8689              : !!
    8690              : !!             CALL dbcsr_get_block_p(quench_t,&
    8691              : !!                     row, col, block_p, found_row)
    8692              : !!             IF (found_row) THEN
    8693              : !!
    8694              : !!                CALL dbcsr_get_block_p(matrix_S_vo,&
    8695              : !!                        row, col, block_p, found)
    8696              : !!                IF (found) THEN
    8697              : !!                   ! copy the data into the vector, column by column
    8698              : !!                   DO orb_i=1, mo_block_sizes(col)
    8699              : !!                      Grad_vec(lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+1:&
    8700              : !!                               lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+ao_block_sizes(row))&
    8701              : !!                               =block_p(:,orb_i)
    8702              : !!                   ENDDO
    8703              : !!
    8704              : !!                ENDIF
    8705              : !!
    8706              : !!                lev2_vert_offset=lev2_vert_offset+ao_block_sizes(row)
    8707              : !!
    8708              : !!             ENDIF
    8709              : !!
    8710              : !!          ENDDO
    8711              : !!
    8712              : !!          lev1_vert_offset=lev1_vert_offset+ao_domain_sizes(col)*mo_block_sizes(col)
    8713              : !!
    8714              : !!       ENDDO ! loop over electron blocks
    8715              : !!       ! update H now
    8716              : !!       DO ii=1,H_size
    8717              : !!          DO jj=1,H_size
    8718              : !!             H(ii,jj)=H(ii,jj)+penalty_occ_vol_prefactor*&
    8719              : !!                      penalty_occ_vol_pf2*Grad_vec(ii)*Grad_vec(jj)
    8720              : !!          ENDDO
    8721              : !!       ENDDO
    8722              : !!       DEALLOCATE(Grad_vec)
    8723              : !!    ENDIF ! penalty_occ_vol
    8724              : 
    8725              : !S-1.G ! invert S using cholesky
    8726              : !S-1.G CALL dbcsr_create(m_prec_out,&
    8727              : !S-1.G         template=m_s,&
    8728              : !S-1.G         matrix_type=dbcsr_type_no_symmetry)
    8729              : !S-1.G CALL dbcsr_copy(m_prec_out,m_s)
    8730              : !S-1.G CALL dbcsr_cholesky_decompose(m_prec_out,&
    8731              : !S-1.G         para_env=para_env,&
    8732              : !S-1.G         blacs_env=blacs_env)
    8733              : !S-1.G CALL dbcsr_cholesky_invert(m_prec_out,&
    8734              : !S-1.G         para_env=para_env,&
    8735              : !S-1.G         blacs_env=blacs_env,&
    8736              : !S-1.G         uplo_to_full=.TRUE.)
    8737              : !S-1.G CALL dbcsr_multiply("N","N",1.0_dp,&
    8738              : !S-1.G         m_prec_out,&
    8739              : !S-1.G         matrix_grad,&
    8740              : !S-1.G         0.0_dp,matrix_step,&
    8741              : !S-1.G         filter_eps=1.0E-10_dp)
    8742              : !S-1.G !CALL dbcsr_release(m_prec_out)
    8743              : !S-1.G ALLOCATE(test3(H_size))
    8744              : 
    8745              :       ! convert gradient from the dbcsr matrix to the vector form
    8746            0 :       ALLOCATE (Grad_vec(H_size))
    8747            0 :       Grad_vec(:) = 0.0_dp
    8748            0 :       lev1_vert_offset = 0
    8749              :       ! loop over all electron blocks
    8750            0 :       DO col = 1, nblkcols_tot
    8751              : 
    8752              :          ! loop over AO-rows of the dbcsr matrix
    8753            0 :          lev2_vert_offset = 0
    8754            0 :          DO row = 1, nblkrows_tot
    8755              : 
    8756              :             CALL dbcsr_get_block_p(quench_t, &
    8757            0 :                                    row, col, block_p, found_row)
    8758            0 :             IF (found_row) THEN
    8759              : 
    8760              :                CALL dbcsr_get_block_p(matrix_grad, &
    8761            0 :                                       row, col, block_p, found)
    8762            0 :                IF (found) THEN
    8763              :                   ! copy the data into the vector, column by column
    8764            0 :                   DO orb_i = 1, mo_block_sizes(col)
    8765              :                      Grad_vec(lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + 1: &
    8766              :                               lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + ao_block_sizes(row)) &
    8767            0 :                         = block_p(:, orb_i)
    8768              : !WRITE(*,*) "GRAD: ", row, col, orb_i, lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+1, ao_block_sizes(row)
    8769              :                   END DO
    8770              : 
    8771              :                END IF
    8772              : 
    8773              : !S-1.G CALL dbcsr_get_block_p(matrix_step,&
    8774              : !S-1.G         row, col, block_p, found)
    8775              : !S-1.G IF (found) THEN
    8776              : !S-1.G    ! copy the data into the vector, column by column
    8777              : !S-1.G    DO orb_i=1, mo_block_sizes(col)
    8778              : !S-1.G       test3(lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+1:&
    8779              : !S-1.G                lev1_vert_offset+ao_domain_sizes(col)*(orb_i-1)+lev2_vert_offset+ao_block_sizes(row))&
    8780              : !S-1.G                =block_p(:,orb_i)
    8781              : !S-1.G    ENDDO
    8782              : !S-1.G ENDIF
    8783              : 
    8784            0 :                lev2_vert_offset = lev2_vert_offset + ao_block_sizes(row)
    8785              : 
    8786              :             END IF
    8787              : 
    8788              :          END DO
    8789              : 
    8790            0 :          lev1_vert_offset = lev1_vert_offset + ao_domain_sizes(col)*mo_block_sizes(col)
    8791              : 
    8792              :       END DO ! loop over electron blocks
    8793              : 
    8794              :       !WRITE(*,*) "HESSIAN"
    8795              :       !DO ii=1,H_size
    8796              :       ! WRITE(*,*) ii
    8797              :       ! WRITE(*,'(20F14.10)') H(ii,:)
    8798              :       !ENDDO
    8799              : 
    8800              :       ! invert the Hessian
    8801            0 :       INFO = 0
    8802            0 :       ALLOCATE (Hinv(H_size, H_size))
    8803            0 :       Hinv(:, :) = H(:, :)
    8804              : 
    8805              :       ! before inverting diagonalize
    8806            0 :       ALLOCATE (eigenvalues(H_size))
    8807              :       ! Query the optimal workspace for dsyev
    8808            0 :       LWORK = -1
    8809            0 :       ALLOCATE (WORK(MAX(1, LWORK)))
    8810            0 :       CALL dsyev('V', 'L', H_size, Hinv, H_size, eigenvalues, WORK, LWORK, INFO)
    8811            0 :       LWORK = INT(WORK(1))
    8812            0 :       DEALLOCATE (WORK)
    8813              :       ! Allocate the workspace and solve the eigenproblem
    8814            0 :       ALLOCATE (WORK(MAX(1, LWORK)))
    8815            0 :       CALL dsyev('V', 'L', H_size, Hinv, H_size, eigenvalues, WORK, LWORK, INFO)
    8816            0 :       IF (INFO /= 0) THEN
    8817            0 :          WRITE (unit_nr, *) 'DSYEV ERROR MESSAGE: ', INFO
    8818            0 :          CPABORT("DSYEV failed")
    8819              :       END IF
    8820            0 :       DEALLOCATE (WORK)
    8821              : 
    8822              :       ! compute grad vector in the basis of Hessian eigenvectors
    8823            0 :       ALLOCATE (Step_vec(H_size))
    8824              :       ! Step_vec contains Grad_vec here
    8825            0 :       Step_vec(:) = MATMUL(TRANSPOSE(Hinv), Grad_vec)
    8826              : 
    8827              :       ! compute U.tr(U)-1 = error
    8828              :       !ALLOCATE(test(H_size,H_size))
    8829              :       !test(:,:)=MATMUL(TRANSPOSE(Hinv),Hinv)
    8830              :       !DO ii=1,H_size
    8831              :       !   test(ii,ii)=test(ii,ii)-1.0_dp
    8832              :       !ENDDO
    8833              :       !test_error=0.0_dp
    8834              :       !DO ii=1,H_size
    8835              :       !   DO jj=1,H_size
    8836              :       !      test_error=test_error+test(jj,ii)*test(jj,ii)
    8837              :       !   ENDDO
    8838              :       !ENDDO
    8839              :       !WRITE(*,*) "U.tr(U)-1 error: ", SQRT(test_error)
    8840              :       !DEALLOCATE(test)
    8841              : 
    8842              :       ! invert eigenvalues and use eigenvectors to compute the Hessian inverse
    8843              :       ! project out zero-eigenvalue directions
    8844            0 :       ALLOCATE (test(H_size, H_size))
    8845            0 :       zero_neg_eiv = 0
    8846            0 :       DO jj = 1, H_size
    8847            0 :          WRITE (unit_nr, "(I10,F20.10,F20.10)") jj, eigenvalues(jj), Step_vec(jj)
    8848            0 :          IF (eigenvalues(jj) > eps_zero) THEN
    8849            0 :             test(jj, :) = Hinv(:, jj)/eigenvalues(jj)
    8850              :          ELSE
    8851            0 :             test(jj, :) = Hinv(:, jj)*0.0_dp
    8852            0 :             zero_neg_eiv = zero_neg_eiv + 1
    8853              :          END IF
    8854              :       END DO
    8855            0 :       WRITE (unit_nr, *) 'ZERO OR NEGATIVE EIGENVALUES: ', zero_neg_eiv
    8856            0 :       DEALLOCATE (Step_vec)
    8857              : 
    8858            0 :       ALLOCATE (test2(H_size, H_size))
    8859            0 :       test2(:, :) = MATMUL(Hinv, test)
    8860            0 :       Hinv(:, :) = test2(:, :)
    8861            0 :       DEALLOCATE (test, test2)
    8862              : 
    8863              :       !! shift to kill singularity
    8864              :       !shift=0.0_dp
    8865              :       !IF (eigenvalues(1).lt.0.0_dp) THEN
    8866              :       !   CPABORT("Negative eigenvalue(s)")
    8867              :       !   shift=abs(eigenvalues(1))
    8868              :       !   WRITE(*,*) "Lowest eigenvalue: ", eigenvalues(1)
    8869              :       !ENDIF
    8870              :       !DO ii=1, H_size
    8871              :       !   IF (eigenvalues(ii).gt.eps_zero) THEN
    8872              :       !      shift=shift+min(1.0_dp,eigenvalues(ii))*1.0E-4_dp
    8873              :       !      EXIT
    8874              :       !   ENDIF
    8875              :       !ENDDO
    8876              :       !WRITE(*,*) "Hessian shift: ", shift
    8877              :       !DO ii=1, H_size
    8878              :       !   H(ii,ii)=H(ii,ii)+shift
    8879              :       !ENDDO
    8880              :       !! end shift
    8881              : 
    8882            0 :       DEALLOCATE (eigenvalues)
    8883              : 
    8884              : !!!!    Hinv=H
    8885              : !!!!    INFO=0
    8886              : !!!!    CALL dpotrf('L', H_size, Hinv, H_size, INFO )
    8887              : !!!!    IF( INFO/=0 ) THEN
    8888              : !!!!       WRITE(*,*) 'DPOTRF ERROR MESSAGE: ', INFO
    8889              : !!!!       CPABORT("DPOTRF failed")
    8890              : !!!!    END IF
    8891              : !!!!    CALL dpotri('L', H_size, Hinv, H_size, INFO )
    8892              : !!!!    IF( INFO/=0 ) THEN
    8893              : !!!!       WRITE(*,*) 'DPOTRI ERROR MESSAGE: ', INFO
    8894              : !!!!       CPABORT("DPOTRI failed")
    8895              : !!!!    END IF
    8896              : !!!!    ! complete the matrix
    8897              : !!!!    DO ii=1,H_size
    8898              : !!!!       DO jj=ii+1,H_size
    8899              : !!!!          Hinv(ii,jj)=Hinv(jj,ii)
    8900              : !!!!       ENDDO
    8901              : !!!!    ENDDO
    8902              : 
    8903              :       ! compute the inversion error
    8904            0 :       ALLOCATE (test(H_size, H_size))
    8905            0 :       test(:, :) = MATMUL(Hinv, H)
    8906            0 :       DO ii = 1, H_size
    8907            0 :          test(ii, ii) = test(ii, ii) - 1.0_dp
    8908              :       END DO
    8909            0 :       test_error = 0.0_dp
    8910            0 :       DO ii = 1, H_size
    8911            0 :          DO jj = 1, H_size
    8912            0 :             test_error = test_error + test(jj, ii)*test(jj, ii)
    8913              :          END DO
    8914              :       END DO
    8915            0 :       WRITE (unit_nr, *) "Hessian inversion error: ", SQRT(test_error)
    8916            0 :       DEALLOCATE (test)
    8917              : 
    8918              :       ! prepare the output vector
    8919            0 :       ALLOCATE (Step_vec(H_size))
    8920            0 :       ALLOCATE (tmp(H_size))
    8921            0 :       tmp(:) = MATMUL(Hinv, Grad_vec)
    8922              :       !tmp(:)=MATMUL(Hinv,test3)
    8923            0 :       Step_vec(:) = -1.0_dp*tmp(:)
    8924              : 
    8925            0 :       ALLOCATE (tmpr(H_size))
    8926            0 :       tmpr(:) = MATMUL(H, Step_vec)
    8927            0 :       tmp(:) = tmpr(:) + Grad_vec(:)
    8928            0 :       DEALLOCATE (tmpr)
    8929            0 :       WRITE (unit_nr, *) "NEWTOV step error: ", MAXVAL(ABS(tmp))
    8930              : 
    8931            0 :       DEALLOCATE (tmp)
    8932              : 
    8933            0 :       DEALLOCATE (H)
    8934            0 :       DEALLOCATE (Hinv)
    8935            0 :       DEALLOCATE (Grad_vec)
    8936              : 
    8937              : !S-1.G DEALLOCATE(test3)
    8938              : 
    8939              :       ! copy the step from the vector into the dbcsr matrix
    8940              : 
    8941              :       ! re-create the step matrix to remove all blocks
    8942              :       CALL dbcsr_create(matrix_step, &
    8943              :                         template=matrix_grad, &
    8944            0 :                         matrix_type=dbcsr_type_no_symmetry)
    8945            0 :       CALL dbcsr_work_create(matrix_step, work_mutable=.TRUE.)
    8946              : 
    8947            0 :       lev1_vert_offset = 0
    8948              :       ! loop over all electron blocks
    8949            0 :       DO col = 1, nblkcols_tot
    8950              : 
    8951              :          ! loop over AO-rows of the dbcsr matrix
    8952            0 :          lev2_vert_offset = 0
    8953            0 :          DO row = 1, nblkrows_tot
    8954              : 
    8955              :             CALL dbcsr_get_block_p(quench_t, &
    8956            0 :                                    row, col, block_p, found_row)
    8957            0 :             IF (found_row) THEN
    8958              :                ! copy the data column by column
    8959            0 :                ALLOCATE (new_block(ao_block_sizes(row), mo_block_sizes(col)))
    8960            0 :                DO orb_i = 1, mo_block_sizes(col)
    8961              :                   new_block(:, orb_i) = &
    8962              :                      Step_vec(lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + 1: &
    8963            0 :                               lev1_vert_offset + ao_domain_sizes(col)*(orb_i - 1) + lev2_vert_offset + ao_block_sizes(row))
    8964              :                END DO
    8965            0 :                CALL dbcsr_put_block(matrix_step, row, col, new_block)
    8966            0 :                DEALLOCATE (new_block)
    8967            0 :                lev2_vert_offset = lev2_vert_offset + ao_block_sizes(row)
    8968              :             END IF
    8969              : 
    8970              :          END DO
    8971              : 
    8972            0 :          lev1_vert_offset = lev1_vert_offset + ao_domain_sizes(col)*mo_block_sizes(col)
    8973              : 
    8974              :       END DO ! loop over electron blocks
    8975              : 
    8976            0 :       DEALLOCATE (Step_vec)
    8977              : 
    8978            0 :       CALL dbcsr_finalize(matrix_step)
    8979              : 
    8980              : !S-1.G CALL dbcsr_create(m_tmp_no_1,&
    8981              : !S-1.G         template=matrix_step,&
    8982              : !S-1.G         matrix_type=dbcsr_type_no_symmetry)
    8983              : !S-1.G CALL dbcsr_multiply("N","N",1.0_dp,&
    8984              : !S-1.G         m_prec_out,&
    8985              : !S-1.G         matrix_step,&
    8986              : !S-1.G         0.0_dp,m_tmp_no_1,&
    8987              : !S-1.G         filter_eps=1.0E-10_dp,&
    8988              : !S-1.G         )
    8989              : !S-1.G CALL dbcsr_copy(matrix_step,m_tmp_no_1)
    8990              : !S-1.G CALL dbcsr_release(m_tmp_no_1)
    8991              : !S-1.G CALL dbcsr_release(m_prec_out)
    8992              : 
    8993            0 :       DEALLOCATE (mo_block_sizes, ao_block_sizes)
    8994            0 :       DEALLOCATE (ao_domain_sizes)
    8995              : 
    8996              :       CALL dbcsr_create(matrix_S_ao_sym, &
    8997              :                         template=quench_t, &
    8998            0 :                         matrix_type=dbcsr_type_no_symmetry)
    8999            0 :       CALL dbcsr_copy(matrix_S_ao_sym, quench_t)
    9000              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9001              :                           matrix_F_ao, &
    9002              :                           matrix_step, &
    9003              :                           0.0_dp, matrix_S_ao_sym, &
    9004            0 :                           retain_sparsity=.TRUE.)
    9005              :       CALL dbcsr_create(matrix_F_ao_sym, &
    9006              :                         template=quench_t, &
    9007            0 :                         matrix_type=dbcsr_type_no_symmetry)
    9008            0 :       CALL dbcsr_copy(matrix_F_ao_sym, quench_t)
    9009              :       CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9010              :                           matrix_S_ao, &
    9011              :                           matrix_step, &
    9012              :                           0.0_dp, matrix_F_ao_sym, &
    9013            0 :                           retain_sparsity=.TRUE.)
    9014              :       CALL dbcsr_add(matrix_S_ao_sym, matrix_F_ao_sym, &
    9015            0 :                      1.0_dp, 1.0_dp)
    9016            0 :       CALL dbcsr_scale(matrix_S_ao_sym, 2.0_dp*spin_factor)
    9017              :       CALL dbcsr_add(matrix_S_ao_sym, matrix_grad, &
    9018            0 :                      1.0_dp, 1.0_dp)
    9019            0 :       test_error = dbcsr_maxabs(matrix_S_ao_sym)
    9020            0 :       WRITE (unit_nr, *) "NEWTOL step error: ", test_error
    9021            0 :       CALL dbcsr_release(matrix_S_ao_sym)
    9022            0 :       CALL dbcsr_release(matrix_F_ao_sym)
    9023              : 
    9024            0 :       CALL timestop(handle)
    9025              : 
    9026            0 :    END SUBROUTINE hessian_diag_apply
    9027              : 
    9028              : ! **************************************************************************************************
    9029              : !> \brief Optimization of ALMOs using trust region minimizers
    9030              : !> \param qs_env ...
    9031              : !> \param almo_scf_env ...
    9032              : !> \param optimizer   controls the optimization algorithm
    9033              : !> \param quench_t ...
    9034              : !> \param matrix_t_in ...
    9035              : !> \param matrix_t_out ...
    9036              : !> \param perturbation_only - perturbative (do not update Hamiltonian)
    9037              : !> \param special_case   to reduce the overhead special cases are implemented:
    9038              : !>                       xalmo_case_normal - no special case (i.e. xALMOs)
    9039              : !>                       xalmo_case_block_diag
    9040              : !>                       xalmo_case_fully_deloc
    9041              : !> \par History
    9042              : !>       2020.01 created [Rustam Z Khaliullin]
    9043              : !> \author Rustam Z Khaliullin
    9044              : ! **************************************************************************************************
    9045           18 :    SUBROUTINE almo_scf_xalmo_trustr(qs_env, almo_scf_env, optimizer, quench_t, &
    9046              :                                     matrix_t_in, matrix_t_out, perturbation_only, &
    9047              :                                     special_case)
    9048              : 
    9049              :       TYPE(qs_environment_type), POINTER                 :: qs_env
    9050              :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
    9051              :       TYPE(optimizer_options_type), INTENT(IN)           :: optimizer
    9052              :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: quench_t, matrix_t_in, matrix_t_out
    9053              :       LOGICAL, INTENT(IN)                                :: perturbation_only
    9054              :       INTEGER, INTENT(IN), OPTIONAL                      :: special_case
    9055              : 
    9056              :       CHARACTER(len=*), PARAMETER :: routineN = 'almo_scf_xalmo_trustr'
    9057              : 
    9058              :       INTEGER :: handle, ispin, iteration, iteration_type_to_report, my_special_case, ndomains, &
    9059              :          nspins, outer_iteration, prec_type, unit_nr
    9060           18 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
    9061              :       LOGICAL :: assume_t0_q0x, border_reached, inner_loop_success, normalize_orbitals, &
    9062              :          optimize_theta, penalty_occ_vol, reset_conjugator, same_position, scf_converged
    9063              :       REAL(kind=dp) :: beta, energy_start, energy_trial, eta, expected_reduction, &
    9064              :          fake_step_size_to_report, grad_norm_ratio, grad_norm_ref, loss_change_to_report, &
    9065              :          loss_start, loss_trial, model_grad_norm, penalty_amplitude, penalty_start, penalty_trial, &
    9066              :          radius_current, radius_max, real_temp, rho, spin_factor, step_norm, step_size, t1, &
    9067              :          t1outer, t2, t2outer, y_scalar
    9068           18 :       REAL(kind=dp), ALLOCATABLE, DIMENSION(:)           :: grad_norm_spin, &
    9069           18 :                                                             penalty_occ_vol_g_prefactor, &
    9070           18 :                                                             penalty_occ_vol_h_prefactor
    9071              :       TYPE(cp_logger_type), POINTER                      :: logger
    9072              :       TYPE(dbcsr_type)                                   :: m_s_inv
    9073           18 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:) :: FTsiginv, grad, m_model_Bd, m_model_d, &
    9074           18 :          m_model_hessian, m_model_hessian_inv, m_model_r, m_model_r_prev, m_model_rt, &
    9075           18 :          m_model_rt_prev, m_sig_sqrti_ii, m_theta, m_theta_trial, prev_step, siginvTFTsiginv, ST, &
    9076           18 :          step, STsiginv_0
    9077              :       TYPE(domain_submatrix_type), ALLOCATABLE, &
    9078           18 :          DIMENSION(:, :)                                 :: domain_model_hessian_inv, domain_r_down
    9079              : 
    9080              :       ! RZK-warning: number of temporary storage matrices can be reduced
    9081           18 :       CALL timeset(routineN, handle)
    9082              : 
    9083           18 :       t1outer = m_walltime()
    9084              : 
    9085           18 :       my_special_case = xalmo_case_normal
    9086           18 :       IF (PRESENT(special_case)) my_special_case = special_case
    9087              : 
    9088              :       ! get a useful output_unit
    9089           18 :       logger => cp_get_default_logger()
    9090           18 :       IF (logger%para_env%is_source()) THEN
    9091            9 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    9092              :       ELSE
    9093            9 :          unit_nr = -1
    9094              :       END IF
    9095              : 
    9096              :       ! Trust radius code is written to obviate the need in projected orbitals
    9097           18 :       assume_t0_q0x = .FALSE.
    9098              :       ! Smoothing of the orbitals have not been implemented
    9099           18 :       optimize_theta = .FALSE.
    9100              : 
    9101           18 :       nspins = almo_scf_env%nspins
    9102           18 :       IF (nspins == 1) THEN
    9103           18 :          spin_factor = 2.0_dp
    9104              :       ELSE
    9105            0 :          spin_factor = 1.0_dp
    9106              :       END IF
    9107              : 
    9108           18 :       IF (unit_nr > 0) THEN
    9109            9 :          WRITE (unit_nr, *)
    9110            1 :          SELECT CASE (my_special_case)
    9111              :          CASE (xalmo_case_block_diag)
    9112            1 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
    9113            2 :                " Optimization of block-diagonal ALMOs ", REPEAT("-", 21)
    9114              :          CASE (xalmo_case_fully_deloc)
    9115            0 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 20), &
    9116            0 :                " Optimization of fully delocalized MOs ", REPEAT("-", 20)
    9117              :          CASE (xalmo_case_normal)
    9118            8 :             WRITE (unit_nr, '(T2,A,A,A)') REPEAT("-", 27), &
    9119           17 :                " Optimization of XALMOs ", REPEAT("-", 28)
    9120              :          END SELECT
    9121            9 :          WRITE (unit_nr, *)
    9122              :          CALL trust_r_report(unit_nr, &
    9123              :                              iter_type=0, & ! print header, all values are ignored
    9124              :                              iteration=0, &
    9125              :                              radius=0.0_dp, &
    9126              :                              loss=0.0_dp, &
    9127              :                              delta_loss=0.0_dp, &
    9128              :                              grad_norm=0.0_dp, &
    9129              :                              predicted_reduction=0.0_dp, &
    9130              :                              rho=0.0_dp, &
    9131              :                              new=.TRUE., &
    9132            9 :                              time=0.0_dp)
    9133            9 :          WRITE (unit_nr, '(T2,A)') REPEAT("-", 79)
    9134              :       END IF
    9135              : 
    9136              :       ! penalty amplitude adjusts the strength of volume conservation
    9137           18 :       penalty_occ_vol = .FALSE.
    9138              :       !(almo_scf_env%penalty%occ_vol_method /= almo_occ_vol_penalty_none .AND. &
    9139              :       !                   my_special_case == xalmo_case_fully_deloc)
    9140           18 :       normalize_orbitals = penalty_occ_vol
    9141           18 :       penalty_amplitude = 0.0_dp !almo_scf_env%penalty%occ_vol_coeff
    9142           54 :       ALLOCATE (penalty_occ_vol_g_prefactor(nspins))
    9143           36 :       ALLOCATE (penalty_occ_vol_h_prefactor(nspins))
    9144           36 :       penalty_occ_vol_g_prefactor(:) = 0.0_dp
    9145           36 :       penalty_occ_vol_h_prefactor(:) = 0.0_dp
    9146              : 
    9147              :       ! here preconditioner is the Hessian of model function
    9148           18 :       prec_type = optimizer%preconditioner
    9149              : 
    9150           36 :       ALLOCATE (grad_norm_spin(nspins))
    9151           54 :       ALLOCATE (nocc(nspins))
    9152              : 
    9153              :       ! m_theta contains a set of variational parameters
    9154              :       ! that define one-electron orbitals (simple, projected, etc.)
    9155           72 :       ALLOCATE (m_theta(nspins))
    9156           36 :       DO ispin = 1, nspins
    9157              :          CALL dbcsr_create(m_theta(ispin), &
    9158              :                            template=matrix_t_out(ispin), &
    9159           36 :                            matrix_type=dbcsr_type_no_symmetry)
    9160              :       END DO
    9161              : 
    9162              :       ! create initial guess from the initial orbitals
    9163              :       CALL xalmo_initial_guess(m_guess=m_theta, &
    9164              :                                m_t_in=matrix_t_in, &
    9165              :                                m_t0=almo_scf_env%matrix_t_blk, &
    9166              :                                m_quench_t=quench_t, &
    9167              :                                m_overlap=almo_scf_env%matrix_s(1), &
    9168              :                                m_sigma_tmpl=almo_scf_env%matrix_sigma_inv, &
    9169              :                                nspins=nspins, &
    9170              :                                xalmo_history=almo_scf_env%xalmo_history, &
    9171              :                                assume_t0_q0x=assume_t0_q0x, &
    9172              :                                optimize_theta=optimize_theta, &
    9173              :                                envelope_amplitude=almo_scf_env%envelope_amplitude, &
    9174              :                                eps_filter=almo_scf_env%eps_filter, &
    9175              :                                order_lanczos=almo_scf_env%order_lanczos, &
    9176              :                                eps_lanczos=almo_scf_env%eps_lanczos, &
    9177              :                                max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
    9178           18 :                                nocc_of_domain=almo_scf_env%nocc_of_domain)
    9179              : 
    9180           18 :       ndomains = almo_scf_env%ndomains
    9181          218 :       ALLOCATE (domain_r_down(ndomains, nspins))
    9182           18 :       CALL init_submatrices(domain_r_down)
    9183          200 :       ALLOCATE (domain_model_hessian_inv(ndomains, nspins))
    9184           18 :       CALL init_submatrices(domain_model_hessian_inv)
    9185              : 
    9186           54 :       ALLOCATE (m_model_hessian(nspins))
    9187           54 :       ALLOCATE (m_model_hessian_inv(nspins))
    9188           54 :       ALLOCATE (siginvTFTsiginv(nspins))
    9189           54 :       ALLOCATE (STsiginv_0(nspins))
    9190           54 :       ALLOCATE (FTsiginv(nspins))
    9191           54 :       ALLOCATE (ST(nspins))
    9192           54 :       ALLOCATE (grad(nspins))
    9193           72 :       ALLOCATE (prev_step(nspins))
    9194           54 :       ALLOCATE (step(nspins))
    9195           54 :       ALLOCATE (m_sig_sqrti_ii(nspins))
    9196           54 :       ALLOCATE (m_model_r(nspins))
    9197           54 :       ALLOCATE (m_model_rt(nspins))
    9198           54 :       ALLOCATE (m_model_d(nspins))
    9199           54 :       ALLOCATE (m_model_Bd(nspins))
    9200           54 :       ALLOCATE (m_model_r_prev(nspins))
    9201           54 :       ALLOCATE (m_model_rt_prev(nspins))
    9202           54 :       ALLOCATE (m_theta_trial(nspins))
    9203              : 
    9204           36 :       DO ispin = 1, nspins
    9205              : 
    9206              :          ! init temporary storage
    9207              :          CALL dbcsr_create(m_model_hessian_inv(ispin), &
    9208              :                            template=almo_scf_env%matrix_ks(ispin), &
    9209           18 :                            matrix_type=dbcsr_type_no_symmetry)
    9210              :          CALL dbcsr_create(m_model_hessian(ispin), &
    9211              :                            template=almo_scf_env%matrix_ks(ispin), &
    9212           18 :                            matrix_type=dbcsr_type_no_symmetry)
    9213              :          CALL dbcsr_create(siginvTFTsiginv(ispin), &
    9214              :                            template=almo_scf_env%matrix_sigma(ispin), &
    9215           18 :                            matrix_type=dbcsr_type_no_symmetry)
    9216              :          CALL dbcsr_create(STsiginv_0(ispin), &
    9217              :                            template=matrix_t_out(ispin), &
    9218           18 :                            matrix_type=dbcsr_type_no_symmetry)
    9219              :          CALL dbcsr_create(FTsiginv(ispin), &
    9220              :                            template=matrix_t_out(ispin), &
    9221           18 :                            matrix_type=dbcsr_type_no_symmetry)
    9222              :          CALL dbcsr_create(ST(ispin), &
    9223              :                            template=matrix_t_out(ispin), &
    9224           18 :                            matrix_type=dbcsr_type_no_symmetry)
    9225              :          CALL dbcsr_create(grad(ispin), &
    9226              :                            template=matrix_t_out(ispin), &
    9227           18 :                            matrix_type=dbcsr_type_no_symmetry)
    9228              :          CALL dbcsr_create(prev_step(ispin), &
    9229              :                            template=matrix_t_out(ispin), &
    9230           18 :                            matrix_type=dbcsr_type_no_symmetry)
    9231              :          CALL dbcsr_create(step(ispin), &
    9232              :                            template=matrix_t_out(ispin), &
    9233           18 :                            matrix_type=dbcsr_type_no_symmetry)
    9234              :          CALL dbcsr_create(m_sig_sqrti_ii(ispin), &
    9235              :                            template=almo_scf_env%matrix_sigma_inv(ispin), &
    9236           18 :                            matrix_type=dbcsr_type_no_symmetry)
    9237              :          CALL dbcsr_create(m_model_r(ispin), &
    9238              :                            template=matrix_t_out(ispin), &
    9239           18 :                            matrix_type=dbcsr_type_no_symmetry)
    9240              :          CALL dbcsr_create(m_model_rt(ispin), &
    9241              :                            template=matrix_t_out(ispin), &
    9242           18 :                            matrix_type=dbcsr_type_no_symmetry)
    9243              :          CALL dbcsr_create(m_model_d(ispin), &
    9244              :                            template=matrix_t_out(ispin), &
    9245           18 :                            matrix_type=dbcsr_type_no_symmetry)
    9246              :          CALL dbcsr_create(m_model_Bd(ispin), &
    9247              :                            template=matrix_t_out(ispin), &
    9248           18 :                            matrix_type=dbcsr_type_no_symmetry)
    9249              :          CALL dbcsr_create(m_model_r_prev(ispin), &
    9250              :                            template=matrix_t_out(ispin), &
    9251           18 :                            matrix_type=dbcsr_type_no_symmetry)
    9252              :          CALL dbcsr_create(m_model_rt_prev(ispin), &
    9253              :                            template=matrix_t_out(ispin), &
    9254           18 :                            matrix_type=dbcsr_type_no_symmetry)
    9255              :          CALL dbcsr_create(m_theta_trial(ispin), &
    9256              :                            template=matrix_t_out(ispin), &
    9257           18 :                            matrix_type=dbcsr_type_no_symmetry)
    9258              : 
    9259           18 :          CALL dbcsr_set(step(ispin), 0.0_dp)
    9260           18 :          CALL dbcsr_set(prev_step(ispin), 0.0_dp)
    9261              : 
    9262              :          CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), &
    9263           18 :                              nfullrows_total=nocc(ispin))
    9264              : 
    9265              :          ! invert S domains if necessary
    9266              :          ! Note: domains for alpha and beta electrons might be different
    9267              :          ! that is why the inversion of the AO overlap is inside the spin loop
    9268           36 :          IF (my_special_case == xalmo_case_normal) THEN
    9269              : 
    9270              :             CALL construct_domain_s_inv( &
    9271              :                matrix_s=almo_scf_env%matrix_s(1), &
    9272              :                subm_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9273              :                dpattern=quench_t(ispin), &
    9274              :                map=almo_scf_env%domain_map(ispin), &
    9275           16 :                node_of_domain=almo_scf_env%cpu_of_domain)
    9276              : 
    9277              :          END IF
    9278              : 
    9279              :       END DO ! ispin
    9280              : 
    9281              :       ! invert metric for special case where metric is spin independent
    9282           18 :       IF (my_special_case == xalmo_case_block_diag) THEN
    9283              : 
    9284              :          CALL dbcsr_create(m_s_inv, &
    9285              :                            template=almo_scf_env%matrix_s(1), &
    9286            2 :                            matrix_type=dbcsr_type_no_symmetry)
    9287              :          CALL invert_Hotelling(m_s_inv, &
    9288              :                                almo_scf_env%matrix_s_blk(1), &
    9289              :                                threshold=almo_scf_env%eps_filter, &
    9290            2 :                                filter_eps=almo_scf_env%eps_filter)
    9291              : 
    9292           16 :       ELSE IF (my_special_case == xalmo_case_fully_deloc) THEN
    9293              : 
    9294              :          ! invert S using cholesky
    9295              :          CALL dbcsr_create(m_s_inv, &
    9296              :                            template=almo_scf_env%matrix_s(1), &
    9297            0 :                            matrix_type=dbcsr_type_no_symmetry)
    9298            0 :          CALL dbcsr_desymmetrize(almo_scf_env%matrix_s(1), m_s_inv)
    9299              :          CALL cp_dbcsr_cholesky_decompose(m_s_inv, &
    9300              :                                           para_env=almo_scf_env%para_env, &
    9301            0 :                                           blacs_env=almo_scf_env%blacs_env)
    9302              :          CALL cp_dbcsr_cholesky_invert(m_s_inv, &
    9303              :                                        para_env=almo_scf_env%para_env, &
    9304              :                                        blacs_env=almo_scf_env%blacs_env, &
    9305            0 :                                        uplo_to_full=.TRUE.)
    9306            0 :          CALL dbcsr_filter(m_s_inv, almo_scf_env%eps_filter)
    9307              : 
    9308              :       END IF ! s_inv
    9309              : 
    9310           18 :       radius_max = optimizer%max_trust_radius
    9311           18 :       radius_current = MIN(optimizer%initial_trust_radius, radius_max)
    9312              :       ! eta must be between 0 and 0.25
    9313           18 :       eta = MIN(MAX(optimizer%rho_do_not_update, 0.0_dp), 0.25_dp)
    9314              :       energy_start = 0.0_dp
    9315           18 :       energy_trial = 0.0_dp
    9316              :       penalty_start = 0.0_dp
    9317           18 :       penalty_trial = 0.0_dp
    9318              :       loss_start = 0.0_dp ! sum of the energy and penalty
    9319           18 :       loss_trial = 0.0_dp
    9320              : 
    9321           18 :       same_position = .FALSE.
    9322              : 
    9323              :       ! compute the energy
    9324              :       CALL main_var_to_xalmos_and_loss_func( &
    9325              :          almo_scf_env=almo_scf_env, &
    9326              :          qs_env=qs_env, &
    9327              :          m_main_var_in=m_theta, &
    9328              :          m_t_out=matrix_t_out, &
    9329              :          m_sig_sqrti_ii_out=m_sig_sqrti_ii, &
    9330              :          energy_out=energy_start, &
    9331              :          penalty_out=penalty_start, &
    9332              :          m_FTsiginv_out=FTsiginv, &
    9333              :          m_siginvTFTsiginv_out=siginvTFTsiginv, &
    9334              :          m_ST_out=ST, &
    9335              :          m_STsiginv0_in=STsiginv_0, &
    9336              :          m_quench_t_in=quench_t, &
    9337              :          domain_r_down_in=domain_r_down, &
    9338              :          assume_t0_q0x=assume_t0_q0x, &
    9339              :          just_started=.TRUE., &
    9340              :          optimize_theta=optimize_theta, &
    9341              :          normalize_orbitals=normalize_orbitals, &
    9342              :          perturbation_only=perturbation_only, &
    9343              :          do_penalty=penalty_occ_vol, &
    9344           18 :          special_case=my_special_case)
    9345           18 :       loss_start = energy_start + penalty_start
    9346           18 :       IF (my_special_case == xalmo_case_block_diag) THEN
    9347            2 :          almo_scf_env%almo_scf_energy = energy_start
    9348              :       END IF
    9349           36 :       DO ispin = 1, nspins
    9350           36 :          IF (penalty_occ_vol) THEN
    9351              :             penalty_occ_vol_g_prefactor(ispin) = &
    9352            0 :                -2.0_dp*penalty_amplitude*spin_factor*nocc(ispin)
    9353            0 :             penalty_occ_vol_h_prefactor(ispin) = 0.0_dp
    9354              :          END IF
    9355              :       END DO ! ispin
    9356              : 
    9357              :       ! start the outer step-size-adjustment loop
    9358           18 :       scf_converged = .FALSE.
    9359          426 :       adjust_r_loop: DO outer_iteration = 1, optimizer%max_iter_outer_loop
    9360              : 
    9361              :          ! start the inner fixed-radius loop
    9362          426 :          border_reached = .FALSE.
    9363              : 
    9364          852 :          DO ispin = 1, nspins
    9365          426 :             CALL dbcsr_set(step(ispin), 0.0_dp)
    9366          852 :             CALL dbcsr_filter(step(ispin), almo_scf_env%eps_filter)
    9367              :          END DO
    9368              : 
    9369          426 :          IF (.NOT. same_position) THEN
    9370              : 
    9371          852 :             DO ispin = 1, nspins
    9372              : 
    9373              :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Compute model gradient"
    9374              :                CALL compute_gradient( &
    9375              :                   m_grad_out=grad(ispin), &
    9376              :                   m_ks=almo_scf_env%matrix_ks(ispin), &
    9377              :                   m_s=almo_scf_env%matrix_s(1), &
    9378              :                   m_t=matrix_t_out(ispin), &
    9379              :                   m_t0=almo_scf_env%matrix_t_blk(ispin), &
    9380              :                   m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    9381              :                   m_quench_t=quench_t(ispin), &
    9382              :                   m_FTsiginv=FTsiginv(ispin), &
    9383              :                   m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    9384              :                   m_ST=ST(ispin), &
    9385              :                   m_STsiginv0=STsiginv_0(ispin), &
    9386              :                   m_theta=m_theta(ispin), &
    9387              :                   m_sig_sqrti_ii=m_sig_sqrti_ii(ispin), &
    9388              :                   domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9389              :                   domain_r_down=domain_r_down(:, ispin), &
    9390              :                   cpu_of_domain=almo_scf_env%cpu_of_domain, &
    9391              :                   domain_map=almo_scf_env%domain_map(ispin), &
    9392              :                   assume_t0_q0x=assume_t0_q0x, &
    9393              :                   optimize_theta=optimize_theta, &
    9394              :                   normalize_orbitals=normalize_orbitals, &
    9395              :                   penalty_occ_vol=penalty_occ_vol, &
    9396              :                   penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    9397              :                   envelope_amplitude=almo_scf_env%envelope_amplitude, &
    9398              :                   eps_filter=almo_scf_env%eps_filter, &
    9399              :                   spin_factor=spin_factor, &
    9400          852 :                   special_case=my_special_case)
    9401              : 
    9402              :             END DO ! ispin
    9403              : 
    9404              :          END IF ! skip_grad
    9405              : 
    9406              :          ! check convergence and other exit criteria
    9407          852 :          DO ispin = 1, nspins
    9408          852 :             grad_norm_spin(ispin) = dbcsr_maxabs(grad(ispin))
    9409              :             !grad_norm_frob = dbcsr_frobenius_norm(grad(ispin)) / &
    9410              :             !                 dbcsr_frobenius_norm(quench_t(ispin))
    9411              :          END DO ! ispin
    9412         1278 :          grad_norm_ref = MAXVAL(grad_norm_spin)
    9413              : 
    9414          426 :          t2outer = m_walltime()
    9415              :          CALL trust_r_report(unit_nr, &
    9416              :                              iter_type=1, & ! only some data is important
    9417              :                              iteration=outer_iteration, &
    9418              :                              loss=loss_start, &
    9419              :                              delta_loss=0.0_dp, &
    9420              :                              grad_norm=grad_norm_ref, &
    9421              :                              predicted_reduction=0.0_dp, &
    9422              :                              rho=0.0_dp, &
    9423              :                              radius=radius_current, &
    9424              :                              new=.NOT. same_position, &
    9425          426 :                              time=t2outer - t1outer)
    9426          426 :          t1outer = m_walltime()
    9427              : 
    9428          426 :          IF (grad_norm_ref <= optimizer%eps_error) THEN
    9429           18 :             scf_converged = .TRUE.
    9430           18 :             border_reached = .FALSE.
    9431           18 :             expected_reduction = 0.0_dp
    9432           18 :             IF (.NOT. (optimizer%early_stopping_on .AND. outer_iteration == 1)) &
    9433              :                EXIT adjust_r_loop
    9434              :          ELSE
    9435              :             scf_converged = .FALSE.
    9436              :          END IF
    9437              : 
    9438          816 :          DO ispin = 1, nspins
    9439              : 
    9440          408 :             CALL dbcsr_copy(m_model_r(ispin), grad(ispin))
    9441          408 :             CALL dbcsr_scale(m_model_r(ispin), -1.0_dp)
    9442              : 
    9443          408 :             IF (my_special_case == xalmo_case_block_diag .OR. &
    9444              :                 my_special_case == xalmo_case_fully_deloc) THEN
    9445              : 
    9446              :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Multiply Sinv.r"
    9447              :                CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9448              :                                    m_s_inv, &
    9449              :                                    m_model_r(ispin), &
    9450              :                                    0.0_dp, m_model_rt(ispin), &
    9451           92 :                                    filter_eps=almo_scf_env%eps_filter)
    9452              : 
    9453          316 :             ELSE IF (my_special_case == xalmo_case_normal) THEN
    9454              : 
    9455              :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Multiply Sinv_xx.r"
    9456              :                CALL apply_domain_operators( &
    9457              :                   matrix_in=m_model_r(ispin), &
    9458              :                   matrix_out=m_model_rt(ispin), &
    9459              :                   operator1=almo_scf_env%domain_s_inv(:, ispin), &
    9460              :                   dpattern=quench_t(ispin), &
    9461              :                   map=almo_scf_env%domain_map(ispin), &
    9462              :                   node_of_domain=almo_scf_env%cpu_of_domain, &
    9463              :                   my_action=0, &
    9464          316 :                   filter_eps=almo_scf_env%eps_filter)
    9465              : 
    9466              :             ELSE
    9467            0 :                CPABORT("Unknown XALMO special case")
    9468              :             END IF
    9469              : 
    9470          816 :             CALL dbcsr_copy(m_model_d(ispin), m_model_rt(ispin))
    9471              : 
    9472              :          END DO ! ispin
    9473              : 
    9474              :          ! compute model Hessian
    9475          408 :          IF (.NOT. same_position) THEN
    9476              : 
    9477              :             SELECT CASE (prec_type)
    9478              :             CASE (xalmo_prec_domain)
    9479              : 
    9480              :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Compute model Hessian"
    9481          816 :                DO ispin = 1, nspins
    9482              :                   CALL compute_preconditioner( &
    9483              :                      domain_prec_out=almo_scf_env%domain_preconditioner(:, ispin), &
    9484              :                      m_prec_out=m_model_hessian(ispin), &
    9485              :                      m_ks=almo_scf_env%matrix_ks(ispin), &
    9486              :                      m_s=almo_scf_env%matrix_s(1), &
    9487              :                      m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    9488              :                      m_quench_t=quench_t(ispin), &
    9489              :                      m_FTsiginv=FTsiginv(ispin), &
    9490              :                      m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    9491              :                      m_ST=ST(ispin), &
    9492              :                      para_env=almo_scf_env%para_env, &
    9493              :                      blacs_env=almo_scf_env%blacs_env, &
    9494              :                      nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    9495              :                      domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9496              :                      domain_r_down=domain_r_down(:, ispin), &
    9497              :                      cpu_of_domain=almo_scf_env%cpu_of_domain, &
    9498              :                      domain_map=almo_scf_env%domain_map(ispin), &
    9499              :                      assume_t0_q0x=.FALSE., &
    9500              :                      penalty_occ_vol=penalty_occ_vol, &
    9501              :                      penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    9502              :                      eps_filter=almo_scf_env%eps_filter, &
    9503              :                      neg_thr=0.5_dp, &
    9504              :                      spin_factor=spin_factor, &
    9505              :                      skip_inversion=.TRUE., &
    9506          816 :                      special_case=my_special_case)
    9507              :                END DO ! ispin
    9508              : 
    9509              :             CASE DEFAULT
    9510              : 
    9511          408 :                CPABORT("Unknown preconditioner")
    9512              : 
    9513              :             END SELECT ! preconditioner type fork
    9514              : 
    9515              :          END IF  ! not same position
    9516              : 
    9517              :          ! print the header (argument values are ignored)
    9518              :          CALL fixed_r_report(unit_nr, &
    9519              :                              iter_type=0, &
    9520              :                              iteration=0, &
    9521              :                              step_size=0.0_dp, &
    9522              :                              border_reached=.FALSE., &
    9523              :                              curvature=0.0_dp, &
    9524              :                              grad_norm_ratio=0.0_dp, &
    9525          408 :                              time=0.0_dp)
    9526              : 
    9527              :          IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Start inner loop"
    9528              : 
    9529          408 :          t1 = m_walltime()
    9530          408 :          inner_loop_success = .FALSE.
    9531              :          ! trustr_steihaug, trustr_cauchy, trustr_dogleg
    9532          490 :          fixed_r_loop: DO iteration = 1, optimizer%max_iter
    9533              : 
    9534              :             ! Step 2. Get curvature. If negative, step to the border
    9535          490 :             y_scalar = 0.0_dp
    9536          980 :             DO ispin = 1, nspins
    9537              : 
    9538              :                ! Get B.d
    9539          490 :                IF (my_special_case == xalmo_case_block_diag .OR. &
    9540              :                    my_special_case == xalmo_case_fully_deloc) THEN
    9541              : 
    9542              :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9543              :                                       m_model_hessian(ispin), &
    9544              :                                       m_model_d(ispin), &
    9545              :                                       0.0_dp, m_model_Bd(ispin), &
    9546           92 :                                       filter_eps=almo_scf_env%eps_filter)
    9547              : 
    9548              :                ELSE
    9549              : 
    9550              :                   CALL apply_domain_operators( &
    9551              :                      matrix_in=m_model_d(ispin), &
    9552              :                      matrix_out=m_model_Bd(ispin), &
    9553              :                      operator1=almo_scf_env%domain_preconditioner(:, ispin), &
    9554              :                      dpattern=quench_t(ispin), &
    9555              :                      map=almo_scf_env%domain_map(ispin), &
    9556              :                      node_of_domain=almo_scf_env%cpu_of_domain, &
    9557              :                      my_action=0, &
    9558          398 :                      filter_eps=almo_scf_env%eps_filter)
    9559              : 
    9560              :                END IF ! special case
    9561              : 
    9562              :                ! Get y=d^T.B.d
    9563          490 :                CALL dbcsr_dot(m_model_d(ispin), m_model_Bd(ispin), real_temp)
    9564          980 :                y_scalar = y_scalar + real_temp
    9565              : 
    9566              :             END DO ! ispin
    9567              :             IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Curvature: ", y_scalar
    9568              : 
    9569              :             ! step to the border
    9570          490 :             IF (y_scalar < 0.0_dp) THEN
    9571              : 
    9572              :                CALL step_size_to_border( &
    9573              :                   step_size_out=step_size, &
    9574              :                   metric_in=almo_scf_env%matrix_s, &
    9575              :                   position_in=step, &
    9576              :                   direction_in=m_model_d, &
    9577              :                   trust_radius_in=radius_current, &
    9578              :                   quench_t_in=quench_t, &
    9579              :                   eps_filter_in=almo_scf_env%eps_filter &
    9580            0 :                   )
    9581              : 
    9582            0 :                DO ispin = 1, nspins
    9583            0 :                   CALL dbcsr_add(step(ispin), m_model_d(ispin), 1.0_dp, step_size)
    9584              :                END DO
    9585              : 
    9586            0 :                border_reached = .TRUE.
    9587            0 :                inner_loop_success = .TRUE.
    9588              : 
    9589              :                CALL predicted_reduction( &
    9590              :                   reduction_out=expected_reduction, &
    9591              :                   grad_in=grad, &
    9592              :                   step_in=step, &
    9593              :                   hess_in=m_model_hessian, &
    9594              :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
    9595              :                   quench_t_in=quench_t, &
    9596              :                   special_case=my_special_case, &
    9597              :                   eps_filter=almo_scf_env%eps_filter, &
    9598              :                   domain_map=almo_scf_env%domain_map, &
    9599              :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
    9600            0 :                   )
    9601              : 
    9602            0 :                t2 = m_walltime()
    9603              :                CALL fixed_r_report(unit_nr, &
    9604              :                                    iter_type=2, &
    9605              :                                    iteration=iteration, &
    9606              :                                    step_size=step_size, &
    9607              :                                    border_reached=border_reached, &
    9608              :                                    curvature=y_scalar, &
    9609              :                                    grad_norm_ratio=expected_reduction, &
    9610            0 :                                    time=t2 - t1)
    9611              : 
    9612              :                EXIT fixed_r_loop ! the inner loop
    9613              : 
    9614              :             END IF ! y is negative
    9615              : 
    9616              :             ! Step 3. Compute the step size along the direction
    9617          490 :             step_size = 0.0_dp
    9618          980 :             DO ispin = 1, nspins
    9619          490 :                CALL dbcsr_dot(m_model_r(ispin), m_model_rt(ispin), real_temp)
    9620          980 :                step_size = step_size + real_temp
    9621              :             END DO ! ispin
    9622          490 :             step_size = step_size/y_scalar
    9623              :             IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Proposed step size: ", step_size
    9624              : 
    9625              :             ! Update the step matrix
    9626          980 :             DO ispin = 1, nspins
    9627          490 :                CALL dbcsr_copy(prev_step(ispin), step(ispin))
    9628          980 :                CALL dbcsr_add(step(ispin), m_model_d(ispin), 1.0_dp, step_size)
    9629              :             END DO
    9630              : 
    9631              :             ! Compute step norm
    9632              :             CALL contravariant_matrix_norm( &
    9633              :                norm_out=step_norm, &
    9634              :                matrix_in=step, &
    9635              :                metric_in=almo_scf_env%matrix_s, &
    9636              :                quench_t_in=quench_t, &
    9637              :                eps_filter_in=almo_scf_env%eps_filter &
    9638          490 :                )
    9639              :             IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Step norm: ", step_norm
    9640              : 
    9641              :             ! Do not step beyond the trust radius
    9642          490 :             IF (step_norm > radius_current) THEN
    9643              : 
    9644              :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Norm is too large"
    9645              :                CALL step_size_to_border( &
    9646              :                   step_size_out=step_size, &
    9647              :                   metric_in=almo_scf_env%matrix_s, &
    9648              :                   position_in=prev_step, &
    9649              :                   direction_in=m_model_d, &
    9650              :                   trust_radius_in=radius_current, &
    9651              :                   quench_t_in=quench_t, &
    9652              :                   eps_filter_in=almo_scf_env%eps_filter &
    9653           34 :                   )
    9654              :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Step size to border: ", step_size
    9655              : 
    9656           68 :                DO ispin = 1, nspins
    9657           34 :                   CALL dbcsr_copy(step(ispin), prev_step(ispin))
    9658           68 :                   CALL dbcsr_add(step(ispin), m_model_d(ispin), 1.0_dp, step_size)
    9659              :                END DO
    9660              : 
    9661              :                IF (debug_mode) THEN
    9662              :                   ! Compute step norm
    9663              :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Extra norm evaluation"
    9664              :                   CALL contravariant_matrix_norm( &
    9665              :                      norm_out=step_norm, &
    9666              :                      matrix_in=step, &
    9667              :                      metric_in=almo_scf_env%matrix_s, &
    9668              :                      quench_t_in=quench_t, &
    9669              :                      eps_filter_in=almo_scf_env%eps_filter &
    9670              :                      )
    9671              :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Step norm: ", step_norm
    9672              :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Current radius: ", radius_current
    9673              :                END IF
    9674              : 
    9675           34 :                border_reached = .TRUE.
    9676           34 :                inner_loop_success = .TRUE.
    9677              : 
    9678              :                CALL predicted_reduction( &
    9679              :                   reduction_out=expected_reduction, &
    9680              :                   grad_in=grad, &
    9681              :                   step_in=step, &
    9682              :                   hess_in=m_model_hessian, &
    9683              :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
    9684              :                   quench_t_in=quench_t, &
    9685              :                   special_case=my_special_case, &
    9686              :                   eps_filter=almo_scf_env%eps_filter, &
    9687              :                   domain_map=almo_scf_env%domain_map, &
    9688              :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
    9689           34 :                   )
    9690              : 
    9691           34 :                t2 = m_walltime()
    9692              :                CALL fixed_r_report(unit_nr, &
    9693              :                                    iter_type=3, &
    9694              :                                    iteration=iteration, &
    9695              :                                    step_size=step_size, &
    9696              :                                    border_reached=border_reached, &
    9697              :                                    curvature=y_scalar, &
    9698              :                                    grad_norm_ratio=expected_reduction, &
    9699           34 :                                    time=t2 - t1)
    9700              : 
    9701              :                EXIT fixed_r_loop ! the inner loop
    9702              : 
    9703              :             END IF
    9704              : 
    9705          456 :             IF (optimizer%trustr_algorithm == trustr_cauchy) THEN
    9706              :                ! trustr_steihaug, trustr_cauchy, trustr_dogleg
    9707              : 
    9708           80 :                border_reached = .FALSE.
    9709           80 :                inner_loop_success = .TRUE.
    9710              : 
    9711              :                CALL predicted_reduction( &
    9712              :                   reduction_out=expected_reduction, &
    9713              :                   grad_in=grad, &
    9714              :                   step_in=step, &
    9715              :                   hess_in=m_model_hessian, &
    9716              :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
    9717              :                   quench_t_in=quench_t, &
    9718              :                   special_case=my_special_case, &
    9719              :                   eps_filter=almo_scf_env%eps_filter, &
    9720              :                   domain_map=almo_scf_env%domain_map, &
    9721              :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
    9722           80 :                   )
    9723              : 
    9724           80 :                t2 = m_walltime()
    9725              :                CALL fixed_r_report(unit_nr, &
    9726              :                                    iter_type=5, & ! Cauchy point
    9727              :                                    iteration=iteration, &
    9728              :                                    step_size=step_size, &
    9729              :                                    border_reached=border_reached, &
    9730              :                                    curvature=y_scalar, &
    9731              :                                    grad_norm_ratio=expected_reduction, &
    9732           80 :                                    time=t2 - t1)
    9733              : 
    9734              :                EXIT fixed_r_loop ! the inner loop
    9735              : 
    9736          376 :             ELSE IF (optimizer%trustr_algorithm == trustr_dogleg) THEN
    9737              : 
    9738              :                ! invert or pseudo-invert B
    9739          268 :                SELECT CASE (prec_type)
    9740              :                CASE (xalmo_prec_domain)
    9741              : 
    9742              :                   IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Pseudo-invert model Hessian"
    9743          268 :                   IF (special_case == xalmo_case_block_diag) THEN ! non-overlapping diagonal blocks
    9744              : 
    9745          156 :                      DO ispin = 1, nspins
    9746              :                         CALL pseudo_invert_diagonal_blk( &
    9747              :                            matrix_in=m_model_hessian(ispin), &
    9748              :                            matrix_out=m_model_hessian_inv(ispin), &
    9749              :                            nocc=almo_scf_env%nocc_of_domain(:, ispin) &
    9750          156 :                            )
    9751              :                      END DO
    9752              : 
    9753          190 :                   ELSE IF (special_case == xalmo_case_fully_deloc) THEN ! the entire system is a block
    9754              : 
    9755              :                      ! invert using cholesky decomposition
    9756            0 :                      DO ispin = 1, nspins
    9757              :                         CALL dbcsr_copy(m_model_hessian_inv(ispin), &
    9758            0 :                                         m_model_hessian(ispin))
    9759              :                         CALL cp_dbcsr_cholesky_decompose(m_model_hessian_inv(ispin), &
    9760              :                                                          para_env=almo_scf_env%para_env, &
    9761            0 :                                                          blacs_env=almo_scf_env%blacs_env)
    9762              :                         CALL cp_dbcsr_cholesky_invert(m_model_hessian_inv(ispin), &
    9763              :                                                       para_env=almo_scf_env%para_env, &
    9764              :                                                       blacs_env=almo_scf_env%blacs_env, &
    9765            0 :                                                       uplo_to_full=.TRUE.)
    9766              :                         CALL dbcsr_filter(m_model_hessian_inv(ispin), &
    9767            0 :                                           almo_scf_env%eps_filter)
    9768              :                      END DO
    9769              : 
    9770              :                   ELSE
    9771              : 
    9772          380 :                      DO ispin = 1, nspins
    9773              :                         CALL construct_domain_preconditioner( &
    9774              :                            matrix_main=m_model_hessian(ispin), &
    9775              :                            subm_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9776              :                            subm_r_down=domain_r_down(:, ispin), &
    9777              :                            matrix_trimmer=quench_t(ispin), &
    9778              :                            dpattern=quench_t(ispin), &
    9779              :                            map=almo_scf_env%domain_map(ispin), &
    9780              :                            node_of_domain=almo_scf_env%cpu_of_domain, &
    9781              :                            preconditioner=domain_model_hessian_inv(:, ispin), &
    9782              :                            use_trimmer=.FALSE., &
    9783              :                            my_action=0, & ! do not do domain (1-r0) projection
    9784              :                            skip_inversion=.FALSE. &
    9785          380 :                            )
    9786              :                      END DO
    9787              : 
    9788              :                   END IF ! special_case
    9789              : 
    9790              :                   ! slower but more reliable way to get inverted hessian
    9791              :                   !DO ispin = 1, nspins
    9792              :                   !   CALL compute_preconditioner( &
    9793              :                   !      domain_prec_out=domain_model_hessian_inv(:, ispin), &
    9794              :                   !      m_prec_out=m_model_hessian_inv(ispin), & ! RZK-warning: this one is not inverted if DOMAINs
    9795              :                   !      m_ks=almo_scf_env%matrix_ks(ispin), &
    9796              :                   !      m_s=almo_scf_env%matrix_s(1), &
    9797              :                   !      m_siginv=almo_scf_env%matrix_sigma_inv(ispin), &
    9798              :                   !      m_quench_t=quench_t(ispin), &
    9799              :                   !      m_FTsiginv=FTsiginv(ispin), &
    9800              :                   !      m_siginvTFTsiginv=siginvTFTsiginv(ispin), &
    9801              :                   !      m_ST=ST(ispin), &
    9802              :                   !      para_env=almo_scf_env%para_env, &
    9803              :                   !      blacs_env=almo_scf_env%blacs_env, &
    9804              :                   !      nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
    9805              :                   !      domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
    9806              :                   !      domain_r_down=domain_r_down(:, ispin), &
    9807              :                   !      cpu_of_domain=almo_scf_env%cpu_of_domain, &
    9808              :                   !      domain_map=almo_scf_env%domain_map(ispin), &
    9809              :                   !      assume_t0_q0x=.FALSE., &
    9810              :                   !      penalty_occ_vol=penalty_occ_vol, &
    9811              :                   !      penalty_occ_vol_prefactor=penalty_occ_vol_g_prefactor(ispin), &
    9812              :                   !      eps_filter=almo_scf_env%eps_filter, &
    9813              :                   !      neg_thr=1.0E10_dp, &
    9814              :                   !      spin_factor=spin_factor, &
    9815              :                   !      skip_inversion=.FALSE., &
    9816              :                   !      special_case=my_special_case)
    9817              :                   !ENDDO ! ispin
    9818              : 
    9819              :                CASE DEFAULT
    9820              : 
    9821          268 :                   CPABORT("Unknown preconditioner")
    9822              : 
    9823              :                END SELECT ! preconditioner type fork
    9824              : 
    9825              :                ! get pB = Binv.m_model_r = -Binv.grad
    9826          536 :                DO ispin = 1, nspins
    9827              : 
    9828              :                   ! Get B.d
    9829          268 :                   IF (my_special_case == xalmo_case_block_diag .OR. &
    9830          268 :                       my_special_case == xalmo_case_fully_deloc) THEN
    9831              : 
    9832              :                      CALL dbcsr_multiply("N", "N", 1.0_dp, &
    9833              :                                          m_model_hessian_inv(ispin), &
    9834              :                                          m_model_r(ispin), &
    9835              :                                          0.0_dp, m_model_Bd(ispin), &
    9836           78 :                                          filter_eps=almo_scf_env%eps_filter)
    9837              : 
    9838              :                   ELSE
    9839              : 
    9840              :                      CALL apply_domain_operators( &
    9841              :                         matrix_in=m_model_r(ispin), &
    9842              :                         matrix_out=m_model_Bd(ispin), &
    9843              :                         operator1=domain_model_hessian_inv(:, ispin), &
    9844              :                         dpattern=quench_t(ispin), &
    9845              :                         map=almo_scf_env%domain_map(ispin), &
    9846              :                         node_of_domain=almo_scf_env%cpu_of_domain, &
    9847              :                         my_action=0, &
    9848          190 :                         filter_eps=almo_scf_env%eps_filter)
    9849              : 
    9850              :                   END IF ! special case
    9851              : 
    9852              :                END DO ! ispin
    9853              : 
    9854              :                ! Compute norm of pB
    9855              :                CALL contravariant_matrix_norm( &
    9856              :                   norm_out=step_norm, &
    9857              :                   matrix_in=m_model_Bd, &
    9858              :                   metric_in=almo_scf_env%matrix_s, &
    9859              :                   quench_t_in=quench_t, &
    9860              :                   eps_filter_in=almo_scf_env%eps_filter &
    9861          268 :                   )
    9862              :                IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...pB norm: ", step_norm
    9863              : 
    9864              :                ! Do not step beyond the trust radius
    9865          268 :                IF (step_norm <= radius_current) THEN
    9866              : 
    9867              :                   IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Full dogleg"
    9868              : 
    9869          266 :                   border_reached = .FALSE.
    9870              : 
    9871          532 :                   DO ispin = 1, nspins
    9872          532 :                      CALL dbcsr_copy(step(ispin), m_model_Bd(ispin))
    9873              :                   END DO
    9874              : 
    9875          266 :                   fake_step_size_to_report = 2.0_dp
    9876          266 :                   iteration_type_to_report = 6
    9877              : 
    9878              :                ELSE ! take a shorter dogleg step
    9879              : 
    9880              :                   IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...pB norm is too large"
    9881              : 
    9882            2 :                   border_reached = .TRUE.
    9883              : 
    9884              :                   ! compute the dogleg vector = pB - pU
    9885              :                   ! this destroys -Binv.grad content
    9886            4 :                   DO ispin = 1, nspins
    9887            4 :                      CALL dbcsr_add(m_model_Bd(ispin), step(ispin), 1.0_dp, -1.0_dp)
    9888              :                   END DO
    9889              : 
    9890              :                   CALL step_size_to_border( &
    9891              :                      step_size_out=step_size, &
    9892              :                      metric_in=almo_scf_env%matrix_s, &
    9893              :                      position_in=step, &
    9894              :                      direction_in=m_model_Bd, &
    9895              :                      trust_radius_in=radius_current, &
    9896              :                      quench_t_in=quench_t, &
    9897              :                      eps_filter_in=almo_scf_env%eps_filter &
    9898            2 :                      )
    9899              :                   IF (unit_nr > 0 .AND. debug_mode) WRITE (unit_nr, *) "...Step size to border: ", step_size
    9900            2 :                   IF (step_size > 1.0_dp .OR. step_size < 0.0_dp) THEN
    9901            0 :                      IF (unit_nr > 0) &
    9902            0 :                         WRITE (unit_nr, *) "Step size (", step_size, ") must lie inside (0,1)"
    9903            0 :                      CPABORT("Wrong dog leg step. We should never end up here.")
    9904              :                   END IF
    9905              : 
    9906            4 :                   DO ispin = 1, nspins
    9907            4 :                      CALL dbcsr_add(step(ispin), m_model_Bd(ispin), 1.0_dp, step_size)
    9908              :                   END DO
    9909              : 
    9910            2 :                   fake_step_size_to_report = 1.0_dp + step_size
    9911            2 :                   iteration_type_to_report = 7
    9912              : 
    9913              :                END IF ! full or partial dogleg?
    9914              : 
    9915              :                IF (debug_mode) THEN
    9916              :                   ! Compute step norm
    9917              :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Extra norm evaluation"
    9918              :                   CALL contravariant_matrix_norm( &
    9919              :                      norm_out=step_norm, &
    9920              :                      matrix_in=step, &
    9921              :                      metric_in=almo_scf_env%matrix_s, &
    9922              :                      quench_t_in=quench_t, &
    9923              :                      eps_filter_in=almo_scf_env%eps_filter &
    9924              :                      )
    9925              :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Step norm: ", step_norm
    9926              :                   IF (unit_nr > 0) WRITE (unit_nr, *) "...Current radius: ", radius_current
    9927              :                END IF
    9928              : 
    9929              :                CALL predicted_reduction( &
    9930              :                   reduction_out=expected_reduction, &
    9931              :                   grad_in=grad, &
    9932              :                   step_in=step, &
    9933              :                   hess_in=m_model_hessian, &
    9934              :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
    9935              :                   quench_t_in=quench_t, &
    9936              :                   special_case=my_special_case, &
    9937              :                   eps_filter=almo_scf_env%eps_filter, &
    9938              :                   domain_map=almo_scf_env%domain_map, &
    9939              :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
    9940          268 :                   )
    9941              : 
    9942          268 :                inner_loop_success = .TRUE.
    9943              : 
    9944          268 :                t2 = m_walltime()
    9945              :                CALL fixed_r_report(unit_nr, &
    9946              :                                    iter_type=iteration_type_to_report, &
    9947              :                                    iteration=iteration, &
    9948              :                                    step_size=fake_step_size_to_report, &
    9949              :                                    border_reached=border_reached, &
    9950              :                                    curvature=y_scalar, &
    9951              :                                    grad_norm_ratio=expected_reduction, &
    9952          268 :                                    time=t2 - t1)
    9953              : 
    9954              :                EXIT fixed_r_loop ! the inner loop
    9955              : 
    9956              :             END IF ! Non-iterative subproblem methods exit here
    9957              : 
    9958              :             ! Step 4: update model gradient
    9959          216 :             DO ispin = 1, nspins
    9960              :                ! save previous data
    9961          108 :                CALL dbcsr_copy(m_model_r_prev(ispin), m_model_r(ispin))
    9962              :                CALL dbcsr_add(m_model_r(ispin), m_model_Bd(ispin), &
    9963          216 :                               1.0_dp, -step_size)
    9964              :             END DO ! ispin
    9965              : 
    9966              :             ! Model grad norm
    9967          216 :             DO ispin = 1, nspins
    9968          216 :                grad_norm_spin(ispin) = dbcsr_maxabs(m_model_r(ispin))
    9969              :                !grad_norm_frob = dbcsr_frobenius_norm(grad(ispin)) / &
    9970              :                !                 dbcsr_frobenius_norm(quench_t(ispin))
    9971              :             END DO ! ispin
    9972          324 :             model_grad_norm = MAXVAL(grad_norm_spin)
    9973              : 
    9974              :             ! Check norm reduction
    9975          108 :             grad_norm_ratio = model_grad_norm/grad_norm_ref
    9976          108 :             IF (grad_norm_ratio < optimizer%model_grad_norm_ratio) THEN
    9977              : 
    9978           26 :                border_reached = .FALSE.
    9979           26 :                inner_loop_success = .TRUE.
    9980              : 
    9981              :                CALL predicted_reduction( &
    9982              :                   reduction_out=expected_reduction, &
    9983              :                   grad_in=grad, &
    9984              :                   step_in=step, &
    9985              :                   hess_in=m_model_hessian, &
    9986              :                   hess_submatrix_in=almo_scf_env%domain_preconditioner, &
    9987              :                   quench_t_in=quench_t, &
    9988              :                   special_case=my_special_case, &
    9989              :                   eps_filter=almo_scf_env%eps_filter, &
    9990              :                   domain_map=almo_scf_env%domain_map, &
    9991              :                   cpu_of_domain=almo_scf_env%cpu_of_domain &
    9992           26 :                   )
    9993              : 
    9994           26 :                t2 = m_walltime()
    9995              :                CALL fixed_r_report(unit_nr, &
    9996              :                                    iter_type=4, &
    9997              :                                    iteration=iteration, &
    9998              :                                    step_size=step_size, &
    9999              :                                    border_reached=border_reached, &
   10000              :                                    curvature=y_scalar, &
   10001              :                                    grad_norm_ratio=expected_reduction, &
   10002           26 :                                    time=t2 - t1)
   10003              : 
   10004              :                EXIT fixed_r_loop ! the inner loop
   10005              : 
   10006              :             END IF
   10007              : 
   10008              :             ! Step 5: update model direction
   10009          164 :             DO ispin = 1, nspins
   10010              :                ! save previous data
   10011          164 :                CALL dbcsr_copy(m_model_rt_prev(ispin), m_model_rt(ispin))
   10012              :             END DO ! ispin
   10013              : 
   10014          164 :             DO ispin = 1, nspins
   10015              : 
   10016           82 :                IF (my_special_case == xalmo_case_block_diag .OR. &
   10017           82 :                    my_special_case == xalmo_case_fully_deloc) THEN
   10018              : 
   10019              :                   CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10020              :                                       m_s_inv, &
   10021              :                                       m_model_r(ispin), &
   10022              :                                       0.0_dp, m_model_rt(ispin), &
   10023            0 :                                       filter_eps=almo_scf_env%eps_filter)
   10024              : 
   10025           82 :                ELSE IF (my_special_case == xalmo_case_normal) THEN
   10026              : 
   10027              :                   CALL apply_domain_operators( &
   10028              :                      matrix_in=m_model_r(ispin), &
   10029              :                      matrix_out=m_model_rt(ispin), &
   10030              :                      operator1=almo_scf_env%domain_s_inv(:, ispin), &
   10031              :                      dpattern=quench_t(ispin), &
   10032              :                      map=almo_scf_env%domain_map(ispin), &
   10033              :                      node_of_domain=almo_scf_env%cpu_of_domain, &
   10034              :                      my_action=0, &
   10035           82 :                      filter_eps=almo_scf_env%eps_filter)
   10036              : 
   10037              :                END IF
   10038              : 
   10039              :             END DO ! ispin
   10040              : 
   10041              :             CALL compute_cg_beta( &
   10042              :                beta=beta, &
   10043              :                reset_conjugator=reset_conjugator, &
   10044              :                conjugator=optimizer%conjugator, &
   10045              :                grad=m_model_r(:), &
   10046              :                prev_grad=m_model_r_prev(:), &
   10047              :                step=m_model_rt(:), &
   10048              :                prev_step=m_model_rt_prev(:) &
   10049           82 :                )
   10050              : 
   10051          164 :             DO ispin = 1, nspins
   10052              :                ! update direction
   10053          164 :                CALL dbcsr_add(m_model_d(ispin), m_model_rt(ispin), beta, 1.0_dp)
   10054              :             END DO ! ispin
   10055              : 
   10056           82 :             t2 = m_walltime()
   10057              :             CALL fixed_r_report(unit_nr, &
   10058              :                                 iter_type=1, &
   10059              :                                 iteration=iteration, &
   10060              :                                 step_size=step_size, &
   10061              :                                 border_reached=border_reached, &
   10062              :                                 curvature=y_scalar, &
   10063              :                                 grad_norm_ratio=grad_norm_ratio, &
   10064           82 :                                 time=t2 - t1)
   10065           82 :             t1 = m_walltime()
   10066              : 
   10067              :          END DO fixed_r_loop
   10068              :          !!!! done with the inner loop
   10069              :          ! the inner loop must return: step, predicted reduction,
   10070              :          ! whether it reached the border and completed successfully
   10071              : 
   10072              :          IF (.NOT. inner_loop_success) THEN
   10073            0 :             CPABORT("Inner loop did not produce solution")
   10074              :          END IF
   10075              : 
   10076          816 :          DO ispin = 1, nspins
   10077              : 
   10078          408 :             CALL dbcsr_copy(m_theta_trial(ispin), m_theta(ispin))
   10079          816 :             CALL dbcsr_add(m_theta_trial(ispin), step(ispin), 1.0_dp, 1.0_dp)
   10080              : 
   10081              :          END DO ! ispin
   10082              : 
   10083              :          ! compute the energy
   10084              :          !IF (.NOT. same_position) THEN
   10085              :          CALL main_var_to_xalmos_and_loss_func( &
   10086              :             almo_scf_env=almo_scf_env, &
   10087              :             qs_env=qs_env, &
   10088              :             m_main_var_in=m_theta_trial, &
   10089              :             m_t_out=matrix_t_out, &
   10090              :             m_sig_sqrti_ii_out=m_sig_sqrti_ii, &
   10091              :             energy_out=energy_trial, &
   10092              :             penalty_out=penalty_trial, &
   10093              :             m_FTsiginv_out=FTsiginv, &
   10094              :             m_siginvTFTsiginv_out=siginvTFTsiginv, &
   10095              :             m_ST_out=ST, &
   10096              :             m_STsiginv0_in=STsiginv_0, &
   10097              :             m_quench_t_in=quench_t, &
   10098              :             domain_r_down_in=domain_r_down, &
   10099              :             assume_t0_q0x=assume_t0_q0x, &
   10100              :             just_started=.FALSE., &
   10101              :             optimize_theta=optimize_theta, &
   10102              :             normalize_orbitals=normalize_orbitals, &
   10103              :             perturbation_only=perturbation_only, &
   10104              :             do_penalty=penalty_occ_vol, &
   10105          408 :             special_case=my_special_case)
   10106          408 :          loss_trial = energy_trial + penalty_trial
   10107              :          !ENDIF ! not same_position
   10108              : 
   10109          408 :          rho = (loss_trial - loss_start)/expected_reduction
   10110          408 :          loss_change_to_report = loss_trial - loss_start
   10111              : 
   10112          408 :          IF (rho < 0.25_dp) THEN
   10113            0 :             radius_current = 0.25_dp*radius_current
   10114              :          ELSE
   10115          408 :             IF (rho > 0.75_dp .AND. border_reached) THEN
   10116            2 :                radius_current = MIN(2.0_dp*radius_current, radius_max)
   10117              :             END IF
   10118              :          END IF ! radius adjustment
   10119              : 
   10120          408 :          IF (rho > eta) THEN
   10121          816 :             DO ispin = 1, nspins
   10122          816 :                CALL dbcsr_copy(m_theta(ispin), m_theta_trial(ispin))
   10123              :             END DO ! ispin
   10124          408 :             loss_start = loss_trial
   10125          408 :             energy_start = energy_trial
   10126          408 :             penalty_start = penalty_trial
   10127          408 :             same_position = .FALSE.
   10128          408 :             IF (my_special_case == xalmo_case_block_diag) THEN
   10129           92 :                almo_scf_env%almo_scf_energy = energy_trial
   10130              :             END IF
   10131              :          ELSE
   10132            0 :             same_position = .TRUE.
   10133            0 :             IF (my_special_case == xalmo_case_block_diag) THEN
   10134            0 :                almo_scf_env%almo_scf_energy = energy_start
   10135              :             END IF
   10136              :          END IF ! finalize step
   10137              : 
   10138          408 :          t2outer = m_walltime()
   10139              :          CALL trust_r_report(unit_nr, &
   10140              :                              iter_type=2, &
   10141              :                              iteration=outer_iteration, &
   10142              :                              loss=loss_trial, &
   10143              :                              delta_loss=loss_change_to_report, &
   10144              :                              grad_norm=0.0_dp, &
   10145              :                              predicted_reduction=expected_reduction, &
   10146              :                              rho=rho, &
   10147              :                              radius=radius_current, &
   10148              :                              new=.NOT. same_position, &
   10149          408 :                              time=t2outer - t1outer)
   10150          426 :          t1outer = m_walltime()
   10151              : 
   10152              :       END DO adjust_r_loop
   10153              : 
   10154              :       ! post SCF-loop calculations
   10155           18 :       IF (scf_converged) THEN
   10156              : 
   10157              :          CALL wrap_up_xalmo_scf( &
   10158              :             qs_env=qs_env, &
   10159              :             almo_scf_env=almo_scf_env, &
   10160              :             perturbation_in=perturbation_only, &
   10161              :             m_xalmo_in=matrix_t_out, &
   10162              :             m_quench_in=quench_t, &
   10163           18 :             energy_inout=energy_start)
   10164              : 
   10165              :       END IF ! if converged
   10166              : 
   10167           36 :       DO ispin = 1, nspins
   10168           18 :          CALL dbcsr_release(m_model_hessian_inv(ispin))
   10169           18 :          CALL dbcsr_release(m_model_hessian(ispin))
   10170           18 :          CALL dbcsr_release(STsiginv_0(ispin))
   10171           18 :          CALL dbcsr_release(ST(ispin))
   10172           18 :          CALL dbcsr_release(FTsiginv(ispin))
   10173           18 :          CALL dbcsr_release(siginvTFTsiginv(ispin))
   10174           18 :          CALL dbcsr_release(prev_step(ispin))
   10175           18 :          CALL dbcsr_release(grad(ispin))
   10176           18 :          CALL dbcsr_release(step(ispin))
   10177           18 :          CALL dbcsr_release(m_theta(ispin))
   10178           18 :          CALL dbcsr_release(m_sig_sqrti_ii(ispin))
   10179           18 :          CALL dbcsr_release(m_model_r(ispin))
   10180           18 :          CALL dbcsr_release(m_model_rt(ispin))
   10181           18 :          CALL dbcsr_release(m_model_d(ispin))
   10182           18 :          CALL dbcsr_release(m_model_Bd(ispin))
   10183           18 :          CALL dbcsr_release(m_model_r_prev(ispin))
   10184           18 :          CALL dbcsr_release(m_model_rt_prev(ispin))
   10185           18 :          CALL dbcsr_release(m_theta_trial(ispin))
   10186           18 :          CALL release_submatrices(domain_r_down(:, ispin))
   10187           36 :          CALL release_submatrices(domain_model_hessian_inv(:, ispin))
   10188              :       END DO ! ispin
   10189              : 
   10190           18 :       IF (my_special_case == xalmo_case_block_diag .OR. &
   10191              :           my_special_case == xalmo_case_fully_deloc) THEN
   10192            2 :          CALL dbcsr_release(m_s_inv)
   10193              :       END IF
   10194              : 
   10195           18 :       DEALLOCATE (m_model_hessian)
   10196           18 :       DEALLOCATE (m_model_hessian_inv)
   10197           18 :       DEALLOCATE (siginvTFTsiginv)
   10198           18 :       DEALLOCATE (STsiginv_0)
   10199           18 :       DEALLOCATE (FTsiginv)
   10200           18 :       DEALLOCATE (ST)
   10201           18 :       DEALLOCATE (grad)
   10202           18 :       DEALLOCATE (prev_step)
   10203           18 :       DEALLOCATE (step)
   10204           18 :       DEALLOCATE (m_sig_sqrti_ii)
   10205           18 :       DEALLOCATE (m_model_r)
   10206           18 :       DEALLOCATE (m_model_rt)
   10207           18 :       DEALLOCATE (m_model_d)
   10208           18 :       DEALLOCATE (m_model_Bd)
   10209           18 :       DEALLOCATE (m_model_r_prev)
   10210           18 :       DEALLOCATE (m_model_rt_prev)
   10211           18 :       DEALLOCATE (m_theta_trial)
   10212              : 
   10213          146 :       DEALLOCATE (domain_r_down)
   10214          146 :       DEALLOCATE (domain_model_hessian_inv)
   10215              : 
   10216           18 :       DEALLOCATE (penalty_occ_vol_g_prefactor)
   10217           18 :       DEALLOCATE (penalty_occ_vol_h_prefactor)
   10218           18 :       DEALLOCATE (grad_norm_spin)
   10219           18 :       DEALLOCATE (nocc)
   10220              : 
   10221           18 :       DEALLOCATE (m_theta)
   10222              : 
   10223           18 :       IF (.NOT. scf_converged .AND. .NOT. optimizer%early_stopping_on) THEN
   10224            0 :          CPABORT("Optimization not converged! ")
   10225              :       END IF
   10226              : 
   10227           18 :       CALL timestop(handle)
   10228              : 
   10229           36 :    END SUBROUTINE almo_scf_xalmo_trustr
   10230              : 
   10231              : ! **************************************************************************************************
   10232              : !> \brief Computes molecular orbitals and the objective (loss) function from the main variables
   10233              : !>        Most important input and output variables are given as arguments explicitly.
   10234              : !>        Some variables inside almo_scf_env (KS, DM) and qs_env are also updated but are not
   10235              : !>        listed as arguments for brevity
   10236              : !> \param almo_scf_env ...
   10237              : !> \param qs_env ...
   10238              : !> \param m_main_var_in ...
   10239              : !> \param m_t_out ...
   10240              : !> \param energy_out ...
   10241              : !> \param penalty_out ...
   10242              : !> \param m_sig_sqrti_ii_out ...
   10243              : !> \param m_FTsiginv_out ...
   10244              : !> \param m_siginvTFTsiginv_out ...
   10245              : !> \param m_ST_out ...
   10246              : !> \param m_STsiginv0_in ...
   10247              : !> \param m_quench_t_in ...
   10248              : !> \param domain_r_down_in ...
   10249              : !> \param assume_t0_q0x ...
   10250              : !> \param just_started ...
   10251              : !> \param optimize_theta ...
   10252              : !> \param normalize_orbitals ...
   10253              : !> \param perturbation_only ...
   10254              : !> \param do_penalty ...
   10255              : !> \param special_case ...
   10256              : !> \par History
   10257              : !>       2019.12 created [Rustam Z Khaliullin]
   10258              : !> \author Rustam Z Khaliullin
   10259              : ! **************************************************************************************************
   10260         1474 :    SUBROUTINE main_var_to_xalmos_and_loss_func(almo_scf_env, qs_env, m_main_var_in, &
   10261         1474 :                                                m_t_out, energy_out, penalty_out, m_sig_sqrti_ii_out, m_FTsiginv_out, &
   10262         1474 :                                                m_siginvTFTsiginv_out, m_ST_out, m_STsiginv0_in, m_quench_t_in, domain_r_down_in, &
   10263              :                                                assume_t0_q0x, just_started, optimize_theta, normalize_orbitals, perturbation_only, &
   10264              :                                                do_penalty, special_case)
   10265              : 
   10266              :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
   10267              :       TYPE(qs_environment_type), POINTER                 :: qs_env
   10268              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_main_var_in
   10269              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_t_out
   10270              :       REAL(KIND=dp), INTENT(OUT)                         :: energy_out, penalty_out
   10271              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: m_sig_sqrti_ii_out, m_FTsiginv_out, &
   10272              :                                                             m_siginvTFTsiginv_out, m_ST_out, &
   10273              :                                                             m_STsiginv0_in, m_quench_t_in
   10274              :       TYPE(domain_submatrix_type), DIMENSION(:, :), &
   10275              :          INTENT(IN)                                      :: domain_r_down_in
   10276              :       LOGICAL, INTENT(IN)                                :: assume_t0_q0x, just_started, &
   10277              :                                                             optimize_theta, normalize_orbitals, &
   10278              :                                                             perturbation_only, do_penalty
   10279              :       INTEGER, INTENT(IN)                                :: special_case
   10280              : 
   10281              :       CHARACTER(len=*), PARAMETER :: routineN = 'main_var_to_xalmos_and_loss_func'
   10282              : 
   10283              :       INTEGER                                            :: handle, ispin, nspins
   10284         1474 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
   10285              :       REAL(KIND=dp)                                      :: det1, energy_ispin, penalty_amplitude, &
   10286              :                                                             spin_factor
   10287              : 
   10288         1474 :       CALL timeset(routineN, handle)
   10289              : 
   10290         1474 :       energy_out = 0.0_dp
   10291         1474 :       penalty_out = 0.0_dp
   10292              : 
   10293         1474 :       nspins = SIZE(m_main_var_in)
   10294         1474 :       IF (nspins == 1) THEN
   10295         1474 :          spin_factor = 2.0_dp
   10296              :       ELSE
   10297            0 :          spin_factor = 1.0_dp
   10298              :       END IF
   10299              : 
   10300         1474 :       penalty_amplitude = 0.0_dp !almo_scf_env%penalty%occ_vol_coeff
   10301              : 
   10302         4422 :       ALLOCATE (nocc(nspins))
   10303         2948 :       DO ispin = 1, nspins
   10304              :          CALL dbcsr_get_info(almo_scf_env%matrix_sigma_inv(ispin), &
   10305         2948 :                              nfullrows_total=nocc(ispin))
   10306              :       END DO
   10307              : 
   10308         2948 :       DO ispin = 1, nspins
   10309              : 
   10310              :          ! compute MO coefficients from the main variable
   10311              :          CALL compute_xalmos_from_main_var( &
   10312              :             m_var_in=m_main_var_in(ispin), &
   10313              :             m_t_out=m_t_out(ispin), &
   10314              :             m_quench_t=m_quench_t_in(ispin), &
   10315              :             m_t0=almo_scf_env%matrix_t_blk(ispin), &
   10316              :             m_oo_template=almo_scf_env%matrix_sigma_inv(ispin), &
   10317              :             m_STsiginv0=m_STsiginv0_in(ispin), &
   10318              :             m_s=almo_scf_env%matrix_s(1), &
   10319              :             m_sig_sqrti_ii_out=m_sig_sqrti_ii_out(ispin), &
   10320              :             domain_r_down=domain_r_down_in(:, ispin), &
   10321              :             domain_s_inv=almo_scf_env%domain_s_inv(:, ispin), &
   10322              :             domain_map=almo_scf_env%domain_map(ispin), &
   10323              :             cpu_of_domain=almo_scf_env%cpu_of_domain, &
   10324              :             assume_t0_q0x=assume_t0_q0x, &
   10325              :             just_started=just_started, &
   10326              :             optimize_theta=optimize_theta, &
   10327              :             normalize_orbitals=normalize_orbitals, &
   10328              :             envelope_amplitude=almo_scf_env%envelope_amplitude, &
   10329              :             eps_filter=almo_scf_env%eps_filter, &
   10330              :             special_case=special_case, &
   10331              :             nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
   10332              :             order_lanczos=almo_scf_env%order_lanczos, &
   10333              :             eps_lanczos=almo_scf_env%eps_lanczos, &
   10334         1474 :             max_iter_lanczos=almo_scf_env%max_iter_lanczos)
   10335              : 
   10336              :          ! compute the global projectors (for the density matrix)
   10337              :          CALL almo_scf_t_to_proj( &
   10338              :             t=m_t_out(ispin), &
   10339              :             p=almo_scf_env%matrix_p(ispin), &
   10340              :             eps_filter=almo_scf_env%eps_filter, &
   10341              :             orthog_orbs=.FALSE., &
   10342              :             nocc_of_domain=almo_scf_env%nocc_of_domain(:, ispin), &
   10343              :             s=almo_scf_env%matrix_s(1), &
   10344              :             sigma=almo_scf_env%matrix_sigma(ispin), &
   10345              :             sigma_inv=almo_scf_env%matrix_sigma_inv(ispin), &
   10346              :             use_guess=.FALSE., &
   10347              :             algorithm=almo_scf_env%sigma_inv_algorithm, &
   10348              :             inv_eps_factor=almo_scf_env%matrix_iter_eps_error_factor, &
   10349              :             inverse_accelerator=almo_scf_env%order_lanczos, &
   10350              :             eps_lanczos=almo_scf_env%eps_lanczos, &
   10351              :             max_iter_lanczos=almo_scf_env%max_iter_lanczos, &
   10352              :             para_env=almo_scf_env%para_env, &
   10353         1474 :             blacs_env=almo_scf_env%blacs_env)
   10354              : 
   10355              :          ! compute dm from the projector(s)
   10356              :          CALL dbcsr_scale(almo_scf_env%matrix_p(ispin), &
   10357         2948 :                           spin_factor)
   10358              : 
   10359              :       END DO ! ispin
   10360              : 
   10361              :       ! update the KS matrix and energy if necessary
   10362         1474 :       IF (perturbation_only) THEN
   10363              :          ! note: do not combine the two IF statements
   10364          212 :          IF (just_started) THEN
   10365           48 :             DO ispin = 1, nspins
   10366              :                CALL dbcsr_copy(almo_scf_env%matrix_ks(ispin), &
   10367           48 :                                almo_scf_env%matrix_ks_0deloc(ispin))
   10368              :             END DO
   10369              :          END IF
   10370              :       ELSE
   10371              :          ! the KS matrix is updated outside the spin loop
   10372              :          CALL almo_dm_to_almo_ks(qs_env, &
   10373              :                                  almo_scf_env%matrix_p, &
   10374              :                                  almo_scf_env%matrix_ks, &
   10375              :                                  energy_out, &
   10376              :                                  almo_scf_env%eps_filter, &
   10377         1262 :                                  almo_scf_env%mat_distr_aos)
   10378              :       END IF
   10379              : 
   10380         1474 :       penalty_out = 0.0_dp
   10381         2948 :       DO ispin = 1, nspins
   10382              : 
   10383              :          CALL compute_frequently_used_matrices( &
   10384              :             filter_eps=almo_scf_env%eps_filter, &
   10385              :             m_T_in=m_t_out(ispin), &
   10386              :             m_siginv_in=almo_scf_env%matrix_sigma_inv(ispin), &
   10387              :             m_S_in=almo_scf_env%matrix_s(1), &
   10388              :             m_F_in=almo_scf_env%matrix_ks(ispin), &
   10389              :             m_FTsiginv_out=m_FTsiginv_out(ispin), &
   10390              :             m_siginvTFTsiginv_out=m_siginvTFTsiginv_out(ispin), &
   10391         1474 :             m_ST_out=m_ST_out(ispin))
   10392              : 
   10393         1474 :          IF (perturbation_only) THEN
   10394              :             ! calculate objective function Tr(F_0 R)
   10395          212 :             IF (ispin == 1) energy_out = 0.0_dp
   10396          212 :             CALL dbcsr_dot(m_t_out(ispin), m_FTsiginv_out(ispin), energy_ispin)
   10397          212 :             energy_out = energy_out + energy_ispin*spin_factor
   10398              :          END IF
   10399              : 
   10400         2948 :          IF (do_penalty) THEN
   10401              : 
   10402              :             CALL determinant(almo_scf_env%matrix_sigma(ispin), det1, &
   10403            0 :                              almo_scf_env%eps_filter)
   10404              :             penalty_out = penalty_out - &
   10405            0 :                           penalty_amplitude*spin_factor*nocc(ispin)*LOG(det1)
   10406              : 
   10407              :          END IF
   10408              : 
   10409              :       END DO ! ispin
   10410              : 
   10411         1474 :       DEALLOCATE (nocc)
   10412              : 
   10413         1474 :       CALL timestop(handle)
   10414              : 
   10415         1474 :    END SUBROUTINE main_var_to_xalmos_and_loss_func
   10416              : 
   10417              : ! **************************************************************************************************
   10418              : !> \brief Computes the step size required to reach the trust-radius border,
   10419              : !>        measured from the origin,
   10420              : !>        given the current position (position) in the direction (direction)
   10421              : !> \param step_size_out ...
   10422              : !> \param metric_in ...
   10423              : !> \param position_in ...
   10424              : !> \param direction_in ...
   10425              : !> \param trust_radius_in ...
   10426              : !> \param quench_t_in ...
   10427              : !> \param eps_filter_in ...
   10428              : !> \par History
   10429              : !>       2019.12 created [Rustam Z Khaliullin]
   10430              : !> \author Rustam Z Khaliullin
   10431              : ! **************************************************************************************************
   10432           36 :    SUBROUTINE step_size_to_border(step_size_out, metric_in, position_in, &
   10433           36 :                                   direction_in, trust_radius_in, quench_t_in, eps_filter_in)
   10434              : 
   10435              :       REAL(KIND=dp), INTENT(INOUT)                       :: step_size_out
   10436              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: metric_in, position_in, direction_in
   10437              :       REAL(KIND=dp), INTENT(IN)                          :: trust_radius_in
   10438              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: quench_t_in
   10439              :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter_in
   10440              : 
   10441              :       INTEGER                                            :: isol, ispin, nsolutions, &
   10442              :                                                             nsolutions_found, nspins
   10443           36 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
   10444              :       REAL(KIND=dp)                                      :: discrim_sign, discriminant, solution, &
   10445              :                                                             spin_factor, temp_real
   10446              :       REAL(KIND=dp), DIMENSION(3)                        :: coef
   10447           36 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no
   10448              : 
   10449           36 :       step_size_out = 0.0_dp
   10450              : 
   10451           36 :       nspins = SIZE(position_in)
   10452           36 :       IF (nspins == 1) THEN
   10453              :          spin_factor = 2.0_dp
   10454              :       ELSE
   10455            0 :          spin_factor = 1.0_dp
   10456              :       END IF
   10457              : 
   10458          108 :       ALLOCATE (nocc(nspins))
   10459          144 :       ALLOCATE (m_temp_no(nspins))
   10460              : 
   10461           36 :       coef(:) = 0.0_dp
   10462           72 :       DO ispin = 1, nspins
   10463              : 
   10464              :          CALL dbcsr_create(m_temp_no(ispin), &
   10465           36 :                            template=direction_in(ispin))
   10466              : 
   10467              :          CALL dbcsr_get_info(direction_in(ispin), &
   10468           36 :                              nfullcols_total=nocc(ispin))
   10469              : 
   10470           36 :          CALL dbcsr_copy(m_temp_no(ispin), quench_t_in(ispin))
   10471              :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10472              :                              metric_in(1), &
   10473              :                              position_in(ispin), &
   10474              :                              0.0_dp, m_temp_no(ispin), &
   10475           36 :                              retain_sparsity=.TRUE.)
   10476           36 :          CALL dbcsr_filter(m_temp_no(ispin), eps_filter_in)
   10477           36 :          CALL dbcsr_dot(position_in(ispin), m_temp_no(ispin), temp_real)
   10478           36 :          coef(3) = coef(3) + temp_real/nocc(ispin)
   10479           36 :          CALL dbcsr_dot(direction_in(ispin), m_temp_no(ispin), temp_real)
   10480           36 :          coef(2) = coef(2) + 2.0_dp*temp_real/nocc(ispin)
   10481           36 :          CALL dbcsr_copy(m_temp_no(ispin), quench_t_in(ispin))
   10482              :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10483              :                              metric_in(1), &
   10484              :                              direction_in(ispin), &
   10485              :                              0.0_dp, m_temp_no(ispin), &
   10486           36 :                              retain_sparsity=.TRUE.)
   10487           36 :          CALL dbcsr_filter(m_temp_no(ispin), eps_filter_in)
   10488           36 :          CALL dbcsr_dot(direction_in(ispin), m_temp_no(ispin), temp_real)
   10489           36 :          coef(1) = coef(1) + temp_real/nocc(ispin)
   10490              : 
   10491          108 :          CALL dbcsr_release(m_temp_no(ispin))
   10492              : 
   10493              :       END DO !ispin
   10494              : 
   10495           36 :       DEALLOCATE (nocc)
   10496           36 :       DEALLOCATE (m_temp_no)
   10497              : 
   10498          144 :       coef(:) = coef(:)*spin_factor
   10499           36 :       coef(3) = coef(3) - trust_radius_in*trust_radius_in
   10500              : 
   10501              :       ! solve the quadratic equation
   10502           36 :       discriminant = coef(2)*coef(2) - 4.0_dp*coef(1)*coef(3)
   10503           36 :       IF (discriminant > TINY(discriminant)) THEN
   10504              :          nsolutions = 2
   10505            0 :       ELSE IF (discriminant < 0.0_dp) THEN
   10506            0 :          nsolutions = 0
   10507            0 :          CPABORT("Step to border: no solutions")
   10508              :       ELSE
   10509              :          nsolutions = 1
   10510              :       END IF
   10511              : 
   10512           36 :       discrim_sign = 1.0_dp
   10513           36 :       nsolutions_found = 0
   10514          108 :       DO isol = 1, nsolutions
   10515           72 :          solution = (-coef(2) + discrim_sign*SQRT(discriminant))/(2.0_dp*coef(1))
   10516           72 :          IF (solution > 0.0_dp) THEN
   10517           36 :             nsolutions_found = nsolutions_found + 1
   10518           36 :             step_size_out = solution
   10519              :          END IF
   10520          108 :          discrim_sign = -discrim_sign
   10521              :       END DO
   10522              : 
   10523           36 :       IF (nsolutions_found == 0) THEN
   10524            0 :          CPABORT("Step to border: no positive solutions")
   10525           36 :       ELSE IF (nsolutions_found == 2) THEN
   10526            0 :          CPABORT("Two positive border steps possible!")
   10527              :       END IF
   10528              : 
   10529           36 :    END SUBROUTINE step_size_to_border
   10530              : 
   10531              : ! **************************************************************************************************
   10532              : !> \brief Computes a norm of a contravariant NBasis x Occ matrix using proper metric
   10533              : !> \param norm_out ...
   10534              : !> \param matrix_in ...
   10535              : !> \param metric_in ...
   10536              : !> \param quench_t_in ...
   10537              : !> \param eps_filter_in ...
   10538              : !> \par History
   10539              : !>       2019.12 created [Rustam Z Khaliullin]
   10540              : !> \author Rustam Z Khaliullin
   10541              : ! **************************************************************************************************
   10542          758 :    SUBROUTINE contravariant_matrix_norm(norm_out, matrix_in, metric_in, &
   10543          758 :                                         quench_t_in, eps_filter_in)
   10544              : 
   10545              :       REAL(KIND=dp), INTENT(OUT)                         :: norm_out
   10546              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: matrix_in, metric_in, quench_t_in
   10547              :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter_in
   10548              : 
   10549              :       INTEGER                                            :: ispin, nspins
   10550          758 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nocc
   10551              :       REAL(KIND=dp)                                      :: my_norm, spin_factor, temp_real
   10552          758 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no
   10553              : 
   10554              :       ! Frist thing: assign the output value to avoid norms being undefined
   10555          758 :       norm_out = 0.0_dp
   10556              : 
   10557          758 :       nspins = SIZE(matrix_in)
   10558          758 :       IF (nspins == 1) THEN
   10559              :          spin_factor = 2.0_dp
   10560              :       ELSE
   10561            0 :          spin_factor = 1.0_dp
   10562              :       END IF
   10563              : 
   10564         2274 :       ALLOCATE (nocc(nspins))
   10565         3032 :       ALLOCATE (m_temp_no(nspins))
   10566              : 
   10567          758 :       my_norm = 0.0_dp
   10568         1516 :       DO ispin = 1, nspins
   10569              : 
   10570          758 :          CALL dbcsr_create(m_temp_no(ispin), template=matrix_in(ispin))
   10571              : 
   10572              :          CALL dbcsr_get_info(matrix_in(ispin), &
   10573          758 :                              nfullcols_total=nocc(ispin))
   10574              : 
   10575          758 :          CALL dbcsr_copy(m_temp_no(ispin), quench_t_in(ispin))
   10576              :          CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10577              :                              metric_in(1), &
   10578              :                              matrix_in(ispin), &
   10579              :                              0.0_dp, m_temp_no(ispin), &
   10580          758 :                              retain_sparsity=.TRUE.)
   10581          758 :          CALL dbcsr_filter(m_temp_no(ispin), eps_filter_in)
   10582          758 :          CALL dbcsr_dot(matrix_in(ispin), m_temp_no(ispin), temp_real)
   10583              : 
   10584          758 :          my_norm = my_norm + temp_real/nocc(ispin)
   10585              : 
   10586         1516 :          CALL dbcsr_release(m_temp_no(ispin))
   10587              : 
   10588              :       END DO !ispin
   10589              : 
   10590          758 :       DEALLOCATE (nocc)
   10591          758 :       DEALLOCATE (m_temp_no)
   10592              : 
   10593          758 :       my_norm = my_norm*spin_factor
   10594          758 :       norm_out = SQRT(my_norm)
   10595              : 
   10596          758 :    END SUBROUTINE contravariant_matrix_norm
   10597              : 
   10598              : ! **************************************************************************************************
   10599              : !> \brief Loss reduction for a given step is estimated using
   10600              : !>        gradient and hessian
   10601              : !> \param reduction_out ...
   10602              : !> \param grad_in ...
   10603              : !> \param step_in ...
   10604              : !> \param hess_in ...
   10605              : !> \param hess_submatrix_in ...
   10606              : !> \param quench_t_in ...
   10607              : !> \param special_case ...
   10608              : !> \param eps_filter ...
   10609              : !> \param domain_map ...
   10610              : !> \param cpu_of_domain ...
   10611              : !> \par History
   10612              : !>       2019.12 created [Rustam Z Khaliullin]
   10613              : !> \author Rustam Z Khaliullin
   10614              : ! **************************************************************************************************
   10615          408 :    SUBROUTINE predicted_reduction(reduction_out, grad_in, step_in, hess_in, &
   10616          408 :                                   hess_submatrix_in, quench_t_in, special_case, eps_filter, domain_map, &
   10617          408 :                                   cpu_of_domain)
   10618              : 
   10619              :       !RZK-noncritical: can be formulated without submatrices
   10620              :       REAL(KIND=dp), INTENT(INOUT)                       :: reduction_out
   10621              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(INOUT)      :: grad_in, step_in, hess_in
   10622              :       TYPE(domain_submatrix_type), DIMENSION(:, :), &
   10623              :          INTENT(IN)                                      :: hess_submatrix_in
   10624              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: quench_t_in
   10625              :       INTEGER, INTENT(IN)                                :: special_case
   10626              :       REAL(KIND=dp), INTENT(IN)                          :: eps_filter
   10627              :       TYPE(domain_map_type), DIMENSION(:), INTENT(IN)    :: domain_map
   10628              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: cpu_of_domain
   10629              : 
   10630              :       INTEGER                                            :: ispin, nspins
   10631              :       REAL(KIND=dp)                                      :: my_reduction, spin_factor, temp_real
   10632          408 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no
   10633              : 
   10634          408 :       reduction_out = 0.0_dp
   10635              : 
   10636          408 :       nspins = SIZE(grad_in)
   10637          408 :       IF (nspins == 1) THEN
   10638              :          spin_factor = 2.0_dp
   10639              :       ELSE
   10640            0 :          spin_factor = 1.0_dp
   10641              :       END IF
   10642              : 
   10643         1632 :       ALLOCATE (m_temp_no(nspins))
   10644              : 
   10645          408 :       my_reduction = 0.0_dp
   10646          816 :       DO ispin = 1, nspins
   10647              : 
   10648          408 :          CALL dbcsr_create(m_temp_no(ispin), template=grad_in(ispin))
   10649              : 
   10650          408 :          CALL dbcsr_dot(step_in(ispin), grad_in(ispin), temp_real)
   10651          408 :          my_reduction = my_reduction + temp_real
   10652              : 
   10653              :          ! Get Hess.step
   10654          408 :          IF (special_case == xalmo_case_block_diag .OR. &
   10655              :              special_case == xalmo_case_fully_deloc) THEN
   10656              : 
   10657              :             CALL dbcsr_multiply("N", "N", 1.0_dp, &
   10658              :                                 hess_in(ispin), &
   10659              :                                 step_in(ispin), &
   10660              :                                 0.0_dp, m_temp_no(ispin), &
   10661           92 :                                 filter_eps=eps_filter)
   10662              : 
   10663              :          ELSE
   10664              : 
   10665              :             CALL apply_domain_operators( &
   10666              :                matrix_in=step_in(ispin), &
   10667              :                matrix_out=m_temp_no(ispin), &
   10668              :                operator1=hess_submatrix_in(:, ispin), &
   10669              :                dpattern=quench_t_in(ispin), &
   10670              :                map=domain_map(ispin), &
   10671              :                node_of_domain=cpu_of_domain, &
   10672              :                my_action=0, &
   10673          316 :                filter_eps=eps_filter)
   10674              : 
   10675              :          END IF ! special case
   10676              : 
   10677              :          ! Get y=step^T.Hess.step
   10678          408 :          CALL dbcsr_dot(step_in(ispin), m_temp_no(ispin), temp_real)
   10679          408 :          my_reduction = my_reduction + 0.5_dp*temp_real
   10680              : 
   10681         1224 :          CALL dbcsr_release(m_temp_no(ispin))
   10682              : 
   10683              :       END DO ! ispin
   10684              : 
   10685              :       !RZK-critical: do we need to multiply by the spin factor?
   10686          408 :       my_reduction = spin_factor*my_reduction
   10687              : 
   10688          408 :       reduction_out = my_reduction
   10689              : 
   10690          408 :       DEALLOCATE (m_temp_no)
   10691              : 
   10692          408 :    END SUBROUTINE predicted_reduction
   10693              : 
   10694              : ! **************************************************************************************************
   10695              : !> \brief Prints key quantities from the fixed-radius minimizer
   10696              : !> \param unit_nr ...
   10697              : !> \param iter_type ...
   10698              : !> \param iteration ...
   10699              : !> \param step_size ...
   10700              : !> \param border_reached ...
   10701              : !> \param curvature ...
   10702              : !> \param grad_norm_ratio ...
   10703              : !> \param predicted_reduction ...
   10704              : !> \param time ...
   10705              : !> \par History
   10706              : !>       2019.12 created [Rustam Z Khaliullin]
   10707              : !> \author Rustam Z Khaliullin
   10708              : ! **************************************************************************************************
   10709          898 :    SUBROUTINE fixed_r_report(unit_nr, iter_type, iteration, step_size, &
   10710              :                              border_reached, curvature, grad_norm_ratio, predicted_reduction, time)
   10711              : 
   10712              :       INTEGER, INTENT(IN)                                :: unit_nr, iter_type, iteration
   10713              :       REAL(KIND=dp), INTENT(IN)                          :: step_size
   10714              :       LOGICAL, INTENT(IN)                                :: border_reached
   10715              :       REAL(KIND=dp), INTENT(IN)                          :: curvature
   10716              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: grad_norm_ratio, predicted_reduction
   10717              :       REAL(KIND=dp), INTENT(IN)                          :: time
   10718              : 
   10719              :       CHARACTER(LEN=20)                                  :: iter_type_str
   10720              :       REAL(KIND=dp)                                      :: loss_or_grad_change
   10721              : 
   10722          898 :       loss_or_grad_change = 0.0_dp
   10723          898 :       IF (PRESENT(grad_norm_ratio)) THEN
   10724          898 :          loss_or_grad_change = grad_norm_ratio
   10725            0 :       ELSE IF (PRESENT(predicted_reduction)) THEN
   10726            0 :          loss_or_grad_change = predicted_reduction
   10727              :       ELSE
   10728            0 :          CPABORT("one argument is missing")
   10729              :       END IF
   10730              : 
   10731         1306 :       SELECT CASE (iter_type)
   10732              :       CASE (0)
   10733          408 :          iter_type_str = TRIM("Ignored")
   10734              :       CASE (1)
   10735           82 :          iter_type_str = TRIM("PCG")
   10736              :       CASE (2)
   10737            0 :          iter_type_str = TRIM("Neg. curvatr.")
   10738              :       CASE (3)
   10739           34 :          iter_type_str = TRIM("Step too long")
   10740              :       CASE (4)
   10741           26 :          iter_type_str = TRIM("Grad. reduced")
   10742              :       CASE (5)
   10743           80 :          iter_type_str = TRIM("Cauchy point")
   10744              :       CASE (6)
   10745          266 :          iter_type_str = TRIM("Full dogleg")
   10746              :       CASE (7)
   10747            2 :          iter_type_str = TRIM("Part. dogleg")
   10748              :       CASE DEFAULT
   10749          898 :          CPABORT("unknown report type")
   10750              :       END SELECT
   10751              : 
   10752          898 :       IF (unit_nr > 0) THEN
   10753              : 
   10754          204 :          SELECT CASE (iter_type)
   10755              :          CASE (0)
   10756              : 
   10757          204 :             WRITE (unit_nr, *)
   10758              :             WRITE (unit_nr, '(T4,A15,A6,A10,A10,A7,A20,A8)') &
   10759          204 :                "Action", &
   10760          204 :                "Iter", &
   10761          204 :                "Curv", &
   10762          204 :                "Step", &
   10763          204 :                "Edge?", &
   10764          204 :                "Grad/o.f. reduc", &
   10765          408 :                "Time"
   10766              : 
   10767              :          CASE DEFAULT
   10768              : 
   10769              :             WRITE (unit_nr, '(T4,A15,I6,F10.5,F10.5,L7,F20.10,F8.2)') &
   10770          245 :                iter_type_str, &
   10771          245 :                iteration, &
   10772          245 :                curvature, step_size, border_reached, &
   10773          245 :                loss_or_grad_change, &
   10774          694 :                time
   10775              : 
   10776              :          END SELECT
   10777              : 
   10778              :          ! epilogue
   10779          204 :          SELECT CASE (iter_type)
   10780              :          CASE (2, 3, 4, 5, 6, 7)
   10781              : 
   10782          449 :             WRITE (unit_nr, *)
   10783              : 
   10784              :          END SELECT
   10785              : 
   10786              :       END IF
   10787              : 
   10788          898 :    END SUBROUTINE fixed_r_report
   10789              : 
   10790              : ! **************************************************************************************************
   10791              : !> \brief Prints key quantities from the loop that tunes trust radius
   10792              : !> \param unit_nr ...
   10793              : !> \param iter_type ...
   10794              : !> \param iteration ...
   10795              : !> \param radius ...
   10796              : !> \param loss ...
   10797              : !> \param delta_loss ...
   10798              : !> \param grad_norm ...
   10799              : !> \param predicted_reduction ...
   10800              : !> \param rho ...
   10801              : !> \param new ...
   10802              : !> \param time ...
   10803              : !> \par History
   10804              : !>       2019.12 created [Rustam Z Khaliullin]
   10805              : !> \author Rustam Z Khaliullin
   10806              : ! **************************************************************************************************
   10807          843 :    SUBROUTINE trust_r_report(unit_nr, iter_type, iteration, radius, &
   10808              :                              loss, delta_loss, grad_norm, predicted_reduction, rho, new, time)
   10809              : 
   10810              :       INTEGER, INTENT(IN)                                :: unit_nr, iter_type, iteration
   10811              :       REAL(KIND=dp), INTENT(IN)                          :: radius, loss, delta_loss, grad_norm, &
   10812              :                                                             predicted_reduction, rho
   10813              :       LOGICAL, INTENT(IN)                                :: new
   10814              :       REAL(KIND=dp), INTENT(IN)                          :: time
   10815              : 
   10816              :       CHARACTER(LEN=20)                                  :: iter_status, iter_type_str
   10817              : 
   10818          852 :       SELECT CASE (iter_type)
   10819              :       CASE (0) ! header
   10820            9 :          iter_type_str = TRIM("Iter")
   10821            9 :          iter_status = TRIM("Stat")
   10822              :       CASE (1) ! first iteration, not all data is available yet
   10823          426 :          iter_type_str = TRIM("TR INI")
   10824          426 :          IF (new) THEN
   10825          426 :             iter_status = "  New" ! new point
   10826              :          ELSE
   10827            0 :             iter_status = " Redo" ! restarted
   10828              :          END IF
   10829              :       CASE (2) ! typical
   10830          408 :          iter_type_str = TRIM("TR FIN")
   10831          408 :          IF (new) THEN
   10832          408 :             iter_status = "  Acc" ! accepted
   10833              :          ELSE
   10834            0 :             iter_status = "  Rej" ! rejected
   10835              :          END IF
   10836              :       CASE DEFAULT
   10837          843 :          CPABORT("unknown report type")
   10838              :       END SELECT
   10839              : 
   10840          843 :       IF (unit_nr > 0) THEN
   10841              : 
   10842            9 :          SELECT CASE (iter_type)
   10843              :          CASE (0)
   10844              : 
   10845              :             WRITE (unit_nr, '(T2,A6,A5,A6,A22,A10,T67,A7,A6)') &
   10846            9 :                "Method", &
   10847            9 :                "Stat", &
   10848            9 :                "Iter", &
   10849            9 :                "Objective Function", &
   10850            9 :                "Conver", &!"Model Change", "Rho", &
   10851            9 :                "Radius", &
   10852           18 :                "Time"
   10853              :             WRITE (unit_nr, '(T41,A10,A10,A6)') &
   10854              :                !"Method", &
   10855              :                !"Iter", &
   10856              :                !"Objective Function", &
   10857            9 :                "Change", "Expct.", "Rho"
   10858              :             !"Radius", &
   10859              :             !"Time"
   10860              : 
   10861              :          CASE (1)
   10862              : 
   10863              :             WRITE (unit_nr, '(T2,A6,A5,I6,F22.10,ES10.2,T67,ES7.0,F6.1)') &
   10864          213 :                iter_type_str, &
   10865          213 :                iter_status, &
   10866          213 :                iteration, &
   10867          213 :                loss, &
   10868          213 :                grad_norm, & ! distinct
   10869          213 :                radius, &
   10870          426 :                time
   10871              : 
   10872              :          CASE (2)
   10873              : 
   10874              :             WRITE (unit_nr, '(T2,A6,A5,I6,F22.10,ES10.2,ES10.2,F6.1,ES7.0,F6.1)') &
   10875          204 :                iter_type_str, &
   10876          204 :                iter_status, &
   10877          204 :                iteration, &
   10878          204 :                loss, &
   10879          204 :                delta_loss, predicted_reduction, rho, & ! distinct
   10880          204 :                radius, &
   10881          630 :                time
   10882              : 
   10883              :          END SELECT
   10884              :       END IF
   10885              : 
   10886          843 :    END SUBROUTINE trust_r_report
   10887              : 
   10888              : ! **************************************************************************************************
   10889              : !> \brief ...
   10890              : !> \param unit_nr ...
   10891              : !> \param ref_energy ...
   10892              : !> \param energy_lowering ...
   10893              : ! **************************************************************************************************
   10894           26 :    SUBROUTINE energy_lowering_report(unit_nr, ref_energy, energy_lowering)
   10895              : 
   10896              :       INTEGER, INTENT(IN)                                :: unit_nr
   10897              :       REAL(KIND=dp), INTENT(IN)                          :: ref_energy, energy_lowering
   10898              : 
   10899              :       ! print out the energy lowering
   10900           26 :       IF (unit_nr > 0) THEN
   10901           13 :          WRITE (unit_nr, *)
   10902           13 :          WRITE (unit_nr, '(T2,A35,F25.10)') "ENERGY OF BLOCK-DIAGONAL ALMOs:", &
   10903           26 :             ref_energy
   10904           13 :          WRITE (unit_nr, '(T2,A35,F25.10)') "ENERGY LOWERING:", &
   10905           26 :             energy_lowering
   10906           13 :          WRITE (unit_nr, '(T2,A35,F25.10)') "CORRECTED ENERGY:", &
   10907           26 :             ref_energy + energy_lowering
   10908           13 :          WRITE (unit_nr, *)
   10909              :       END IF
   10910              : 
   10911           26 :    END SUBROUTINE energy_lowering_report
   10912              : 
   10913              :    ! post SCF-loop calculations
   10914              : ! **************************************************************************************************
   10915              : !> \brief ...
   10916              : !> \param qs_env ...
   10917              : !> \param almo_scf_env ...
   10918              : !> \param perturbation_in ...
   10919              : !> \param m_xalmo_in ...
   10920              : !> \param m_quench_in ...
   10921              : !> \param energy_inout ...
   10922              : ! **************************************************************************************************
   10923          104 :    SUBROUTINE wrap_up_xalmo_scf(qs_env, almo_scf_env, perturbation_in, &
   10924          104 :                                 m_xalmo_in, m_quench_in, energy_inout)
   10925              : 
   10926              :       TYPE(qs_environment_type), POINTER                 :: qs_env
   10927              :       TYPE(almo_scf_env_type), INTENT(INOUT)             :: almo_scf_env
   10928              :       LOGICAL, INTENT(IN)                                :: perturbation_in
   10929              :       TYPE(dbcsr_type), DIMENSION(:), INTENT(IN)         :: m_xalmo_in, m_quench_in
   10930              :       REAL(KIND=dp), INTENT(INOUT)                       :: energy_inout
   10931              : 
   10932              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'wrap_up_xalmo_scf'
   10933              : 
   10934              :       INTEGER                                            :: eda_unit, handle, ispin, nspins, unit_nr
   10935              :       TYPE(cp_logger_type), POINTER                      :: logger
   10936          104 :       TYPE(dbcsr_type), ALLOCATABLE, DIMENSION(:)        :: m_temp_no1, m_temp_no2
   10937              :       TYPE(section_vals_type), POINTER                   :: almo_print_section, input
   10938              : 
   10939          104 :       CALL timeset(routineN, handle)
   10940              : 
   10941              :       ! get a useful output_unit
   10942          104 :       logger => cp_get_default_logger()
   10943          104 :       IF (logger%para_env%is_source()) THEN
   10944           52 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
   10945              :       ELSE
   10946           52 :          unit_nr = -1
   10947              :       END IF
   10948              : 
   10949          104 :       nspins = almo_scf_env%nspins
   10950              : 
   10951              :       ! RZK-warning: must obtain MO coefficients from final theta
   10952              : 
   10953          104 :       IF (perturbation_in) THEN
   10954              : 
   10955           96 :          ALLOCATE (m_temp_no1(nspins))
   10956           72 :          ALLOCATE (m_temp_no2(nspins))
   10957              : 
   10958           48 :          DO ispin = 1, nspins
   10959           24 :             CALL dbcsr_create(m_temp_no1(ispin), template=m_xalmo_in(ispin))
   10960           48 :             CALL dbcsr_create(m_temp_no2(ispin), template=m_xalmo_in(ispin))
   10961              :          END DO
   10962              : 
   10963              :          ! return perturbed density to qs_env
   10964              :          CALL almo_dm_to_qs_env(qs_env, almo_scf_env%matrix_p, &
   10965           24 :                                 almo_scf_env%mat_distr_aos)
   10966              : 
   10967              :          ! compute energy correction and perform
   10968              :          ! detailed decomposition analysis (if requested)
   10969              :          ! reuse step and grad matrices to store decomposition results
   10970              :          CALL xalmo_analysis( &
   10971              :             detailed_analysis=almo_scf_env%almo_analysis%do_analysis, &
   10972              :             eps_filter=almo_scf_env%eps_filter, &
   10973              :             m_T_in=m_xalmo_in, &
   10974              :             m_T0_in=almo_scf_env%matrix_t_blk, &
   10975              :             m_siginv_in=almo_scf_env%matrix_sigma_inv, &
   10976              :             m_siginv0_in=almo_scf_env%matrix_sigma_inv_0deloc, &
   10977              :             m_S_in=almo_scf_env%matrix_s, &
   10978              :             m_KS0_in=almo_scf_env%matrix_ks_0deloc, &
   10979              :             m_quench_t_in=m_quench_in, &
   10980              :             energy_out=energy_inout, & ! get energy loewring
   10981              :             m_eda_out=m_temp_no1, &
   10982              :             m_cta_out=m_temp_no2 &
   10983           24 :             )
   10984              : 
   10985           24 :          IF (almo_scf_env%almo_analysis%do_analysis) THEN
   10986              : 
   10987            4 :             DO ispin = 1, nspins
   10988              : 
   10989              :                ! energy decomposition analysis (EDA)
   10990            2 :                IF (unit_nr > 0) THEN
   10991            1 :                   WRITE (unit_nr, '(T2,A)') "DECOMPOSITION OF THE DELOCALIZATION ENERGY"
   10992              :                END IF
   10993              : 
   10994              :                ! open the output file, print and close
   10995            2 :                CALL get_qs_env(qs_env, input=input)
   10996            2 :                almo_print_section => section_vals_get_subs_vals(input, "DFT%ALMO_SCF%ANALYSIS%PRINT")
   10997              :                eda_unit = cp_print_key_unit_nr(logger, almo_print_section, &
   10998            2 :                                                "ALMO_EDA_CT", extension=".dat", local=.TRUE.)
   10999            2 :                CALL print_block_sum(m_temp_no1(ispin), eda_unit)
   11000              :                CALL cp_print_key_finished_output(eda_unit, logger, almo_print_section, &
   11001            2 :                                                  "ALMO_EDA_CT", local=.TRUE.)
   11002              : 
   11003              :                ! charge transfer analysis (CTA)
   11004            2 :                IF (unit_nr > 0) THEN
   11005            1 :                   WRITE (unit_nr, '(T2,A)') "DECOMPOSITION OF CHARGE TRANSFER TERMS"
   11006              :                END IF
   11007              : 
   11008              :                eda_unit = cp_print_key_unit_nr(logger, almo_print_section, &
   11009            2 :                                                "ALMO_CTA", extension=".dat", local=.TRUE.)
   11010            2 :                CALL print_block_sum(m_temp_no2(ispin), eda_unit)
   11011              :                CALL cp_print_key_finished_output(eda_unit, logger, almo_print_section, &
   11012            4 :                                                  "ALMO_CTA", local=.TRUE.)
   11013              : 
   11014              :             END DO ! ispin
   11015              : 
   11016              :          END IF ! do ALMO EDA/CTA
   11017              : 
   11018              :          CALL energy_lowering_report( &
   11019              :             unit_nr=unit_nr, &
   11020              :             ref_energy=almo_scf_env%almo_scf_energy, &
   11021           24 :             energy_lowering=energy_inout)
   11022              :          CALL almo_scf_update_ks_energy(qs_env, &
   11023              :                                         energy=almo_scf_env%almo_scf_energy, &
   11024           24 :                                         energy_singles_corr=energy_inout)
   11025              : 
   11026           48 :          DO ispin = 1, nspins
   11027           24 :             CALL dbcsr_release(m_temp_no1(ispin))
   11028           48 :             CALL dbcsr_release(m_temp_no2(ispin))
   11029              :          END DO
   11030              : 
   11031           24 :          DEALLOCATE (m_temp_no1)
   11032           24 :          DEALLOCATE (m_temp_no2)
   11033              : 
   11034              :       ELSE ! non-perturbative
   11035              : 
   11036              :          CALL almo_scf_update_ks_energy(qs_env, &
   11037           80 :                                         energy=energy_inout)
   11038              : 
   11039              :       END IF ! if perturbation only
   11040              : 
   11041          104 :       CALL timestop(handle)
   11042              : 
   11043          104 :    END SUBROUTINE wrap_up_xalmo_scf
   11044              : 
   11045              : ! **************************************************************************************************
   11046              : !> \brief Computes tanh(alpha*x) of the matrix elements. Fails if |alpha*x| >= 1.
   11047              : !> \param matrix ...
   11048              : !> \param alpha ...
   11049              : !> \author Ole Schuett
   11050              : ! **************************************************************************************************
   11051            0 :    SUBROUTINE tanh_of_elements(matrix, alpha)
   11052              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
   11053              :       REAL(kind=dp), INTENT(IN)                          :: alpha
   11054              : 
   11055              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'tanh_of_elements'
   11056              : 
   11057              :       INTEGER                                            :: handle
   11058            0 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: block
   11059              :       TYPE(dbcsr_iterator_type)                          :: iter
   11060              : 
   11061            0 :       CALL timeset(routineN, handle)
   11062            0 :       CALL dbcsr_iterator_start(iter, matrix)
   11063            0 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
   11064            0 :          CALL dbcsr_iterator_next_block(iter, block=block)
   11065            0 :          block = TANH(alpha*block)
   11066              :       END DO
   11067            0 :       CALL dbcsr_iterator_stop(iter)
   11068            0 :       CALL timestop(handle)
   11069              : 
   11070            0 :    END SUBROUTINE tanh_of_elements
   11071              : 
   11072              : ! **************************************************************************************************
   11073              : !> \brief Computes d(tanh(alpha*x)) / dx of the matrix elements. Fails if |alpha*x| >= 1.
   11074              : !> \param matrix ...
   11075              : !> \param alpha ...
   11076              : !> \author Ole Schuett
   11077              : ! **************************************************************************************************
   11078            0 :    SUBROUTINE dtanh_of_elements(matrix, alpha)
   11079              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
   11080              :       REAL(kind=dp), INTENT(IN)                          :: alpha
   11081              : 
   11082              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'dtanh_of_elements'
   11083              : 
   11084              :       INTEGER                                            :: handle
   11085            0 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: block
   11086              :       TYPE(dbcsr_iterator_type)                          :: iter
   11087              : 
   11088            0 :       CALL timeset(routineN, handle)
   11089            0 :       CALL dbcsr_iterator_start(iter, matrix)
   11090            0 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
   11091            0 :          CALL dbcsr_iterator_next_block(iter, block=block)
   11092            0 :          block = alpha*(1.0_dp - TANH(block)**2)
   11093              :       END DO
   11094            0 :       CALL dbcsr_iterator_stop(iter)
   11095            0 :       CALL timestop(handle)
   11096              : 
   11097            0 :    END SUBROUTINE dtanh_of_elements
   11098              : 
   11099              : ! **************************************************************************************************
   11100              : !> \brief Computes 1/x of the matrix elements.
   11101              : !> \param matrix ...
   11102              : !> \author Ole Schuett
   11103              : ! **************************************************************************************************
   11104            0 :    SUBROUTINE inverse_of_elements(matrix)
   11105              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
   11106              : 
   11107              :       CHARACTER(len=*), PARAMETER :: routineN = 'inverse_of_elements'
   11108              : 
   11109              :       INTEGER                                            :: handle
   11110            0 :       REAL(kind=dp), DIMENSION(:, :), POINTER            :: block
   11111              :       TYPE(dbcsr_iterator_type)                          :: iter
   11112              : 
   11113            0 :       CALL timeset(routineN, handle)
   11114            0 :       CALL dbcsr_iterator_start(iter, matrix)
   11115            0 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
   11116            0 :          CALL dbcsr_iterator_next_block(iter, block=block)
   11117            0 :          block = 1.0_dp/block
   11118              :       END DO
   11119            0 :       CALL dbcsr_iterator_stop(iter)
   11120            0 :       CALL timestop(handle)
   11121              : 
   11122            0 :    END SUBROUTINE inverse_of_elements
   11123              : 
   11124              : ! **************************************************************************************************
   11125              : !> \brief Prints the sum of the elements for each block.
   11126              : !> \param matrix ...
   11127              : !> \param unit_nr ...
   11128              : ! **************************************************************************************************
   11129            4 :    SUBROUTINE print_block_sum(matrix, unit_nr)
   11130              :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix
   11131              :       INTEGER, INTENT(IN)                                :: unit_nr
   11132              : 
   11133              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'print_block_sum'
   11134              : 
   11135              :       INTEGER                                            :: col, handle, row
   11136            4 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block
   11137              :       TYPE(dbcsr_iterator_type)                          :: iter
   11138              : 
   11139            4 :       CALL timeset(routineN, handle)
   11140              : 
   11141            4 :       IF (unit_nr > 0) THEN
   11142            4 :          CALL dbcsr_iterator_readonly_start(iter, matrix)
   11143           34 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
   11144           30 :             CALL dbcsr_iterator_next_block(iter, row, col, block)
   11145         2914 :             WRITE (unit_nr, '(I6,I6,ES18.9)') row, col, SUM(block)
   11146              :          END DO
   11147            4 :          CALL dbcsr_iterator_stop(iter)
   11148              :       END IF
   11149              : 
   11150            4 :       CALL timestop(handle)
   11151            4 :    END SUBROUTINE print_block_sum
   11152              : 
   11153              : END MODULE almo_scf_optimizer
   11154              : 
        

Generated by: LCOV version 2.0-1