LCOV - code coverage report
Current view: top level - src - qs_localization_methods.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 81.8 % 1507 1232
Test Date: 2025-12-04 06:27:48 Functions: 69.2 % 26 18

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Localization methods such as 2x2 Jacobi rotations
      10              : !>                                   Steepest Decents
      11              : !>                                   Conjugate Gradient
      12              : !> \par History
      13              : !>      Initial parallellization of jacobi (JVDV 07.2003)
      14              : !>      direct minimization using exponential parametrization (JVDV 09.2003)
      15              : !>      crazy rotations go fast (JVDV 10.2003)
      16              : !> \author CJM (04.2003)
      17              : ! **************************************************************************************************
      18              : MODULE qs_localization_methods
      19              :    USE cell_types,                      ONLY: cell_type
      20              :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      21              :    USE cp_cfm_basic_linalg,             ONLY: cp_cfm_column_scale,&
      22              :                                               cp_cfm_rot_cols,&
      23              :                                               cp_cfm_rot_rows,&
      24              :                                               cp_cfm_scale,&
      25              :                                               cp_cfm_scale_and_add,&
      26              :                                               cp_cfm_schur_product,&
      27              :                                               cp_cfm_trace
      28              :    USE cp_cfm_diag,                     ONLY: cp_cfm_heevd
      29              :    USE cp_cfm_types,                    ONLY: &
      30              :         cp_cfm_create, cp_cfm_get_element, cp_cfm_get_info, cp_cfm_get_submatrix, cp_cfm_release, &
      31              :         cp_cfm_set_all, cp_cfm_set_submatrix, cp_cfm_to_cfm, cp_cfm_to_fm, cp_cfm_type, &
      32              :         cp_fm_to_cfm
      33              :    USE cp_dbcsr_api,                    ONLY: dbcsr_p_type
      34              :    USE cp_dbcsr_operations,             ONLY: cp_dbcsr_sm_fm_multiply
      35              :    USE cp_external_control,             ONLY: external_control
      36              :    USE cp_fm_basic_linalg,              ONLY: cp_fm_frobenius_norm,&
      37              :                                               cp_fm_pdgeqpf,&
      38              :                                               cp_fm_pdorgqr,&
      39              :                                               cp_fm_scale,&
      40              :                                               cp_fm_scale_and_add,&
      41              :                                               cp_fm_trace,&
      42              :                                               cp_fm_transpose,&
      43              :                                               cp_fm_triangular_multiply
      44              :    USE cp_fm_cholesky,                  ONLY: cp_fm_cholesky_decompose
      45              :    USE cp_fm_diag,                      ONLY: cp_fm_syevd
      46              :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      47              :                                               cp_fm_struct_get,&
      48              :                                               cp_fm_struct_release,&
      49              :                                               cp_fm_struct_type
      50              :    USE cp_fm_types,                     ONLY: &
      51              :         cp_fm_create, cp_fm_get_element, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_init_random, &
      52              :         cp_fm_maxabsrownorm, cp_fm_maxabsval, cp_fm_release, cp_fm_set_all, cp_fm_set_submatrix, &
      53              :         cp_fm_to_fm, cp_fm_to_fm_submat, cp_fm_type
      54              :    USE cp_log_handling,                 ONLY: cp_logger_get_default_io_unit,&
      55              :                                               cp_logger_get_default_unit_nr
      56              :    USE kahan_sum,                       ONLY: accurate_sum
      57              :    USE kinds,                           ONLY: dp
      58              :    USE machine,                         ONLY: m_flush,&
      59              :                                               m_walltime
      60              :    USE mathconstants,                   ONLY: pi,&
      61              :                                               twopi,&
      62              :                                               z_zero
      63              :    USE matrix_exp,                      ONLY: exp_pade_real,&
      64              :                                               get_nsquare_norder
      65              :    USE message_passing,                 ONLY: mp_para_env_type
      66              :    USE parallel_gemm_api,               ONLY: parallel_gemm
      67              : #include "./base/base_uses.f90"
      68              : 
      69              :    IMPLICIT NONE
      70              :    PUBLIC :: initialize_weights, crazy_rotations, &
      71              :              direct_mini, rotate_orbitals, approx_l1_norm_sd, jacobi_rotations, scdm_qrfact, zij_matrix, &
      72              :              jacobi_cg_edf_ls
      73              : 
      74              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_localization_methods'
      75              : 
      76              :    PRIVATE
      77              : 
      78              :    TYPE set_c_1d_type
      79              :       COMPLEX(KIND=dp), POINTER, DIMENSION(:) :: c_array => NULL()
      80              :    END TYPE set_c_1d_type
      81              : 
      82              :    TYPE set_c_2d_type
      83              :       COMPLEX(KIND=dp), POINTER, DIMENSION(:, :) :: c_array => NULL()
      84              :    END TYPE set_c_2d_type
      85              : 
      86              : CONTAINS
      87              : ! **************************************************************************************************
      88              : !> \brief ...
      89              : !> \param C ...
      90              : !> \param iterations ...
      91              : !> \param eps ...
      92              : !> \param converged ...
      93              : !> \param sweeps ...
      94              : ! **************************************************************************************************
      95          210 :    SUBROUTINE approx_l1_norm_sd(C, iterations, eps, converged, sweeps)
      96              :       TYPE(cp_fm_type), INTENT(IN)                       :: C
      97              :       INTEGER, INTENT(IN)                                :: iterations
      98              :       REAL(KIND=dp), INTENT(IN)                          :: eps
      99              :       LOGICAL, INTENT(INOUT)                             :: converged
     100              :       INTEGER, INTENT(INOUT)                             :: sweeps
     101              : 
     102              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'approx_l1_norm_sd'
     103              :       INTEGER, PARAMETER                                 :: taylor_order = 100
     104              :       REAL(KIND=dp), PARAMETER                           :: alpha = 0.1_dp, f2_eps = 0.01_dp
     105              : 
     106              :       INTEGER                                            :: handle, i, istep, k, n, ncol_local, &
     107              :                                                             nrow_local, output_unit, p
     108              :       REAL(KIND=dp)                                      :: expfactor, f2, f2old, gnorm, tnorm
     109              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     110              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_k_k
     111              :       TYPE(cp_fm_type)                                   :: CTmp, G, Gp1, Gp2, U
     112              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     113              : 
     114           30 :       CALL timeset(routineN, handle)
     115              : 
     116           30 :       NULLIFY (context, para_env, fm_struct_k_k)
     117              : 
     118           30 :       output_unit = cp_logger_get_default_io_unit()
     119              : 
     120              :       CALL cp_fm_struct_get(C%matrix_struct, nrow_global=n, ncol_global=k, &
     121              :                             nrow_local=nrow_local, ncol_local=ncol_local, &
     122           30 :                             para_env=para_env, context=context)
     123              :       CALL cp_fm_struct_create(fm_struct_k_k, para_env=para_env, context=context, &
     124           30 :                                nrow_global=k, ncol_global=k)
     125           30 :       CALL cp_fm_create(CTmp, C%matrix_struct)
     126           30 :       CALL cp_fm_create(U, fm_struct_k_k)
     127           30 :       CALL cp_fm_create(G, fm_struct_k_k)
     128           30 :       CALL cp_fm_create(Gp1, fm_struct_k_k)
     129           30 :       CALL cp_fm_create(Gp2, fm_struct_k_k)
     130              :       !
     131              :       ! printing
     132           30 :       IF (output_unit > 0) THEN
     133           15 :          WRITE (output_unit, '(1X)')
     134           15 :          WRITE (output_unit, '(2X,A)') '-----------------------------------------------------------------------------'
     135           15 :          WRITE (output_unit, '(A,I5)') '      Nbr iterations =', iterations
     136           15 :          WRITE (output_unit, '(A,E10.2)') '     eps convergence =', eps
     137           15 :          WRITE (output_unit, '(A,I5)') '    Max Taylor order =', taylor_order
     138           15 :          WRITE (output_unit, '(A,E10.2)') '              f2 eps =', f2_eps
     139           15 :          WRITE (output_unit, '(A,E10.2)') '               alpha =', alpha
     140           15 :          WRITE (output_unit, '(A)') '     iteration    approx_l1_norm    g_norm   rel_err'
     141              :       END IF
     142              :       !
     143           30 :       f2old = 0.0_dp
     144           30 :       converged = .FALSE.
     145              :       !
     146              :       ! Start the steepest descent
     147         1480 :       DO istep = 1, iterations
     148              :          !
     149              :          !-------------------------------------------------------------------
     150              :          ! compute f_2
     151              :          ! f_2(x)=(x^2+eps)^1/2
     152         1470 :          f2 = 0.0_dp
     153        22748 :          DO p = 1, ncol_local ! p
     154      1100956 :             DO i = 1, nrow_local ! i
     155      1099486 :                f2 = f2 + SQRT(C%local_data(i, p)**2 + f2_eps)
     156              :             END DO
     157              :          END DO
     158         1470 :          CALL C%matrix_struct%para_env%sum(f2)
     159              :          !write(*,*) 'qs_localize: f_2=',f2
     160              :          !-------------------------------------------------------------------
     161              :          ! compute the derivative of f_2
     162              :          ! f_2(x)=(x^2+eps)^1/2
     163        22748 :          DO p = 1, ncol_local ! p
     164      1100956 :             DO i = 1, nrow_local ! i
     165      1099486 :                CTmp%local_data(i, p) = C%local_data(i, p)/SQRT(C%local_data(i, p)**2 + f2_eps)
     166              :             END DO
     167              :          END DO
     168         1470 :          CALL parallel_gemm('T', 'N', k, k, n, 1.0_dp, CTmp, C, 0.0_dp, G)
     169              :          ! antisymmetrize
     170         1470 :          CALL cp_fm_transpose(G, U)
     171         1470 :          CALL cp_fm_scale_and_add(-0.5_dp, G, 0.5_dp, U)
     172              :          !
     173              :          !-------------------------------------------------------------------
     174              :          !
     175         1470 :          gnorm = cp_fm_frobenius_norm(G)
     176              :          !write(*,*) 'qs_localize: norm(G)=',gnorm
     177              :          !
     178              :          ! rescale for steepest descent
     179         1470 :          CALL cp_fm_scale(-alpha, G)
     180              :          !
     181              :          ! compute unitary transform
     182              :          ! zeroth order
     183         1470 :          CALL cp_fm_set_all(U, 0.0_dp, 1.0_dp)
     184              :          ! first order
     185         1470 :          expfactor = 1.0_dp
     186         1470 :          CALL cp_fm_scale_and_add(1.0_dp, U, expfactor, G)
     187         1470 :          tnorm = cp_fm_frobenius_norm(G)
     188              :          !write(*,*) 'Taylor expansion i=',1,' norm(X^i)/i!=',tnorm
     189         1470 :          IF (tnorm > 1.0E-10_dp) THEN
     190              :             ! other orders
     191         1470 :             CALL cp_fm_to_fm(G, Gp1)
     192         4866 :             DO i = 2, taylor_order
     193              :                ! new power of G
     194         4866 :                CALL parallel_gemm('N', 'N', k, k, k, 1.0_dp, G, Gp1, 0.0_dp, Gp2)
     195         4866 :                CALL cp_fm_to_fm(Gp2, Gp1)
     196              :                ! add to the taylor expansion so far
     197         4866 :                expfactor = expfactor/REAL(i, KIND=dp)
     198         4866 :                CALL cp_fm_scale_and_add(1.0_dp, U, expfactor, Gp1)
     199         4866 :                tnorm = cp_fm_frobenius_norm(Gp1)
     200              :                !write(*,*) 'Taylor expansion i=',i,' norm(X^i)/i!=',tnorm*expfactor
     201         4866 :                IF (tnorm*expfactor < 1.0E-10_dp) EXIT
     202              :             END DO
     203              :          END IF
     204              :          !
     205              :          ! incrementaly rotate the MOs
     206         1470 :          CALL parallel_gemm('N', 'N', n, k, k, 1.0_dp, C, U, 0.0_dp, CTmp)
     207         1470 :          CALL cp_fm_to_fm(CTmp, C)
     208              :          !
     209              :          ! printing
     210         1470 :          IF (output_unit > 0) THEN
     211          735 :             WRITE (output_unit, '(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, ABS((f2 - f2old)/f2)
     212              :          END IF
     213              :          !
     214              :          ! Are we done?
     215         1470 :          sweeps = istep
     216              :          !IF(gnorm<=grad_thresh.AND.ABS((f2-f2old)/f2)<=f2_thresh.AND.istep>1) THEN
     217         1470 :          IF (ABS((f2 - f2old)/f2) <= eps .AND. istep > 1) THEN
     218           20 :             converged = .TRUE.
     219           20 :             EXIT
     220              :          END IF
     221         1460 :          f2old = f2
     222              :       END DO
     223              :       !
     224              :       ! here we should do one refine step to enforce C'*S*C=1 for any case
     225              :       !
     226              :       ! Print the final result
     227           30 :       IF (output_unit > 0) WRITE (output_unit, '(A,E16.10)') ' sparseness function f2 = ', f2
     228              :       !
     229              :       ! sparsity
     230              :       !DO i=1,size(thresh,1)
     231              :       !   gnorm = 0.0_dp
     232              :       !   DO o=1,ncol_local
     233              :       !      DO p=1,nrow_local
     234              :       !         IF(ABS(C%local_data(p,o))>thresh(i)) THEN
     235              :       !            gnorm = gnorm + 1.0_dp
     236              :       !         ENDIF
     237              :       !      ENDDO
     238              :       !   ENDDO
     239              :       !   CALL C%matrix_struct%para_env%sum(gnorm)
     240              :       !   IF(output_unit>0) THEN
     241              :       !      WRITE(output_unit,*) 'qs_localize: ratio2=',gnorm / ( REAL(k,KIND=dp)*REAL(n,KIND=dp) ),thresh(i)
     242              :       !   ENDIF
     243              :       !ENDDO
     244              :       !
     245              :       ! deallocate
     246           30 :       CALL cp_fm_struct_release(fm_struct_k_k)
     247           30 :       CALL cp_fm_release(CTmp)
     248           30 :       CALL cp_fm_release(U)
     249           30 :       CALL cp_fm_release(G)
     250           30 :       CALL cp_fm_release(Gp1)
     251           30 :       CALL cp_fm_release(Gp2)
     252              : 
     253           30 :       CALL timestop(handle)
     254              : 
     255           30 :    END SUBROUTINE approx_l1_norm_sd
     256              : ! **************************************************************************************************
     257              : !> \brief ...
     258              : !> \param cell ...
     259              : !> \param weights ...
     260              : ! **************************************************************************************************
     261          476 :    SUBROUTINE initialize_weights(cell, weights)
     262              : 
     263              :       TYPE(cell_type), POINTER                           :: cell
     264              :       REAL(KIND=dp), DIMENSION(:)                        :: weights
     265              : 
     266              :       REAL(KIND=dp), DIMENSION(3, 3)                     :: metric
     267              : 
     268          476 :       CPASSERT(ASSOCIATED(cell))
     269              : 
     270          476 :       metric = 0.0_dp
     271          476 :       CALL dgemm('T', 'N', 3, 3, 3, 1._dp, cell%hmat(:, :), 3, cell%hmat(:, :), 3, 0.0_dp, metric(:, :), 3)
     272              : 
     273          476 :       weights(1) = METRIC(1, 1) - METRIC(1, 2) - METRIC(1, 3)
     274          476 :       weights(2) = METRIC(2, 2) - METRIC(1, 2) - METRIC(2, 3)
     275          476 :       weights(3) = METRIC(3, 3) - METRIC(1, 3) - METRIC(2, 3)
     276          476 :       weights(4) = METRIC(1, 2)
     277          476 :       weights(5) = METRIC(1, 3)
     278          476 :       weights(6) = METRIC(2, 3)
     279              : 
     280          476 :    END SUBROUTINE initialize_weights
     281              : 
     282              : ! **************************************************************************************************
     283              : !> \brief wrapper for the jacobi routines, should be removed if jacobi_rot_para
     284              : !>        can deal with serial para_envs.
     285              : !> \param weights ...
     286              : !> \param zij ...
     287              : !> \param vectors ...
     288              : !> \param para_env ...
     289              : !> \param max_iter ...
     290              : !> \param eps_localization ...
     291              : !> \param sweeps ...
     292              : !> \param out_each ...
     293              : !> \param target_time ...
     294              : !> \param start_time ...
     295              : !> \param restricted ...
     296              : !> \par History
     297              : !> \author Joost VandeVondele (02.2010)
     298              : ! **************************************************************************************************
     299          390 :    SUBROUTINE jacobi_rotations(weights, zij, vectors, para_env, max_iter, &
     300              :                                eps_localization, sweeps, out_each, target_time, start_time, restricted)
     301              : 
     302              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
     303              :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :), vectors
     304              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     305              :       INTEGER, INTENT(IN)                                :: max_iter
     306              :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
     307              :       INTEGER                                            :: sweeps
     308              :       INTEGER, INTENT(IN)                                :: out_each
     309              :       REAL(dp)                                           :: target_time, start_time
     310              :       INTEGER                                            :: restricted
     311              : 
     312          390 :       IF (para_env%num_pe == 1) THEN
     313              :          CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, &
     314            0 :                                       sweeps, out_each, restricted=restricted)
     315              :       ELSE
     316              :          CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
     317          390 :                               sweeps, out_each, target_time, start_time, restricted=restricted)
     318              :       END IF
     319              : 
     320          390 :    END SUBROUTINE jacobi_rotations
     321              : 
     322              : ! **************************************************************************************************
     323              : !> \brief this routine, private to the module is a serial backup, till we have jacobi_rot_para to work in serial
     324              : !>        while the routine below works in parallel, it is too slow to be useful
     325              : !> \param weights ...
     326              : !> \param zij ...
     327              : !> \param vectors ...
     328              : !> \param max_iter ...
     329              : !> \param eps_localization ...
     330              : !> \param sweeps ...
     331              : !> \param out_each ...
     332              : !> \param restricted ...
     333              : ! **************************************************************************************************
     334            0 :    SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, &
     335              :                                       out_each, restricted)
     336              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
     337              :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :), vectors
     338              :       INTEGER, INTENT(IN)                                :: max_iter
     339              :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
     340              :       INTEGER                                            :: sweeps
     341              :       INTEGER, INTENT(IN)                                :: out_each
     342              :       INTEGER                                            :: restricted
     343              : 
     344              :       CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rotations_serial'
     345              : 
     346            0 :       COMPLEX(KIND=dp), POINTER                          :: mii(:), mij(:), mjj(:)
     347              :       INTEGER                                            :: dim2, handle, idim, istate, jstate, &
     348              :                                                             nstate, unit_nr
     349              :       REAL(KIND=dp)                                      :: ct, st, t1, t2, theta, tolerance
     350              :       TYPE(cp_cfm_type)                                  :: c_rmat
     351              :       TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:)       :: c_zij
     352              :       TYPE(cp_fm_type)                                   :: rmat
     353              : 
     354            0 :       CALL timeset(routineN, handle)
     355              : 
     356            0 :       dim2 = SIZE(zij, 2)
     357            0 :       ALLOCATE (c_zij(dim2))
     358              :       NULLIFY (mii, mij, mjj)
     359            0 :       ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
     360              : 
     361            0 :       CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
     362            0 :       CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
     363              : 
     364            0 :       CALL cp_cfm_create(c_rmat, zij(1, 1)%matrix_struct)
     365            0 :       CALL cp_cfm_set_all(c_rmat, (0._dp, 0._dp), (1._dp, 0._dp))
     366            0 :       DO idim = 1, dim2
     367            0 :          CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
     368              :          c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
     369            0 :                                         zij(2, idim)%local_data, dp)
     370              :       END DO
     371              : 
     372            0 :       CALL cp_fm_get_info(rmat, nrow_global=nstate)
     373            0 :       tolerance = 1.0e10_dp
     374              : 
     375            0 :       sweeps = 0
     376            0 :       unit_nr = -1
     377            0 :       IF (rmat%matrix_struct%para_env%is_source()) THEN
     378            0 :          unit_nr = cp_logger_get_default_unit_nr()
     379            0 :          WRITE (unit_nr, '(T4,A )') " Localization by iterative Jacobi rotation"
     380              :       END IF
     381              : 
     382            0 :       IF (restricted > 0) THEN
     383            0 :          unit_nr = cp_logger_get_default_unit_nr()
     384            0 :          WRITE (unit_nr, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
     385            0 :          nstate = nstate - restricted
     386              :       END IF
     387              : 
     388              :       ! do jacobi sweeps until converged
     389            0 :       DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
     390            0 :          sweeps = sweeps + 1
     391            0 :          t1 = m_walltime()
     392              : 
     393            0 :          DO istate = 1, nstate
     394            0 :             DO jstate = istate + 1, nstate
     395            0 :                DO idim = 1, dim2
     396            0 :                   CALL cp_cfm_get_element(c_zij(idim), istate, istate, mii(idim))
     397            0 :                   CALL cp_cfm_get_element(c_zij(idim), istate, jstate, mij(idim))
     398            0 :                   CALL cp_cfm_get_element(c_zij(idim), jstate, jstate, mjj(idim))
     399              :                END DO
     400            0 :                CALL get_angle(mii, mjj, mij, weights, theta)
     401            0 :                st = SIN(theta)
     402            0 :                ct = COS(theta)
     403            0 :                CALL rotate_zij(istate, jstate, st, ct, c_zij)
     404              : 
     405            0 :                CALL rotate_rmat(istate, jstate, st, ct, c_rmat)
     406              :             END DO
     407              :          END DO
     408              : 
     409            0 :          CALL check_tolerance(c_zij, weights, tolerance)
     410              : 
     411            0 :          t2 = m_walltime()
     412            0 :          IF (unit_nr > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
     413              :             WRITE (unit_nr, '(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') &
     414            0 :                "Iteration:", sweeps, "Tolerance:", tolerance, "Time:", t2 - t1
     415            0 :             CALL m_flush(unit_nr)
     416              :          END IF
     417              : 
     418              :       END DO
     419              : 
     420            0 :       DO idim = 1, dim2
     421            0 :          zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
     422            0 :          zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
     423            0 :          CALL cp_cfm_release(c_zij(idim))
     424              :       END DO
     425            0 :       DEALLOCATE (c_zij)
     426            0 :       DEALLOCATE (mii, mij, mjj)
     427            0 :       rmat%local_data = REAL(c_rmat%local_data, dp)
     428            0 :       CALL cp_cfm_release(c_rmat)
     429            0 :       CALL rotate_orbitals(rmat, vectors)
     430            0 :       CALL cp_fm_release(rmat)
     431              : 
     432            0 :       CALL timestop(handle)
     433              : 
     434            0 :    END SUBROUTINE jacobi_rotations_serial
     435              : ! **************************************************************************************************
     436              : !> \brief very similar to jacobi_rotations_serial with some extra output options
     437              : !> \param weights ...
     438              : !> \param c_zij ...
     439              : !> \param max_iter ...
     440              : !> \param c_rmat ...
     441              : !> \param eps_localization ...
     442              : !> \param tol_out ...
     443              : !> \param jsweeps ...
     444              : !> \param out_each ...
     445              : !> \param c_zij_out ...
     446              : !> \param grad_final ...
     447              : ! **************************************************************************************************
     448            0 :    SUBROUTINE jacobi_rotations_serial_1(weights, c_zij, max_iter, c_rmat, eps_localization, &
     449            0 :                                         tol_out, jsweeps, out_each, c_zij_out, grad_final)
     450              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
     451              :       TYPE(cp_cfm_type), INTENT(IN)                      :: c_zij(:)
     452              :       INTEGER, INTENT(IN)                                :: max_iter
     453              :       TYPE(cp_cfm_type), INTENT(IN)                      :: c_rmat
     454              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: eps_localization
     455              :       REAL(KIND=dp), INTENT(OUT), OPTIONAL               :: tol_out
     456              :       INTEGER, INTENT(OUT), OPTIONAL                     :: jsweeps
     457              :       INTEGER, INTENT(IN), OPTIONAL                      :: out_each
     458              :       TYPE(cp_cfm_type), INTENT(IN), OPTIONAL            :: c_zij_out(:)
     459              :       TYPE(cp_fm_type), INTENT(OUT), OPTIONAL, POINTER   :: grad_final
     460              : 
     461              :       CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rotations_serial_1'
     462              : 
     463              :       COMPLEX(KIND=dp)                                   :: mzii
     464              :       COMPLEX(KIND=dp), POINTER                          :: mii(:), mij(:), mjj(:)
     465              :       INTEGER                                            :: dim2, handle, idim, istate, jstate, &
     466              :                                                             nstate, sweeps, unit_nr
     467              :       REAL(KIND=dp)                                      :: alpha, avg_spread_ii, ct, spread_ii, st, &
     468              :                                                             sum_spread_ii, t1, t2, theta, tolerance
     469              :       TYPE(cp_cfm_type)                                  :: c_rmat_local
     470              :       TYPE(cp_cfm_type), ALLOCATABLE                     :: c_zij_local(:)
     471              : 
     472            0 :       CALL timeset(routineN, handle)
     473              : 
     474            0 :       dim2 = SIZE(c_zij)
     475              :       NULLIFY (mii, mij, mjj)
     476            0 :       ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
     477              : 
     478            0 :       ALLOCATE (c_zij_local(dim2))
     479            0 :       CALL cp_cfm_create(c_rmat_local, c_rmat%matrix_struct)
     480            0 :       CALL cp_cfm_set_all(c_rmat_local, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
     481            0 :       DO idim = 1, dim2
     482            0 :          CALL cp_cfm_create(c_zij_local(idim), c_zij(idim)%matrix_struct)
     483            0 :          c_zij_local(idim)%local_data = c_zij(idim)%local_data
     484              :       END DO
     485              : 
     486            0 :       CALL cp_cfm_get_info(c_rmat_local, nrow_global=nstate)
     487            0 :       tolerance = 1.0e10_dp
     488              : 
     489            0 :       IF (PRESENT(grad_final)) CALL cp_fm_set_all(grad_final, 0.0_dp)
     490              : 
     491            0 :       sweeps = 0
     492            0 :       IF (PRESENT(out_each)) THEN
     493            0 :          unit_nr = -1
     494            0 :          IF (c_rmat_local%matrix_struct%para_env%is_source()) THEN
     495            0 :             unit_nr = cp_logger_get_default_unit_nr()
     496              :          END IF
     497            0 :          alpha = 0.0_dp
     498            0 :          DO idim = 1, dim2
     499            0 :             alpha = alpha + weights(idim)
     500              :          END DO
     501              :       END IF
     502              : 
     503              :       ! do jacobi sweeps until converged
     504            0 :       DO WHILE (sweeps < max_iter)
     505            0 :          sweeps = sweeps + 1
     506            0 :          IF (PRESENT(eps_localization)) THEN
     507            0 :             IF (tolerance < eps_localization) EXIT
     508              :          END IF
     509            0 :          IF (PRESENT(out_each)) t1 = m_walltime()
     510              : 
     511            0 :          DO istate = 1, nstate
     512            0 :             DO jstate = istate + 1, nstate
     513            0 :                DO idim = 1, dim2
     514            0 :                   CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mii(idim))
     515            0 :                   CALL cp_cfm_get_element(c_zij_local(idim), istate, jstate, mij(idim))
     516            0 :                   CALL cp_cfm_get_element(c_zij_local(idim), jstate, jstate, mjj(idim))
     517              :                END DO
     518            0 :                CALL get_angle(mii, mjj, mij, weights, theta)
     519            0 :                st = SIN(theta)
     520            0 :                ct = COS(theta)
     521            0 :                CALL rotate_zij(istate, jstate, st, ct, c_zij_local)
     522              : 
     523            0 :                CALL rotate_rmat(istate, jstate, st, ct, c_rmat_local)
     524              :             END DO
     525              :          END DO
     526              : 
     527            0 :          IF (PRESENT(grad_final)) THEN
     528            0 :             CALL check_tolerance(c_zij_local, weights, tolerance, grad=grad_final)
     529              :          ELSE
     530            0 :             CALL check_tolerance(c_zij_local, weights, tolerance)
     531              :          END IF
     532            0 :          IF (PRESENT(tol_out)) tol_out = tolerance
     533              : 
     534            0 :          IF (PRESENT(out_each)) THEN
     535            0 :             t2 = m_walltime()
     536            0 :             IF (unit_nr > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
     537            0 :                sum_spread_ii = 0.0_dp
     538            0 :                DO istate = 1, nstate
     539              :                   spread_ii = 0.0_dp
     540            0 :                   DO idim = 1, dim2
     541            0 :                      CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mzii)
     542              :                      spread_ii = spread_ii + weights(idim)* &
     543            0 :                                  ABS(mzii)**2/twopi/twopi
     544              :                   END DO
     545            0 :                   sum_spread_ii = sum_spread_ii + spread_ii
     546              :                END DO
     547            0 :                sum_spread_ii = alpha*nstate/twopi/twopi - sum_spread_ii
     548            0 :                avg_spread_ii = sum_spread_ii/nstate
     549              :                WRITE (unit_nr, '(T4,A,T26,A,T48,A,T64,A)') &
     550            0 :                   "Iteration", "Avg. Spread_ii", "Tolerance", "Time"
     551              :                WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4,T60,F8.3)') &
     552            0 :                   sweeps, avg_spread_ii, tolerance, t2 - t1
     553            0 :                CALL m_flush(unit_nr)
     554              :             END IF
     555            0 :             IF (PRESENT(jsweeps)) jsweeps = sweeps
     556              :          END IF
     557              : 
     558              :       END DO
     559              : 
     560            0 :       IF (PRESENT(c_zij_out)) THEN
     561            0 :          DO idim = 1, dim2
     562            0 :             CALL cp_cfm_to_cfm(c_zij_local(idim), c_zij_out(idim))
     563              :          END DO
     564              :       END IF
     565            0 :       CALL cp_cfm_to_cfm(c_rmat_local, c_rmat)
     566              : 
     567            0 :       DEALLOCATE (mii, mij, mjj)
     568            0 :       DO idim = 1, dim2
     569            0 :          CALL cp_cfm_release(c_zij_local(idim))
     570              :       END DO
     571            0 :       DEALLOCATE (c_zij_local)
     572            0 :       CALL cp_cfm_release(c_rmat_local)
     573              : 
     574            0 :       CALL timestop(handle)
     575              : 
     576            0 :    END SUBROUTINE jacobi_rotations_serial_1
     577              : ! **************************************************************************************************
     578              : !> \brief combine jacobi rotations (serial) and conjugate gradient with golden section line search
     579              : !>        for partially occupied wannier functions
     580              : !> \param para_env ...
     581              : !> \param weights ...
     582              : !> \param zij ...
     583              : !> \param vectors ...
     584              : !> \param max_iter ...
     585              : !> \param eps_localization ...
     586              : !> \param iter ...
     587              : !> \param out_each ...
     588              : !> \param nextra ...
     589              : !> \param do_cg ...
     590              : !> \param nmo ...
     591              : !> \param vectors_2 ...
     592              : !> \param mos_guess ...
     593              : ! **************************************************************************************************
     594            2 :    SUBROUTINE jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, &
     595              :                                iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
     596              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     597              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
     598              :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :), vectors
     599              :       INTEGER, INTENT(IN)                                :: max_iter
     600              :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
     601              :       INTEGER                                            :: iter
     602              :       INTEGER, INTENT(IN)                                :: out_each, nextra
     603              :       LOGICAL, INTENT(IN)                                :: do_cg
     604              :       INTEGER, INTENT(IN), OPTIONAL                      :: nmo
     605              :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: vectors_2, mos_guess
     606              : 
     607              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'jacobi_cg_edf_ls'
     608              :       COMPLEX(KIND=dp), PARAMETER                        :: cone = (1.0_dp, 0.0_dp), &
     609              :                                                             czero = (0.0_dp, 0.0_dp)
     610              :       REAL(KIND=dp), PARAMETER                           :: gold_sec = 0.3819_dp
     611              : 
     612              :       COMPLEX(KIND=dp)                                   :: cnorm2_Gct, cnorm2_Gct_cross, mzii
     613            2 :       COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :)     :: tmp_cmat
     614            2 :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: arr_zii
     615            2 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: matrix_zii
     616              :       INTEGER :: dim2, handle, icinit, idim, istate, line_search_count, line_searches, lsl, lsm, &
     617              :          lsr, miniter, nao, ndummy, nocc, norextra, northo, nstate, unit_nr
     618              :       INTEGER, DIMENSION(1)                              :: iloc
     619              :       LOGICAL                                            :: do_cinit_mo, do_cinit_random, &
     620              :                                                             do_U_guess_mo, new_direction
     621              :       REAL(KIND=dp) :: alpha, avg_spread_ii, beta, beta_pr, ds, ds_min, mintol, norm, norm2_Gct, &
     622              :          norm2_Gct_cross, norm2_old, spread_ii, spread_sum, sum_spread_ii, t1, tol, tolc, weight
     623            2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: sum_spread
     624            2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: tmp_mat, tmp_mat_1
     625              :       REAL(KIND=dp), DIMENSION(50)                       :: energy, pos
     626            2 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: tmp_arr
     627              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     628              :       TYPE(cp_cfm_type)                                  :: c_tilde, ctrans_lambda, Gct_old, &
     629              :                                                             grad_ctilde, skc, tmp_cfm, tmp_cfm_1, &
     630              :                                                             tmp_cfm_2, U, UL, V, VL, zdiag
     631            2 :       TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:)       :: c_zij, zij_0
     632              :       TYPE(cp_fm_struct_type), POINTER                   :: tmp_fm_struct
     633              :       TYPE(cp_fm_type)                                   :: id_nextra, matrix_U, matrix_V, &
     634              :                                                             matrix_V_all, rmat, tmp_fm, vectors_all
     635              : 
     636            2 :       CALL timeset(routineN, handle)
     637              : 
     638            2 :       dim2 = SIZE(zij, 2)
     639            2 :       NULLIFY (context)
     640            2 :       NULLIFY (matrix_zii, arr_zii)
     641            2 :       NULLIFY (tmp_fm_struct)
     642            2 :       NULLIFY (tmp_arr)
     643              : 
     644           12 :       ALLOCATE (c_zij(dim2))
     645              : 
     646            2 :       CALL cp_fm_get_info(zij(1, 1), nrow_global=nstate)
     647              : 
     648            6 :       ALLOCATE (sum_spread(nstate))
     649            8 :       ALLOCATE (matrix_zii(nstate, dim2))
     650          266 :       matrix_zii = czero
     651           88 :       sum_spread = 0.0_dp
     652              : 
     653              :       alpha = 0.0_dp
     654            8 :       DO idim = 1, dim2
     655            6 :          alpha = alpha + weights(idim)
     656            6 :          CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
     657              :          c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
     658         5813 :                                         zij(2, idim)%local_data, dp)
     659              :       END DO
     660              : 
     661           10 :       ALLOCATE (zij_0(dim2))
     662              : 
     663            2 :       CALL cp_cfm_create(U, zij(1, 1)%matrix_struct)
     664            2 :       CALL cp_fm_create(matrix_U, zij(1, 1)%matrix_struct)
     665              : 
     666            2 :       CALL cp_cfm_set_all(U, czero, cone)
     667            2 :       CALL cp_fm_set_all(matrix_U, 0.0_dp, 1.0_dp)
     668              : 
     669            2 :       CALL cp_fm_get_info(vectors, nrow_global=nao)
     670            2 :       IF (nextra > 0) THEN
     671            2 :          IF (PRESENT(mos_guess)) THEN
     672            2 :             do_cinit_random = .FALSE.
     673            2 :             do_cinit_mo = .TRUE.
     674            2 :             CALL cp_fm_get_info(mos_guess, ncol_global=ndummy)
     675              :          ELSE
     676            0 :             do_cinit_random = .TRUE.
     677            0 :             do_cinit_mo = .FALSE.
     678            0 :             ndummy = nstate
     679              :          END IF
     680              : 
     681              :          IF (do_cinit_random) THEN
     682              :             icinit = 1
     683              :             do_U_guess_mo = .FALSE.
     684              :          ELSEIF (do_cinit_mo) THEN
     685            2 :             icinit = 2
     686            2 :             do_U_guess_mo = .TRUE.
     687              :          END IF
     688              : 
     689            2 :          nocc = nstate - nextra
     690            2 :          northo = nmo - nocc
     691            2 :          norextra = nmo - nstate
     692            2 :          CALL cp_fm_struct_get(zij(1, 1)%matrix_struct, context=context)
     693              : 
     694            8 :          ALLOCATE (tmp_cmat(nstate, nstate))
     695              :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nmo, &
     696            2 :                                   para_env=para_env, context=context)
     697            8 :          DO idim = 1, dim2
     698            6 :             CALL cp_cfm_create(zij_0(idim), tmp_fm_struct)
     699            6 :             CALL cp_cfm_set_all(zij_0(idim), czero, cone)
     700            6 :             CALL cp_cfm_get_submatrix(c_zij(idim), tmp_cmat)
     701            8 :             CALL cp_cfm_set_submatrix(zij_0(idim), tmp_cmat)
     702              :          END DO
     703            2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     704            2 :          DEALLOCATE (tmp_cmat)
     705              : 
     706              :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nstate, &
     707            2 :                                   para_env=para_env, context=context)
     708            2 :          CALL cp_cfm_create(V, tmp_fm_struct)
     709            2 :          CALL cp_fm_create(matrix_V, tmp_fm_struct)
     710            2 :          CALL cp_cfm_create(zdiag, tmp_fm_struct)
     711            2 :          CALL cp_fm_create(rmat, tmp_fm_struct)
     712            2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     713            2 :          CALL cp_cfm_set_all(V, czero, cone)
     714            2 :          CALL cp_fm_set_all(matrix_V, 0.0_dp, 1.0_dp)
     715              : 
     716              :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=ndummy, &
     717            2 :                                   para_env=para_env, context=context)
     718            2 :          CALL cp_fm_create(matrix_V_all, tmp_fm_struct)
     719            2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     720            2 :          CALL cp_fm_set_all(matrix_V_all, 0._dp, 1._dp)
     721              : 
     722            6 :          ALLOCATE (arr_zii(nstate))
     723              : 
     724              :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
     725            2 :                                   para_env=para_env, context=context)
     726            2 :          CALL cp_cfm_create(c_tilde, tmp_fm_struct)
     727            2 :          CALL cp_cfm_create(grad_ctilde, tmp_fm_struct)
     728            2 :          CALL cp_cfm_create(Gct_old, tmp_fm_struct)
     729            2 :          CALL cp_cfm_create(skc, tmp_fm_struct)
     730            2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     731            2 :          CALL cp_cfm_set_all(c_tilde, czero)
     732            2 :          CALL cp_cfm_set_all(Gct_old, czero)
     733            2 :          CALL cp_cfm_set_all(skc, czero)
     734              : 
     735              :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nstate, &
     736            2 :                                   para_env=para_env, context=context)
     737            2 :          CALL cp_cfm_create(VL, tmp_fm_struct)
     738            2 :          CALL cp_cfm_set_all(VL, czero)
     739            2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     740              : 
     741              :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nextra, &
     742            2 :                                   para_env=para_env, context=context)
     743            2 :          CALL cp_fm_create(id_nextra, tmp_fm_struct)
     744            2 :          CALL cp_cfm_create(ctrans_lambda, tmp_fm_struct)
     745            2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     746            2 :          CALL cp_cfm_set_all(ctrans_lambda, czero)
     747            2 :          CALL cp_fm_set_all(id_nextra, 0.0_dp, 1.0_dp)
     748              : 
     749              :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nstate, &
     750            2 :                                   para_env=para_env, context=context)
     751            2 :          CALL cp_cfm_create(UL, tmp_fm_struct)
     752            2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     753            2 :          CALL cp_cfm_set_all(UL, czero)
     754              : 
     755              :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nao, ncol_global=nmo, &
     756            2 :                                   para_env=para_env, context=context)
     757            2 :          CALL cp_fm_create(vectors_all, tmp_fm_struct)
     758            2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     759            8 :          ALLOCATE (tmp_mat(nao, nstate))
     760            2 :          CALL cp_fm_get_submatrix(vectors, tmp_mat)
     761            2 :          CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, 1, nao, nstate)
     762            2 :          DEALLOCATE (tmp_mat)
     763            8 :          ALLOCATE (tmp_mat(nao, norextra))
     764            2 :          CALL cp_fm_get_submatrix(vectors_2, tmp_mat)
     765            2 :          CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, nstate + 1, nao, norextra)
     766            2 :          DEALLOCATE (tmp_mat)
     767              : 
     768              :          ! initialize c_tilde
     769           14 :          SELECT CASE (icinit)
     770              :          CASE (1) ! random coefficients
     771              :             !WRITE (*, *) "RANDOM INITIAL GUESS FOR C"
     772            0 :             CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
     773            0 :             CALL cp_fm_init_random(tmp_fm, nextra)
     774            0 :             CALL ortho_vectors(tmp_fm)
     775            0 :             c_tilde%local_data = tmp_fm%local_data
     776            0 :             CALL cp_fm_release(tmp_fm)
     777            0 :             ALLOCATE (tmp_cmat(northo, nextra))
     778            0 :             CALL cp_cfm_get_submatrix(c_tilde, tmp_cmat)
     779            0 :             CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, nocc + 1, northo, nextra)
     780            0 :             DEALLOCATE (tmp_cmat)
     781              :          CASE (2) ! MO based coeffs
     782            2 :             CALL parallel_gemm("T", "N", nmo, ndummy, nao, 1.0_dp, vectors_all, mos_guess, 0.0_dp, matrix_V_all)
     783            6 :             ALLOCATE (tmp_arr(nmo))
     784            8 :             ALLOCATE (tmp_mat(nmo, ndummy))
     785            8 :             ALLOCATE (tmp_mat_1(nmo, nstate))
     786              :             ! normalize matrix_V_all
     787            2 :             CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat)
     788           88 :             DO istate = 1, ndummy
     789         4558 :                tmp_arr(:) = tmp_mat(:, istate)
     790         4558 :                norm = norm2(tmp_arr)
     791         4558 :                tmp_arr(:) = tmp_arr(:)/norm
     792         4560 :                tmp_mat(:, istate) = tmp_arr(:)
     793              :             END DO
     794            2 :             CALL cp_fm_set_submatrix(matrix_V_all, tmp_mat)
     795            2 :             CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat_1, 1, 1, nmo, nstate)
     796            2 :             CALL cp_fm_set_submatrix(matrix_V, tmp_mat_1)
     797            2 :             DEALLOCATE (tmp_arr, tmp_mat, tmp_mat_1)
     798            2 :             CALL cp_fm_to_cfm(msourcer=matrix_V, mtarget=V)
     799            8 :             ALLOCATE (tmp_mat(northo, ndummy))
     800            8 :             ALLOCATE (tmp_mat_1(northo, nextra))
     801            2 :             CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat, nocc + 1, 1, northo, ndummy)
     802            6 :             ALLOCATE (tmp_arr(ndummy))
     803           88 :             tmp_arr = 0.0_dp
     804           88 :             DO istate = 1, ndummy
     805         1034 :                tmp_arr(istate) = norm2(tmp_mat(:, istate))
     806              :             END DO
     807              :             ! find edfs
     808            6 :             DO istate = 1, nextra
     809          180 :                iloc = MAXLOC(tmp_arr)
     810           48 :                tmp_mat_1(:, istate) = tmp_mat(:, iloc(1))
     811            6 :                tmp_arr(iloc(1)) = 0.0_dp
     812              :             END DO
     813              : 
     814            2 :             DEALLOCATE (tmp_arr, tmp_mat)
     815              : 
     816              :             CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
     817            2 :                                      para_env=para_env, context=context)
     818            2 :             CALL cp_fm_create(tmp_fm, tmp_fm_struct)
     819            2 :             CALL cp_fm_struct_release(tmp_fm_struct)
     820            2 :             CALL cp_fm_set_submatrix(tmp_fm, tmp_mat_1)
     821            2 :             DEALLOCATE (tmp_mat_1)
     822            2 :             CALL ortho_vectors(tmp_fm)
     823            2 :             CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
     824            2 :             CALL cp_fm_release(tmp_fm)
     825              :             ! initialize U
     826            2 :             IF (do_U_guess_mo) THEN
     827            8 :                ALLOCATE (tmp_cmat(nocc, nstate))
     828            2 :                CALL cp_cfm_get_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
     829            2 :                CALL cp_cfm_set_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
     830            2 :                DEALLOCATE (tmp_cmat)
     831            8 :                ALLOCATE (tmp_cmat(northo, nstate))
     832            2 :                CALL cp_cfm_get_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
     833            2 :                CALL cp_cfm_set_submatrix(VL, tmp_cmat, 1, 1, northo, nstate)
     834            2 :                DEALLOCATE (tmp_cmat)
     835            2 :                CALL parallel_gemm("C", "N", nextra, nstate, northo, cone, c_tilde, VL, czero, UL)
     836            8 :                ALLOCATE (tmp_cmat(nextra, nstate))
     837            2 :                CALL cp_cfm_get_submatrix(UL, tmp_cmat, 1, 1, nextra, nstate)
     838            2 :                CALL cp_cfm_set_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
     839            2 :                DEALLOCATE (tmp_cmat)
     840            2 :                CALL cp_fm_create(tmp_fm, U%matrix_struct)
     841         1937 :                tmp_fm%local_data = REAL(U%local_data, KIND=dp)
     842            2 :                CALL ortho_vectors(tmp_fm)
     843            2 :                CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=U)
     844            2 :                CALL cp_fm_release(tmp_fm)
     845            4 :                CALL cp_cfm_to_fm(U, matrix_U)
     846              :             END IF
     847              :             ! reevaluate V
     848            8 :             ALLOCATE (tmp_cmat(nocc, nstate))
     849            2 :             CALL cp_cfm_get_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
     850            2 :             CALL cp_cfm_set_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
     851            2 :             DEALLOCATE (tmp_cmat)
     852            8 :             ALLOCATE (tmp_cmat(nextra, nstate))
     853            2 :             CALL cp_cfm_get_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
     854            2 :             CALL cp_cfm_set_submatrix(UL, tmp_cmat, 1, 1, nextra, nstate)
     855            2 :             DEALLOCATE (tmp_cmat)
     856            2 :             CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, UL, czero, VL)
     857            8 :             ALLOCATE (tmp_cmat(northo, nstate))
     858            2 :             CALL cp_cfm_get_submatrix(VL, tmp_cmat)
     859            2 :             CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
     860            6 :             DEALLOCATE (tmp_cmat)
     861              :          END SELECT
     862              :       ELSE
     863            0 :          DO idim = 1, dim2
     864            0 :             CALL cp_cfm_create(zij_0(idim), zij(1, 1)%matrix_struct)
     865            0 :             CALL cp_cfm_to_cfm(c_zij(idim), zij_0(idim))
     866              :          END DO
     867            0 :          CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
     868            0 :          CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
     869              :       END IF
     870              : 
     871            2 :       unit_nr = -1
     872            2 :       IF (rmat%matrix_struct%para_env%is_source()) THEN
     873            1 :          unit_nr = cp_logger_get_default_unit_nr()
     874            1 :          WRITE (unit_nr, '(T4,A )') " Localization by combined Jacobi rotations and Non-Linear Conjugate Gradient"
     875              :       END IF
     876              : 
     877            2 :       norm2_old = 1.0E30_dp
     878            2 :       ds_min = 1.0_dp
     879            2 :       new_direction = .TRUE.
     880            2 :       iter = 0
     881            2 :       line_searches = 0
     882            2 :       line_search_count = 0
     883            2 :       tol = 1.0E+20_dp
     884            2 :       mintol = 1.0E+10_dp
     885            2 :       miniter = 0
     886              : 
     887              :       !IF (nextra > 0) WRITE(*,*) 'random_guess, MO_guess, U_guess, conjugate_gradient: ', &
     888              :       !                            do_cinit_random, do_cinit_mo, do_U_guess_mo, do_cg
     889              : 
     890              :       ! do conjugate gradient until converged
     891           34 :       DO WHILE (iter < max_iter)
     892           34 :          iter = iter + 1
     893              :          !WRITE(*,*) 'iter = ', iter
     894           34 :          t1 = m_walltime()
     895              : 
     896           34 :          IF (iter > 1) THEN
     897              :             ! comput U
     898           32 :             CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
     899           32 :             CALL cp_cfm_create(tmp_cfm_2, zij(1, 1)%matrix_struct)
     900           32 :             IF (para_env%num_pe == 1) THEN
     901            0 :                CALL jacobi_rotations_serial_1(weights, c_zij, 1, tmp_cfm_2, tol_out=tol)
     902              :             ELSE
     903           32 :                CALL jacobi_rot_para_1(weights, c_zij, para_env, 1, tmp_cfm_2, tol_out=tol)
     904              :             END IF
     905           32 :             CALL parallel_gemm('N', 'N', nstate, nstate, nstate, cone, U, tmp_cfm_2, czero, tmp_cfm)
     906           32 :             CALL cp_cfm_to_cfm(tmp_cfm, U)
     907           32 :             CALL cp_cfm_release(tmp_cfm)
     908           32 :             CALL cp_cfm_release(tmp_cfm_2)
     909              :          END IF
     910              : 
     911           34 :          IF (nextra > 0) THEN
     912          136 :             ALLOCATE (tmp_cmat(nextra, nstate))
     913           34 :             CALL cp_cfm_get_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
     914           34 :             CALL cp_cfm_set_submatrix(UL, tmp_cmat)
     915           34 :             DEALLOCATE (tmp_cmat)
     916           34 :             IF (iter > 1) THEN
     917              :                ! orthonormalize c_tilde
     918           32 :                CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
     919          448 :                tmp_fm%local_data = REAL(c_tilde%local_data, KIND=dp)
     920           32 :                CALL ortho_vectors(tmp_fm)
     921           32 :                CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
     922           32 :                CALL cp_fm_release(tmp_fm)
     923              : 
     924          128 :                ALLOCATE (tmp_cmat(nocc, nstate))
     925           32 :                CALL cp_cfm_get_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
     926           32 :                CALL cp_cfm_set_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
     927           32 :                DEALLOCATE (tmp_cmat)
     928           32 :                CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, UL, czero, VL)
     929          128 :                ALLOCATE (tmp_cmat(northo, nstate))
     930           32 :                CALL cp_cfm_get_submatrix(VL, tmp_cmat)
     931           32 :                CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
     932           64 :                DEALLOCATE (tmp_cmat)
     933              :             END IF
     934              : 
     935              :             ! reset if new_direction
     936           34 :             IF (new_direction .AND. MOD(line_searches, 20) == 5) THEN
     937            0 :                CALL cp_cfm_set_all(skc, czero)
     938            0 :                CALL cp_cfm_set_all(Gct_old, czero)
     939            0 :                norm2_old = 1.0E30_dp
     940              :             END IF
     941              : 
     942           34 :             CALL cp_cfm_create(tmp_cfm, V%matrix_struct)
     943           34 :             CALL cp_cfm_to_cfm(V, tmp_cfm)
     944           34 :             CALL cp_cfm_create(tmp_cfm_1, V%matrix_struct)
     945           68 :             ndummy = nmo
     946              :          ELSE
     947            0 :             CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
     948            0 :             CALL cp_cfm_to_cfm(U, tmp_cfm)
     949            0 :             CALL cp_cfm_create(tmp_cfm_1, zij(1, 1)%matrix_struct)
     950            0 :             ndummy = nstate
     951              :          END IF
     952              :          ! update z_ij
     953          136 :          DO idim = 1, dim2
     954              :             ! 'tmp_cfm_1 = zij_0*tmp_cfm'
     955              :             CALL parallel_gemm("N", "N", ndummy, nstate, ndummy, cone, zij_0(idim), &
     956          102 :                                tmp_cfm, czero, tmp_cfm_1)
     957              :             ! 'c_zij = tmp_cfm_dagg*tmp_cfm_1'
     958              :             CALL parallel_gemm("C", "N", nstate, nstate, ndummy, cone, tmp_cfm, tmp_cfm_1, &
     959          136 :                                czero, c_zij(idim))
     960              :          END DO
     961           34 :          CALL cp_cfm_release(tmp_cfm)
     962           34 :          CALL cp_cfm_release(tmp_cfm_1)
     963              :          ! compute spread
     964         1496 :          DO istate = 1, nstate
     965         1462 :             spread_ii = 0.0_dp
     966         5848 :             DO idim = 1, dim2
     967         4386 :                CALL cp_cfm_get_element(c_zij(idim), istate, istate, mzii)
     968              :                spread_ii = spread_ii + weights(idim)* &
     969         4386 :                            ABS(mzii)**2/twopi/twopi
     970         5848 :                matrix_zii(istate, idim) = mzii
     971              :             END DO
     972              :             !WRITE(*,*) 'spread_ii', spread_ii
     973         1496 :             sum_spread(istate) = spread_ii
     974              :          END DO
     975           34 :          CALL c_zij(1)%matrix_struct%para_env%sum(spread_ii)
     976           34 :          spread_sum = accurate_sum(sum_spread)
     977              : 
     978           34 :          IF (nextra > 0) THEN
     979              :             ! update c_tilde
     980           34 :             CALL cp_cfm_set_all(zdiag, czero)
     981           34 :             CALL cp_cfm_set_all(grad_ctilde, czero)
     982           34 :             CALL cp_cfm_create(tmp_cfm, V%matrix_struct)
     983           34 :             CALL cp_cfm_set_all(tmp_cfm, czero)
     984           34 :             CALL cp_cfm_create(tmp_cfm_1, V%matrix_struct)
     985           34 :             CALL cp_cfm_set_all(tmp_cfm_1, czero)
     986          136 :             ALLOCATE (tmp_cmat(northo, nstate))
     987          136 :             DO idim = 1, dim2
     988          102 :                weight = weights(idim)
     989         8874 :                arr_zii = matrix_zii(:, idim)
     990              :                ! tmp_cfm = zij_0*V
     991              :                CALL parallel_gemm("N", "N", nmo, nstate, nmo, cone, &
     992          102 :                                   zij_0(idim), V, czero, tmp_cfm)
     993              :                ! tmp_cfm = tmp_cfm*diag_zij_dagg
     994         4488 :                CALL cp_cfm_column_scale(tmp_cfm, CONJG(arr_zii))
     995              :                ! tmp_cfm_1 = tmp_cfm*U_dagg
     996              :                CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
     997          102 :                                   U, czero, tmp_cfm_1)
     998          102 :                CALL cp_cfm_scale(weight, tmp_cfm_1)
     999              :                ! zdiag = zdiag + tmp_cfm_1'
    1000          102 :                CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
    1001              : 
    1002              :                ! tmp_cfm = zij_0_dagg*V
    1003              :                CALL parallel_gemm("C", "N", nmo, nstate, nmo, cone, &
    1004          102 :                                   zij_0(idim), V, czero, tmp_cfm)
    1005              : 
    1006              :                ! tmp_cfm = tmp_cfm*diag_zij
    1007          102 :                CALL cp_cfm_column_scale(tmp_cfm, arr_zii)
    1008              :                ! tmp_cfm_1 = tmp_cfm*U_dagg
    1009              :                CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
    1010          102 :                                   U, czero, tmp_cfm_1)
    1011          102 :                CALL cp_cfm_scale(weight, tmp_cfm_1)
    1012              :                ! zdiag = zdiag + tmp_cfm_1'
    1013          136 :                CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
    1014              :             END DO ! idim
    1015           34 :             CALL cp_cfm_release(tmp_cfm)
    1016           34 :             CALL cp_cfm_release(tmp_cfm_1)
    1017           34 :             DEALLOCATE (tmp_cmat)
    1018          136 :             ALLOCATE (tmp_cmat(northo, nextra))
    1019              :             CALL cp_cfm_get_submatrix(zdiag, tmp_cmat, nocc + 1, nocc + 1, &
    1020           34 :                                       northo, nextra, .FALSE.)
    1021              :             ! 'grad_ctilde'
    1022           34 :             CALL cp_cfm_set_submatrix(grad_ctilde, tmp_cmat)
    1023           34 :             DEALLOCATE (tmp_cmat)
    1024              :             ! ctrans_lambda = c_tilde_dagg*grad_ctilde
    1025           34 :             CALL parallel_gemm("C", "N", nextra, nextra, northo, cone, c_tilde, grad_ctilde, czero, ctrans_lambda)
    1026              :             !WRITE(*,*) "norm(ctrans_lambda) = ", cp_cfm_norm(ctrans_lambda, "F")
    1027              :             ! 'grad_ctilde = - c_tilde*ctrans_lambda + grad_ctilde'
    1028          102 :             CALL parallel_gemm("N", "N", northo, nextra, nextra, -cone, c_tilde, ctrans_lambda, cone, grad_ctilde)
    1029              :          END IF ! nextra > 0
    1030              : 
    1031              :          ! tolerance
    1032           34 :          IF (nextra > 0) THEN
    1033              :             tolc = 0.0_dp
    1034           34 :             CALL cp_fm_create(tmp_fm, grad_ctilde%matrix_struct)
    1035           34 :             CALL cp_cfm_to_fm(grad_ctilde, tmp_fm)
    1036           34 :             CALL cp_fm_maxabsval(tmp_fm, tolc)
    1037           34 :             CALL cp_fm_release(tmp_fm)
    1038              :             !WRITE(*,*) 'tolc = ', tolc
    1039           34 :             tol = tol + tolc
    1040              :          END IF
    1041              :          !WRITE(*,*) 'tol = ', tol
    1042              : 
    1043           36 :          IF (nextra > 0) THEN
    1044              :             !WRITE(*,*) 'new_direction: ', new_direction
    1045           34 :             IF (new_direction) THEN
    1046            6 :                line_searches = line_searches + 1
    1047            6 :                IF (mintol > tol) THEN
    1048            4 :                   mintol = tol
    1049            4 :                   miniter = iter
    1050              :                END IF
    1051              : 
    1052            6 :                IF (unit_nr > 0 .AND. MODULO(iter, out_each) == 0) THEN
    1053            0 :                   sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
    1054            0 :                   avg_spread_ii = sum_spread_ii/nstate
    1055              :                   WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
    1056            0 :                      "Iteration", "Avg. Spread_ii", "Tolerance"
    1057              :                   WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
    1058            0 :                      iter, avg_spread_ii, tol
    1059            0 :                   CALL m_flush(unit_nr)
    1060              :                END IF
    1061            6 :                IF (tol < eps_localization) EXIT
    1062              : 
    1063            4 :                IF (do_cg) THEN
    1064              :                   cnorm2_Gct = czero
    1065              :                   cnorm2_Gct_cross = czero
    1066            4 :                   CALL cp_cfm_trace(grad_ctilde, Gct_old, cnorm2_Gct_cross)
    1067            4 :                   norm2_Gct_cross = REAL(cnorm2_Gct_cross, KIND=dp)
    1068           56 :                   Gct_old%local_data = grad_ctilde%local_data
    1069            4 :                   CALL cp_cfm_trace(grad_ctilde, Gct_old, cnorm2_Gct)
    1070            4 :                   norm2_Gct = REAL(cnorm2_Gct, KIND=dp)
    1071              :                   ! compute beta_pr
    1072            4 :                   beta_pr = (norm2_Gct - norm2_Gct_cross)/norm2_old
    1073            4 :                   norm2_old = norm2_Gct
    1074            4 :                   beta = MAX(0.0_dp, beta_pr)
    1075              :                   !WRITE(*,*) 'beta = ', beta
    1076              :                   ! compute skc / ska = beta * skc / ska + grad_ctilde / G
    1077            4 :                   CALL cp_cfm_scale(beta, skc)
    1078            4 :                   CALL cp_cfm_scale_and_add(cone, skc, cone, Gct_old)
    1079            4 :                   CALL cp_cfm_trace(skc, Gct_old, cnorm2_Gct_cross)
    1080            4 :                   norm2_Gct_cross = REAL(cnorm2_Gct_cross, KIND=dp)
    1081            4 :                   IF (norm2_Gct_cross <= 0.0_dp) THEN ! back to steepest ascent
    1082            0 :                      CALL cp_cfm_scale_and_add(czero, skc, cone, Gct_old)
    1083              :                   END IF
    1084              :                ELSE
    1085            0 :                   CALL cp_cfm_scale_and_add(czero, skc, cone, grad_ctilde)
    1086              :                END IF
    1087              :                line_search_count = 0
    1088              :             END IF
    1089              : 
    1090           32 :             line_search_count = line_search_count + 1
    1091              :             !WRITE(*,*) 'line_search_count = ', line_search_count
    1092           32 :             energy(line_search_count) = spread_sum
    1093              : 
    1094              :             ! gold line search
    1095           32 :             new_direction = .FALSE.
    1096           32 :             IF (line_search_count == 1) THEN
    1097            4 :                lsl = 1
    1098            4 :                lsr = 0
    1099            4 :                lsm = 1
    1100            4 :                pos(1) = 0.0_dp
    1101            4 :                pos(2) = ds_min/gold_sec
    1102            4 :                ds = pos(2)
    1103              :             ELSE
    1104           28 :                IF (line_search_count == 50) THEN
    1105            0 :                   IF (ABS(energy(line_search_count) - energy(line_search_count - 1)) < 1.0E-4_dp) THEN
    1106            0 :                      CPWARN("Line search failed to converge properly")
    1107            0 :                      ds_min = 0.1_dp
    1108            0 :                      new_direction = .TRUE.
    1109              :                      ds = pos(line_search_count)
    1110            0 :                      line_search_count = 0
    1111              :                   ELSE
    1112            0 :                      CPABORT("No. of line searches exceeds 50")
    1113              :                   END IF
    1114              :                ELSE
    1115           28 :                   IF (lsr == 0) THEN
    1116           28 :                      IF (energy(line_search_count - 1) > energy(line_search_count)) THEN
    1117            0 :                         lsr = line_search_count
    1118            0 :                         pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
    1119              :                      ELSE
    1120           28 :                         lsl = lsm
    1121           28 :                         lsm = line_search_count
    1122           28 :                         pos(line_search_count + 1) = pos(line_search_count)/gold_sec
    1123              :                      END IF
    1124              :                   ELSE
    1125            0 :                      IF (pos(line_search_count) < pos(lsm)) THEN
    1126            0 :                         IF (energy(line_search_count) > energy(lsm)) THEN
    1127              :                            lsr = lsm
    1128              :                            lsm = line_search_count
    1129              :                         ELSE
    1130            0 :                            lsl = line_search_count
    1131              :                         END IF
    1132              :                      ELSE
    1133            0 :                         IF (energy(line_search_count) > energy(lsm)) THEN
    1134              :                            lsl = lsm
    1135              :                            lsm = line_search_count
    1136              :                         ELSE
    1137            0 :                            lsr = line_search_count
    1138              :                         END IF
    1139              :                      END IF
    1140            0 :                      IF (pos(lsr) - pos(lsm) > pos(lsm) - pos(lsl)) THEN
    1141            0 :                         pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
    1142              :                      ELSE
    1143            0 :                         pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
    1144              :                      END IF
    1145            0 :                      IF ((pos(lsr) - pos(lsl)) < 1.0E-3_dp*pos(lsr)) THEN
    1146            0 :                         new_direction = .TRUE.
    1147              :                      END IF
    1148              :                   END IF ! lsr .eq. 0
    1149              :                END IF ! line_search_count .eq. 50
    1150              :                ! now go to the suggested point
    1151           28 :                ds = pos(line_search_count + 1) - pos(line_search_count)
    1152              :                !WRITE(*,*) 'lsl, lsr, lsm, ds = ', lsl, lsr, lsm, ds
    1153           28 :                IF ((ABS(ds) < 1.0E-10_dp) .AND. (lsl == 1)) THEN
    1154            0 :                   new_direction = .TRUE.
    1155            0 :                   ds_min = 0.5_dp/alpha
    1156           28 :                ELSEIF (ABS(ds) > 10.0_dp) THEN
    1157            4 :                   new_direction = .TRUE.
    1158            4 :                   ds_min = 0.5_dp/alpha
    1159              :                ELSE
    1160              :                   ds_min = pos(line_search_count + 1)
    1161              :                END IF
    1162              :             END IF ! first step
    1163              :             ! 'c_tilde = c_tilde + d*skc'
    1164           32 :             CALL cp_cfm_scale(ds, skc)
    1165           32 :             CALL cp_cfm_scale_and_add(cone, c_tilde, cone, skc)
    1166              :          ELSE
    1167            0 :             IF (mintol > tol) THEN
    1168            0 :                mintol = tol
    1169            0 :                miniter = iter
    1170              :             END IF
    1171            0 :             IF (unit_nr > 0 .AND. MODULO(iter, out_each) == 0) THEN
    1172            0 :                sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
    1173            0 :                avg_spread_ii = sum_spread_ii/nstate
    1174              :                WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
    1175            0 :                   "Iteration", "Avg. Spread_ii", "Tolerance"
    1176              :                WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
    1177            0 :                   iter, avg_spread_ii, tol
    1178            0 :                CALL m_flush(unit_nr)
    1179              :             END IF
    1180            0 :             IF (tol < eps_localization) EXIT
    1181              :          END IF ! nextra > 0
    1182              : 
    1183              :       END DO ! iteration
    1184              : 
    1185            2 :       IF ((unit_nr > 0) .AND. (iter == max_iter)) THEN
    1186            0 :          WRITE (unit_nr, '(T4,A,T4,A)') "Min. Itr.", "Min. Tol."
    1187            0 :          WRITE (unit_nr, '(T4,I7,T4,E12.4)') miniter, mintol
    1188            0 :          CALL m_flush(unit_nr)
    1189              :       END IF
    1190              : 
    1191            2 :       CALL cp_cfm_to_fm(U, matrix_U)
    1192              : 
    1193            2 :       IF (nextra > 0) THEN
    1194         2324 :          rmat%local_data = REAL(V%local_data, KIND=dp)
    1195            2 :          CALL rotate_orbitals_edf(rmat, vectors_all, vectors)
    1196              : 
    1197            2 :          CALL cp_cfm_release(c_tilde)
    1198            2 :          CALL cp_cfm_release(grad_ctilde)
    1199            2 :          CALL cp_cfm_release(Gct_old)
    1200            2 :          CALL cp_cfm_release(skc)
    1201            2 :          CALL cp_cfm_release(UL)
    1202            2 :          CALL cp_cfm_release(zdiag)
    1203            2 :          CALL cp_cfm_release(ctrans_lambda)
    1204            2 :          CALL cp_fm_release(id_nextra)
    1205            2 :          CALL cp_fm_release(vectors_all)
    1206            2 :          CALL cp_cfm_release(V)
    1207            2 :          CALL cp_fm_release(matrix_V)
    1208            2 :          CALL cp_fm_release(matrix_V_all)
    1209            2 :          CALL cp_cfm_release(VL)
    1210            2 :          DEALLOCATE (arr_zii)
    1211              :       ELSE
    1212            0 :          rmat%local_data = matrix_U%local_data
    1213            0 :          CALL rotate_orbitals(rmat, vectors)
    1214              :       END IF
    1215            8 :       DO idim = 1, dim2
    1216            8 :          CALL cp_cfm_release(zij_0(idim))
    1217              :       END DO
    1218            2 :       DEALLOCATE (zij_0)
    1219              : 
    1220            8 :       DO idim = 1, dim2
    1221         5811 :          zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
    1222         5811 :          zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
    1223            8 :          CALL cp_cfm_release(c_zij(idim))
    1224              :       END DO
    1225            2 :       DEALLOCATE (c_zij)
    1226            2 :       CALL cp_fm_release(rmat)
    1227            2 :       CALL cp_cfm_release(U)
    1228            2 :       CALL cp_fm_release(matrix_U)
    1229            2 :       DEALLOCATE (matrix_zii, sum_spread)
    1230              : 
    1231            2 :       CALL timestop(handle)
    1232              : 
    1233           10 :    END SUBROUTINE jacobi_cg_edf_ls
    1234              : 
    1235              : ! **************************************************************************************************
    1236              : !> \brief ...
    1237              : !> \param vmatrix ...
    1238              : ! **************************************************************************************************
    1239          108 :    SUBROUTINE ortho_vectors(vmatrix)
    1240              : 
    1241              :       TYPE(cp_fm_type), INTENT(IN)                       :: vmatrix
    1242              : 
    1243              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'ortho_vectors'
    1244              : 
    1245              :       INTEGER                                            :: handle, n, ncol
    1246              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
    1247              :       TYPE(cp_fm_type)                                   :: overlap_vv
    1248              : 
    1249           36 :       CALL timeset(routineN, handle)
    1250              : 
    1251           36 :       NULLIFY (fm_struct_tmp)
    1252              : 
    1253           36 :       CALL cp_fm_get_info(matrix=vmatrix, nrow_global=n, ncol_global=ncol)
    1254              : 
    1255              :       CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ncol, ncol_global=ncol, &
    1256              :                                para_env=vmatrix%matrix_struct%para_env, &
    1257           36 :                                context=vmatrix%matrix_struct%context)
    1258           36 :       CALL cp_fm_create(overlap_vv, fm_struct_tmp, "overlap_vv")
    1259           36 :       CALL cp_fm_struct_release(fm_struct_tmp)
    1260              : 
    1261           36 :       CALL parallel_gemm('T', 'N', ncol, ncol, n, 1.0_dp, vmatrix, vmatrix, 0.0_dp, overlap_vv)
    1262           36 :       CALL cp_fm_cholesky_decompose(overlap_vv)
    1263           36 :       CALL cp_fm_triangular_multiply(overlap_vv, vmatrix, n_cols=ncol, side='R', invert_tr=.TRUE.)
    1264              : 
    1265           36 :       CALL cp_fm_release(overlap_vv)
    1266              : 
    1267           36 :       CALL timestop(handle)
    1268              : 
    1269           36 :    END SUBROUTINE ortho_vectors
    1270              : 
    1271              : ! **************************************************************************************************
    1272              : !> \brief ...
    1273              : !> \param istate ...
    1274              : !> \param jstate ...
    1275              : !> \param st ...
    1276              : !> \param ct ...
    1277              : !> \param zij ...
    1278              : ! **************************************************************************************************
    1279            0 :    SUBROUTINE rotate_zij(istate, jstate, st, ct, zij)
    1280              :       INTEGER, INTENT(IN)                                :: istate, jstate
    1281              :       REAL(KIND=dp), INTENT(IN)                          :: st, ct
    1282              :       TYPE(cp_cfm_type)                                  :: zij(:)
    1283              : 
    1284              :       INTEGER                                            :: id
    1285              : 
    1286              : ! Locals
    1287              : 
    1288            0 :       DO id = 1, SIZE(zij, 1)
    1289            0 :          CALL cp_cfm_rot_cols(zij(id), istate, jstate, ct, st)
    1290            0 :          CALL cp_cfm_rot_rows(zij(id), istate, jstate, ct, st)
    1291              :       END DO
    1292              : 
    1293            0 :    END SUBROUTINE rotate_zij
    1294              : ! **************************************************************************************************
    1295              : !> \brief ...
    1296              : !> \param istate ...
    1297              : !> \param jstate ...
    1298              : !> \param st ...
    1299              : !> \param ct ...
    1300              : !> \param rmat ...
    1301              : ! **************************************************************************************************
    1302            0 :    SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat)
    1303              :       INTEGER, INTENT(IN)                                :: istate, jstate
    1304              :       REAL(KIND=dp), INTENT(IN)                          :: st, ct
    1305              :       TYPE(cp_cfm_type), INTENT(IN)                      :: rmat
    1306              : 
    1307            0 :       CALL cp_cfm_rot_cols(rmat, istate, jstate, ct, st)
    1308              : 
    1309            0 :    END SUBROUTINE rotate_rmat
    1310              : ! **************************************************************************************************
    1311              : !> \brief ...
    1312              : !> \param mii ...
    1313              : !> \param mjj ...
    1314              : !> \param mij ...
    1315              : !> \param weights ...
    1316              : !> \param theta ...
    1317              : !> \param grad_ij ...
    1318              : !> \param step ...
    1319              : ! **************************************************************************************************
    1320      2324078 :    SUBROUTINE get_angle(mii, mjj, mij, weights, theta, grad_ij, step)
    1321              :       COMPLEX(KIND=dp), POINTER                          :: mii(:), mjj(:), mij(:)
    1322              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1323              :       REAL(KIND=dp), INTENT(OUT)                         :: theta
    1324              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: grad_ij, step
    1325              : 
    1326              :       COMPLEX(KIND=dp)                                   :: z11, z12, z22
    1327              :       INTEGER                                            :: dim_m, idim
    1328              :       REAL(KIND=dp)                                      :: a12, b12, d2, ratio
    1329              : 
    1330      2324078 :       a12 = 0.0_dp
    1331      2324078 :       b12 = 0.0_dp
    1332      2324078 :       dim_m = SIZE(mii)
    1333      9402806 :       DO idim = 1, dim_m
    1334      7078728 :          z11 = mii(idim)
    1335      7078728 :          z22 = mjj(idim)
    1336      7078728 :          z12 = mij(idim)
    1337      7078728 :          a12 = a12 + weights(idim)*REAL(CONJG(z12)*(z11 - z22), KIND=dp)
    1338              :          b12 = b12 + weights(idim)*REAL((z12*CONJG(z12) - &
    1339      9402806 :                                          0.25_dp*(z11 - z22)*(CONJG(z11) - CONJG(z22))), KIND=dp)
    1340              :       END DO
    1341      2324078 :       IF (ABS(b12) > 1.e-10_dp) THEN
    1342      2324078 :          ratio = -a12/b12
    1343      2324078 :          theta = 0.25_dp*ATAN(ratio)
    1344            0 :       ELSEIF (ABS(b12) < 1.e-10_dp) THEN
    1345            0 :          b12 = 0.0_dp
    1346            0 :          theta = 0.0_dp
    1347              :       ELSE
    1348            0 :          theta = 0.25_dp*pi
    1349              :       END IF
    1350      2324078 :       IF (PRESENT(grad_ij)) theta = theta + step*grad_ij
    1351              : ! Check second derivative info
    1352      2324078 :       d2 = a12*SIN(4._dp*theta) - b12*COS(4._dp*theta)
    1353      2324078 :       IF (d2 <= 0._dp) THEN ! go to the maximum, not the minimum
    1354         3240 :          IF (theta > 0.0_dp) THEN ! make theta as small as possible
    1355         1563 :             theta = theta - 0.25_dp*pi
    1356              :          ELSE
    1357         1677 :             theta = theta + 0.25_dp*pi
    1358              :          END IF
    1359              :       END IF
    1360      2324078 :    END SUBROUTINE get_angle
    1361              : ! **************************************************************************************************
    1362              : !> \brief ...
    1363              : !> \param zij ...
    1364              : !> \param weights ...
    1365              : !> \param tolerance ...
    1366              : !> \param grad ...
    1367              : ! **************************************************************************************************
    1368            0 :    SUBROUTINE check_tolerance(zij, weights, tolerance, grad)
    1369              :       TYPE(cp_cfm_type)                                  :: zij(:)
    1370              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1371              :       REAL(KIND=dp), INTENT(OUT)                         :: tolerance
    1372              :       TYPE(cp_fm_type), INTENT(OUT), OPTIONAL            :: grad
    1373              : 
    1374              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'check_tolerance'
    1375              : 
    1376              :       INTEGER                                            :: handle
    1377              :       TYPE(cp_fm_type)                                   :: force
    1378              : 
    1379            0 :       CALL timeset(routineN, handle)
    1380              : 
    1381              : ! compute gradient at t=0
    1382              : 
    1383            0 :       CALL cp_fm_create(force, zij(1)%matrix_struct)
    1384            0 :       CALL cp_fm_set_all(force, 0._dp)
    1385            0 :       CALL grad_at_0(zij, weights, force)
    1386            0 :       CALL cp_fm_maxabsval(force, tolerance)
    1387            0 :       IF (PRESENT(grad)) CALL cp_fm_to_fm(force, grad)
    1388            0 :       CALL cp_fm_release(force)
    1389              : 
    1390            0 :       CALL timestop(handle)
    1391              : 
    1392            0 :    END SUBROUTINE check_tolerance
    1393              : ! **************************************************************************************************
    1394              : !> \brief ...
    1395              : !> \param rmat ...
    1396              : !> \param vectors ...
    1397              : ! **************************************************************************************************
    1398         1052 :    SUBROUTINE rotate_orbitals(rmat, vectors)
    1399              :       TYPE(cp_fm_type), INTENT(IN)                       :: rmat, vectors
    1400              : 
    1401              :       INTEGER                                            :: k, n
    1402              :       TYPE(cp_fm_type)                                   :: wf
    1403              : 
    1404          526 :       CALL cp_fm_create(wf, vectors%matrix_struct)
    1405          526 :       CALL cp_fm_get_info(vectors, nrow_global=n, ncol_global=k)
    1406          526 :       CALL parallel_gemm("N", "N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf)
    1407          526 :       CALL cp_fm_to_fm(wf, vectors)
    1408          526 :       CALL cp_fm_release(wf)
    1409          526 :    END SUBROUTINE rotate_orbitals
    1410              : ! **************************************************************************************************
    1411              : !> \brief ...
    1412              : !> \param rmat ...
    1413              : !> \param vec_all ...
    1414              : !> \param vectors ...
    1415              : ! **************************************************************************************************
    1416            6 :    SUBROUTINE rotate_orbitals_edf(rmat, vec_all, vectors)
    1417              :       TYPE(cp_fm_type), INTENT(IN)                       :: rmat, vec_all, vectors
    1418              : 
    1419              :       INTEGER                                            :: k, l, n
    1420              :       TYPE(cp_fm_type)                                   :: wf
    1421              : 
    1422            2 :       CALL cp_fm_create(wf, vectors%matrix_struct)
    1423            2 :       CALL cp_fm_get_info(vec_all, nrow_global=n, ncol_global=k)
    1424            2 :       CALL cp_fm_get_info(rmat, ncol_global=l)
    1425              : 
    1426            2 :       CALL parallel_gemm("N", "N", n, l, k, 1.0_dp, vec_all, rmat, 0.0_dp, wf)
    1427            2 :       CALL cp_fm_to_fm(wf, vectors)
    1428            2 :       CALL cp_fm_release(wf)
    1429            2 :    END SUBROUTINE rotate_orbitals_edf
    1430              : ! **************************************************************************************************
    1431              : !> \brief ...
    1432              : !> \param diag ...
    1433              : !> \param weights ...
    1434              : !> \param matrix ...
    1435              : !> \param ndim ...
    1436              : ! **************************************************************************************************
    1437          204 :    SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim)
    1438              :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag
    1439              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1440              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1441              :       INTEGER, INTENT(IN)                                :: ndim
    1442              : 
    1443              :       COMPLEX(KIND=dp)                                   :: zii, zjj
    1444              :       INTEGER                                            :: idim, istate, jstate, ncol_local, &
    1445              :                                                             nrow_global, nrow_local
    1446          204 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1447              :       REAL(KIND=dp)                                      :: gradsq_ij
    1448              : 
    1449              :       CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
    1450              :                           ncol_local=ncol_local, nrow_global=nrow_global, &
    1451          204 :                           row_indices=row_indices, col_indices=col_indices)
    1452              : 
    1453          612 :       DO istate = 1, nrow_local
    1454         2244 :          DO jstate = 1, ncol_local
    1455              : ! get real and imaginary parts
    1456         1632 :             gradsq_ij = 0.0_dp
    1457        11424 :             DO idim = 1, ndim
    1458         9792 :                zii = diag(row_indices(istate), idim)
    1459         9792 :                zjj = diag(col_indices(jstate), idim)
    1460              :                gradsq_ij = gradsq_ij + weights(idim)* &
    1461        11424 :                            4.0_dp*REAL((CONJG(zii)*zii + CONJG(zjj)*zjj), KIND=dp)
    1462              :             END DO
    1463         2040 :             matrix%local_data(istate, jstate) = gradsq_ij
    1464              :          END DO
    1465              :       END DO
    1466          204 :    END SUBROUTINE gradsq_at_0
    1467              : ! **************************************************************************************************
    1468              : !> \brief ...
    1469              : !> \param matrix_p ...
    1470              : !> \param weights ...
    1471              : !> \param matrix ...
    1472              : ! **************************************************************************************************
    1473            0 :    SUBROUTINE grad_at_0(matrix_p, weights, matrix)
    1474              :       TYPE(cp_cfm_type)                                  :: matrix_p(:)
    1475              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1476              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1477              : 
    1478              :       COMPLEX(KIND=dp)                                   :: zii, zij, zjj
    1479            0 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag
    1480              :       INTEGER                                            :: dim_m, idim, istate, jstate, ncol_local, &
    1481              :                                                             nrow_global, nrow_local
    1482            0 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1483              :       REAL(KIND=dp)                                      :: grad_ij
    1484              : 
    1485            0 :       NULLIFY (diag)
    1486              :       CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
    1487              :                           ncol_local=ncol_local, nrow_global=nrow_global, &
    1488            0 :                           row_indices=row_indices, col_indices=col_indices)
    1489            0 :       dim_m = SIZE(matrix_p, 1)
    1490            0 :       ALLOCATE (diag(nrow_global, dim_m))
    1491              : 
    1492            0 :       DO idim = 1, dim_m
    1493            0 :          DO istate = 1, nrow_global
    1494            0 :             CALL cp_cfm_get_element(matrix_p(idim), istate, istate, diag(istate, idim))
    1495              :          END DO
    1496              :       END DO
    1497              : 
    1498            0 :       DO istate = 1, nrow_local
    1499            0 :          DO jstate = 1, ncol_local
    1500              : ! get real and imaginary parts
    1501              :             grad_ij = 0.0_dp
    1502            0 :             DO idim = 1, dim_m
    1503            0 :                zii = diag(row_indices(istate), idim)
    1504            0 :                zjj = diag(col_indices(jstate), idim)
    1505            0 :                zij = matrix_p(idim)%local_data(istate, jstate)
    1506              :                grad_ij = grad_ij + weights(idim)* &
    1507            0 :                          REAL(4.0_dp*CONJG(zij)*(zjj - zii), dp)
    1508              :             END DO
    1509            0 :             matrix%local_data(istate, jstate) = grad_ij
    1510              :          END DO
    1511              :       END DO
    1512            0 :       DEALLOCATE (diag)
    1513            0 :    END SUBROUTINE grad_at_0
    1514              : 
    1515              : ! return energy and maximum gradient in the current point
    1516              : ! **************************************************************************************************
    1517              : !> \brief ...
    1518              : !> \param weights ...
    1519              : !> \param zij ...
    1520              : !> \param tolerance ...
    1521              : !> \param value ...
    1522              : ! **************************************************************************************************
    1523         4246 :    SUBROUTINE check_tolerance_new(weights, zij, tolerance, value)
    1524              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1525              :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :)
    1526              :       REAL(KIND=dp)                                      :: tolerance, value
    1527              : 
    1528              :       COMPLEX(KIND=dp)                                   :: kii, kij, kjj
    1529         4246 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag
    1530              :       INTEGER                                            :: idim, istate, jstate, ncol_local, &
    1531              :                                                             nrow_global, nrow_local
    1532         4246 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1533              :       REAL(KIND=dp)                                      :: grad_ij, ra, rb
    1534              : 
    1535         4246 :       NULLIFY (diag)
    1536              :       CALL cp_fm_get_info(zij(1, 1), nrow_local=nrow_local, &
    1537              :                           ncol_local=ncol_local, nrow_global=nrow_global, &
    1538         4246 :                           row_indices=row_indices, col_indices=col_indices)
    1539        16984 :       ALLOCATE (diag(nrow_global, SIZE(zij, 2)))
    1540         4246 :       value = 0.0_dp
    1541        17098 :       DO idim = 1, SIZE(zij, 2)
    1542       191338 :          DO istate = 1, nrow_global
    1543       174240 :             CALL cp_fm_get_element(zij(1, idim), istate, istate, ra)
    1544       174240 :             CALL cp_fm_get_element(zij(2, idim), istate, istate, rb)
    1545       174240 :             diag(istate, idim) = CMPLX(ra, rb, dp)
    1546       187092 :             value = value + weights(idim) - weights(idim)*ABS(diag(istate, idim))**2
    1547              :          END DO
    1548              :       END DO
    1549         4246 :       tolerance = 0.0_dp
    1550        33191 :       DO istate = 1, nrow_local
    1551       496716 :          DO jstate = 1, ncol_local
    1552      1855525 :             grad_ij = 0.0_dp
    1553      1855525 :             DO idim = 1, SIZE(zij, 2)
    1554      1392000 :                kii = diag(row_indices(istate), idim)
    1555      1392000 :                kjj = diag(col_indices(jstate), idim)
    1556      1392000 :                ra = zij(1, idim)%local_data(istate, jstate)
    1557      1392000 :                rb = zij(2, idim)%local_data(istate, jstate)
    1558      1392000 :                kij = CMPLX(ra, rb, dp)
    1559              :                grad_ij = grad_ij + weights(idim)* &
    1560      1855525 :                          REAL(4.0_dp*CONJG(kij)*(kjj - kii), dp)
    1561              :             END DO
    1562       492470 :             tolerance = MAX(ABS(grad_ij), tolerance)
    1563              :          END DO
    1564              :       END DO
    1565         4246 :       CALL zij(1, 1)%matrix_struct%para_env%max(tolerance)
    1566              : 
    1567         4246 :       DEALLOCATE (diag)
    1568              : 
    1569         4246 :    END SUBROUTINE check_tolerance_new
    1570              : 
    1571              : ! **************************************************************************************************
    1572              : !> \brief yet another crazy try, computes the angles needed to rotate the orbitals first
    1573              : !>        and rotates them all at the same time (hoping for the best of course)
    1574              : !> \param weights ...
    1575              : !> \param zij ...
    1576              : !> \param vectors ...
    1577              : !> \param max_iter ...
    1578              : !> \param max_crazy_angle ...
    1579              : !> \param crazy_scale ...
    1580              : !> \param crazy_use_diag ...
    1581              : !> \param eps_localization ...
    1582              : !> \param iterations ...
    1583              : !> \param converged ...
    1584              : ! **************************************************************************************************
    1585          100 :    SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, &
    1586              :                               eps_localization, iterations, converged)
    1587              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1588              :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :), vectors
    1589              :       INTEGER, INTENT(IN)                                :: max_iter
    1590              :       REAL(KIND=dp), INTENT(IN)                          :: max_crazy_angle
    1591              :       REAL(KIND=dp)                                      :: crazy_scale
    1592              :       LOGICAL                                            :: crazy_use_diag
    1593              :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
    1594              :       INTEGER                                            :: iterations
    1595              :       LOGICAL, INTENT(out), OPTIONAL                     :: converged
    1596              : 
    1597              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'crazy_rotations'
    1598              :       COMPLEX(KIND=dp), PARAMETER                        :: cone = (1.0_dp, 0.0_dp), &
    1599              :                                                             czero = (0.0_dp, 0.0_dp)
    1600              : 
    1601              :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: evals_exp
    1602          100 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag_z
    1603              :       COMPLEX(KIND=dp), POINTER                          :: mii(:), mij(:), mjj(:)
    1604              :       INTEGER                                            :: dim2, handle, i, icol, idim, irow, &
    1605              :                                                             method, ncol_global, ncol_local, &
    1606              :                                                             norder, nrow_global, nrow_local, &
    1607              :                                                             nsquare, unit_nr
    1608          100 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1609              :       LOGICAL                                            :: do_emd
    1610              :       REAL(KIND=dp)                                      :: eps_exp, limit_crazy_angle, maxeval, &
    1611              :                                                             norm, ra, rb, theta, tolerance, value
    1612              :       REAL(KIND=dp), DIMENSION(:), POINTER               :: evals
    1613              :       TYPE(cp_cfm_type)                                  :: cmat_A, cmat_R, cmat_t1
    1614              :       TYPE(cp_fm_type)                                   :: mat_R, mat_t, mat_theta, mat_U
    1615              : 
    1616          100 :       CALL timeset(routineN, handle)
    1617          100 :       NULLIFY (row_indices, col_indices)
    1618              :       CALL cp_fm_get_info(zij(1, 1), nrow_global=nrow_global, &
    1619              :                           ncol_global=ncol_global, &
    1620              :                           row_indices=row_indices, col_indices=col_indices, &
    1621          100 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1622              : 
    1623          100 :       limit_crazy_angle = max_crazy_angle
    1624              : 
    1625          100 :       NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj)
    1626          100 :       dim2 = SIZE(zij, 2)
    1627          400 :       ALLOCATE (diag_z(nrow_global, dim2))
    1628          300 :       ALLOCATE (evals(nrow_global))
    1629          300 :       ALLOCATE (evals_exp(nrow_global))
    1630              : 
    1631          100 :       CALL cp_cfm_create(cmat_A, zij(1, 1)%matrix_struct)
    1632          100 :       CALL cp_cfm_create(cmat_R, zij(1, 1)%matrix_struct)
    1633          100 :       CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
    1634              : 
    1635          100 :       CALL cp_fm_create(mat_U, zij(1, 1)%matrix_struct)
    1636          100 :       CALL cp_fm_create(mat_t, zij(1, 1)%matrix_struct)
    1637          100 :       CALL cp_fm_create(mat_R, zij(1, 1)%matrix_struct)
    1638              : 
    1639          100 :       CALL cp_fm_create(mat_theta, zij(1, 1)%matrix_struct)
    1640              : 
    1641          100 :       CALL cp_fm_set_all(mat_R, 0.0_dp, 1.0_dp)
    1642          100 :       CALL cp_fm_set_all(mat_t, 0.0_dp)
    1643          500 :       ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
    1644          406 :       DO idim = 1, dim2
    1645          306 :          CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(1, idim))
    1646          406 :          CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(2, idim))
    1647              :       END DO
    1648          100 :       CALL cp_fm_syevd(mat_t, mat_U, evals)
    1649          406 :       DO idim = 1, dim2
    1650              :          ! rotate z's
    1651          306 :          CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_U, 0.0_dp, mat_t)
    1652          306 :          CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(1, idim))
    1653          306 :          CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_U, 0.0_dp, mat_t)
    1654          406 :          CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(2, idim))
    1655              :       END DO
    1656              :       ! collect rotation matrix
    1657          100 :       CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_R, mat_U, 0.0_dp, mat_t)
    1658          100 :       CALL cp_fm_to_fm(mat_t, mat_R)
    1659              : 
    1660          100 :       unit_nr = -1
    1661          100 :       IF (cmat_A%matrix_struct%para_env%is_source()) THEN
    1662           50 :          unit_nr = cp_logger_get_default_unit_nr()
    1663              :          WRITE (unit_nr, '(T2,A7,A6,1X,A20,A12,A12,A12)') &
    1664           50 :             "CRAZY| ", "Iter", "value    ", "gradient", "Max. eval", "limit"
    1665              :       END IF
    1666              : 
    1667          100 :       iterations = 0
    1668          100 :       tolerance = 1.0_dp
    1669              : 
    1670              :       DO
    1671         4246 :          iterations = iterations + 1
    1672        17098 :          DO idim = 1, dim2
    1673       191338 :             DO i = 1, nrow_global
    1674       174240 :                CALL cp_fm_get_element(zij(1, idim), i, i, ra)
    1675       174240 :                CALL cp_fm_get_element(zij(2, idim), i, i, rb)
    1676       187092 :                diag_z(i, idim) = CMPLX(ra, rb, dp)
    1677              :             END DO
    1678              :          END DO
    1679        33191 :          DO irow = 1, nrow_local
    1680       496716 :             DO icol = 1, ncol_local
    1681      1855525 :                DO idim = 1, dim2
    1682      1392000 :                   ra = zij(1, idim)%local_data(irow, icol)
    1683      1392000 :                   rb = zij(2, idim)%local_data(irow, icol)
    1684      1392000 :                   mij(idim) = CMPLX(ra, rb, dp)
    1685      1392000 :                   mii(idim) = diag_z(row_indices(irow), idim)
    1686      1855525 :                   mjj(idim) = diag_z(col_indices(icol), idim)
    1687              :                END DO
    1688       492470 :                IF (row_indices(irow) /= col_indices(icol)) THEN
    1689       434580 :                   CALL get_angle(mii, mjj, mij, weights, theta)
    1690       434580 :                   theta = crazy_scale*theta
    1691       434580 :                   IF (theta > limit_crazy_angle) theta = limit_crazy_angle
    1692       434580 :                   IF (theta < -limit_crazy_angle) theta = -limit_crazy_angle
    1693       434580 :                   IF (crazy_use_diag) THEN
    1694            0 :                      cmat_A%local_data(irow, icol) = -CMPLX(0.0_dp, theta, dp)
    1695              :                   ELSE
    1696       434580 :                      mat_theta%local_data(irow, icol) = -theta
    1697              :                   END IF
    1698              :                ELSE
    1699        28945 :                   IF (crazy_use_diag) THEN
    1700            0 :                      cmat_A%local_data(irow, icol) = czero
    1701              :                   ELSE
    1702        28945 :                      mat_theta%local_data(irow, icol) = 0.0_dp
    1703              :                   END IF
    1704              :                END IF
    1705              :             END DO
    1706              :          END DO
    1707              : 
    1708              :          ! construct rotation matrix U based on A using diagonalization
    1709              :          ! alternatively, exp based on repeated squaring could be faster
    1710         4246 :          IF (crazy_use_diag) THEN
    1711            0 :             CALL cp_cfm_heevd(cmat_A, cmat_R, evals)
    1712            0 :             maxeval = MAXVAL(ABS(evals))
    1713            0 :             evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
    1714            0 :             CALL cp_cfm_to_cfm(cmat_R, cmat_t1)
    1715            0 :             CALL cp_cfm_column_scale(cmat_t1, evals_exp)
    1716              :             CALL parallel_gemm('N', 'C', nrow_global, nrow_global, nrow_global, cone, &
    1717            0 :                                cmat_t1, cmat_R, czero, cmat_A)
    1718            0 :             mat_U%local_data = REAL(cmat_A%local_data, KIND=dp) ! U is a real matrix
    1719              :          ELSE
    1720         4246 :             do_emd = .FALSE.
    1721         4246 :             method = 2
    1722         4246 :             eps_exp = 1.0_dp*EPSILON(eps_exp)
    1723         4246 :             CALL cp_fm_maxabsrownorm(mat_theta, norm)
    1724         4246 :             maxeval = norm ! an upper bound
    1725         4246 :             CALL get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
    1726         4246 :             CALL exp_pade_real(mat_U, mat_theta, nsquare, norder)
    1727              :          END IF
    1728              : 
    1729        17098 :          DO idim = 1, dim2
    1730              :             ! rotate z's
    1731        12852 :             CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_U, 0.0_dp, mat_t)
    1732        12852 :             CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(1, idim))
    1733        12852 :             CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_U, 0.0_dp, mat_t)
    1734        17098 :             CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(2, idim))
    1735              :          END DO
    1736              :          ! collect rotation matrix
    1737         4246 :          CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_R, mat_U, 0.0_dp, mat_t)
    1738         4246 :          CALL cp_fm_to_fm(mat_t, mat_R)
    1739              : 
    1740         4246 :          CALL check_tolerance_new(weights, zij, tolerance, value)
    1741              : 
    1742         4246 :          IF (unit_nr > 0) THEN
    1743              :             WRITE (unit_nr, '(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') &
    1744         2123 :                "CRAZY| ", iterations, value, tolerance, maxeval, limit_crazy_angle
    1745         2123 :             CALL m_flush(unit_nr)
    1746              :          END IF
    1747         4246 :          IF (tolerance < eps_localization .OR. iterations >= max_iter) EXIT
    1748              :       END DO
    1749              : 
    1750          100 :       IF (PRESENT(converged)) converged = (tolerance < eps_localization)
    1751              : 
    1752          100 :       CALL cp_cfm_release(cmat_A)
    1753          100 :       CALL cp_cfm_release(cmat_R)
    1754          100 :       CALL cp_cfm_release(cmat_T1)
    1755              : 
    1756          100 :       CALL cp_fm_release(mat_U)
    1757          100 :       CALL cp_fm_release(mat_T)
    1758          100 :       CALL cp_fm_release(mat_theta)
    1759              : 
    1760          100 :       CALL rotate_orbitals(mat_R, vectors)
    1761              : 
    1762          100 :       CALL cp_fm_release(mat_R)
    1763          100 :       DEALLOCATE (evals_exp, evals, diag_z)
    1764          100 :       DEALLOCATE (mii, mij, mjj)
    1765              : 
    1766          100 :       CALL timestop(handle)
    1767              : 
    1768          200 :    END SUBROUTINE crazy_rotations
    1769              : 
    1770              : ! **************************************************************************************************
    1771              : !> \brief use the exponential parametrization as described in to perform a direct mini
    1772              : !>        Gerd Berghold et al. PRB 61 (15), pag. 10040 (2000)
    1773              : !> none of the input is modified for the time being, just finds the rotations
    1774              : !> that minimizes, and throws it away afterwards :-)
    1775              : !> apart from being expensive and not cleaned, this works fine
    1776              : !> useful to try different spread functionals
    1777              : !> \param weights ...
    1778              : !> \param zij ...
    1779              : !> \param vectors ...
    1780              : !> \param max_iter ...
    1781              : !> \param eps_localization ...
    1782              : !> \param iterations ...
    1783              : ! **************************************************************************************************
    1784            2 :    SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
    1785              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1786              :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :), vectors
    1787              :       INTEGER, INTENT(IN)                                :: max_iter
    1788              :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
    1789              :       INTEGER                                            :: iterations
    1790              : 
    1791              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'direct_mini'
    1792              :       COMPLEX(KIND=dp), PARAMETER                        :: cone = (1.0_dp, 0.0_dp), &
    1793              :                                                             czero = (0.0_dp, 0.0_dp)
    1794              :       REAL(KIND=dp), PARAMETER                           :: gold_sec = 0.3819_dp
    1795              : 
    1796              :       COMPLEX(KIND=dp)                                   :: lk, ll, tmp
    1797            2 :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: evals_exp
    1798            2 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag_z
    1799              :       INTEGER                                            :: handle, i, icol, idim, irow, &
    1800              :                                                             line_search_count, line_searches, lsl, &
    1801              :                                                             lsm, lsr, n, ncol_local, ndim, &
    1802              :                                                             nrow_local, output_unit
    1803            2 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1804              :       LOGICAL                                            :: new_direction
    1805              :       REAL(KIND=dp)                                      :: a, b, beta_pr, c, denom, ds, ds_min, fa, &
    1806              :                                                             fb, fc, nom, normg, normg_cross, &
    1807              :                                                             normg_old, npos, omega, tol, val, x0, &
    1808              :                                                             x1, xa, xb, xc
    1809              :       REAL(KIND=dp), DIMENSION(150)                      :: energy, grad, pos
    1810            2 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: evals, fval, fvald
    1811              :       TYPE(cp_cfm_type)                                  :: cmat_A, cmat_B, cmat_M, cmat_R, cmat_t1, &
    1812              :                                                             cmat_t2, cmat_U
    1813            2 :       TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:)       :: c_zij
    1814              :       TYPE(cp_fm_type)                                   :: matrix_A, matrix_G, matrix_G_old, &
    1815              :                                                             matrix_G_search, matrix_H, matrix_R, &
    1816              :                                                             matrix_T
    1817              : 
    1818            2 :       NULLIFY (evals, evals_exp, diag_z, fval, fvald)
    1819              : 
    1820            2 :       CALL timeset(routineN, handle)
    1821            2 :       output_unit = cp_logger_get_default_io_unit()
    1822              : 
    1823            2 :       n = zij(1, 1)%matrix_struct%nrow_global
    1824            2 :       ndim = (SIZE(zij, 2))
    1825              : 
    1826            2 :       IF (output_unit > 0) THEN
    1827            1 :          WRITE (output_unit, '(T4,A )') "Localization by direct minimization of the functional; "
    1828            1 :          WRITE (output_unit, '(T5,2A13,A20,A20,A10 )') " Line search ", " Iteration ", " Functional ", " Tolerance ", " ds Min "
    1829              :       END IF
    1830              : 
    1831           20 :       ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n))
    1832           18 :       ALLOCATE (c_zij(ndim))
    1833              : 
    1834              :       ! create the three complex matrices Z
    1835           14 :       DO idim = 1, ndim
    1836           12 :          CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
    1837              :          c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
    1838          158 :                                         zij(2, idim)%local_data, dp)
    1839              :       END DO
    1840              : 
    1841            2 :       CALL cp_fm_create(matrix_A, zij(1, 1)%matrix_struct)
    1842            2 :       CALL cp_fm_create(matrix_G, zij(1, 1)%matrix_struct)
    1843            2 :       CALL cp_fm_create(matrix_T, zij(1, 1)%matrix_struct)
    1844            2 :       CALL cp_fm_create(matrix_H, zij(1, 1)%matrix_struct)
    1845            2 :       CALL cp_fm_create(matrix_G_search, zij(1, 1)%matrix_struct)
    1846            2 :       CALL cp_fm_create(matrix_G_old, zij(1, 1)%matrix_struct)
    1847            2 :       CALL cp_fm_create(matrix_R, zij(1, 1)%matrix_struct)
    1848            2 :       CALL cp_fm_set_all(matrix_R, 0.0_dp, 1.0_dp)
    1849              : 
    1850            2 :       CALL cp_fm_set_all(matrix_A, 0.0_dp)
    1851              : !    CALL cp_fm_init_random ( matrix_A )
    1852              : 
    1853            2 :       CALL cp_cfm_create(cmat_A, zij(1, 1)%matrix_struct)
    1854            2 :       CALL cp_cfm_create(cmat_U, zij(1, 1)%matrix_struct)
    1855            2 :       CALL cp_cfm_create(cmat_R, zij(1, 1)%matrix_struct)
    1856            2 :       CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
    1857            2 :       CALL cp_cfm_create(cmat_t2, zij(1, 1)%matrix_struct)
    1858            2 :       CALL cp_cfm_create(cmat_B, zij(1, 1)%matrix_struct)
    1859            2 :       CALL cp_cfm_create(cmat_M, zij(1, 1)%matrix_struct)
    1860              : 
    1861              :       CALL cp_cfm_get_info(cmat_B, nrow_local=nrow_local, ncol_local=ncol_local, &
    1862            2 :                            row_indices=row_indices, col_indices=col_indices)
    1863              : 
    1864            2 :       CALL cp_fm_set_all(matrix_G_old, 0.0_dp)
    1865            2 :       CALL cp_fm_set_all(matrix_G_search, 0.0_dp)
    1866            2 :       normg_old = 1.0E30_dp
    1867            2 :       ds_min = 1.0_dp
    1868            2 :       new_direction = .TRUE.
    1869            2 :       Iterations = 0
    1870            2 :       line_searches = 0
    1871            2 :       line_search_count = 0
    1872          204 :       DO
    1873          204 :          iterations = iterations + 1
    1874              :          ! compute U,R,evals given A
    1875         2652 :          cmat_A%local_data = CMPLX(0.0_dp, matrix_A%local_data, dp) ! cmat_A is hermitian, evals are reals
    1876          204 :          CALL cp_cfm_heevd(cmat_A, cmat_R, evals)
    1877         1020 :          evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
    1878          204 :          CALL cp_cfm_to_cfm(cmat_R, cmat_t1)
    1879          204 :          CALL cp_cfm_column_scale(cmat_t1, evals_exp)
    1880          204 :          CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_R, czero, cmat_U)
    1881         2652 :          cmat_U%local_data = REAL(cmat_U%local_data, KIND=dp) ! enforce numerics, U is a real matrix
    1882              : 
    1883          204 :          IF (new_direction .AND. MOD(line_searches, 20) == 5) THEN ! reset with A .eq. 0
    1884            0 :             DO idim = 1, ndim
    1885            0 :                CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_U, czero, cmat_t1)
    1886            0 :                CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_U, cmat_t1, czero, c_zij(idim))
    1887              :             END DO
    1888              :             ! collect rotation matrix
    1889            0 :             matrix_H%local_data = REAL(cmat_U%local_data, KIND=dp)
    1890            0 :             CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_R, matrix_H, 0.0_dp, matrix_T)
    1891            0 :             CALL cp_fm_to_fm(matrix_T, matrix_R)
    1892              : 
    1893            0 :             CALL cp_cfm_set_all(cmat_U, czero, cone)
    1894            0 :             CALL cp_cfm_set_all(cmat_R, czero, cone)
    1895            0 :             CALL cp_cfm_set_all(cmat_A, czero)
    1896            0 :             CALL cp_fm_set_all(matrix_A, 0.0_dp)
    1897            0 :             evals(:) = 0.0_dp
    1898            0 :             evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
    1899            0 :             CALL cp_fm_set_all(matrix_G_old, 0.0_dp)
    1900            0 :             CALL cp_fm_set_all(matrix_G_search, 0.0_dp)
    1901            0 :             normg_old = 1.0E30_dp
    1902              :          END IF
    1903              : 
    1904              :          ! compute Omega and M
    1905          204 :          CALL cp_cfm_set_all(cmat_M, czero)
    1906          204 :          omega = 0.0_dp
    1907         1428 :          DO idim = 1, ndim
    1908         1224 :             CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_U, czero, cmat_t1) ! t1=ZU
    1909         1224 :             CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_U, cmat_t1, czero, cmat_t2) ! t2=(U^T)ZU
    1910         6120 :             DO i = 1, n
    1911         4896 :                CALL cp_cfm_get_element(cmat_t2, i, i, diag_z(i, idim))
    1912              :                SELECT CASE (2) ! allows for selection of different spread functionals
    1913              :                CASE (1)
    1914              :                   fval(i) = -weights(idim)*LOG(ABS(diag_z(i, idim))**2)
    1915              :                   fvald(i) = -weights(idim)/(ABS(diag_z(i, idim))**2)
    1916              :                CASE (2) ! corresponds to the jacobi setup
    1917         4896 :                   fval(i) = weights(idim) - weights(idim)*ABS(diag_z(i, idim))**2
    1918         4896 :                   fvald(i) = -weights(idim)
    1919              :                END SELECT
    1920         6120 :                omega = omega + fval(i)
    1921              :             END DO
    1922         6324 :             DO icol = 1, ncol_local
    1923        15912 :                DO irow = 1, nrow_local
    1924         9792 :                   tmp = cmat_t1%local_data(irow, icol)*CONJG(diag_z(col_indices(icol), idim))
    1925              :                   cmat_M%local_data(irow, icol) = cmat_M%local_data(irow, icol) &
    1926        14688 :                                                   + 4.0_dp*fvald(col_indices(icol))*REAL(tmp, KIND=dp)
    1927              :                END DO
    1928              :             END DO
    1929              :          END DO
    1930              : 
    1931              :          ! compute Hessian diagonal approximation for the preconditioner
    1932              :          IF (.TRUE.) THEN
    1933          204 :             CALL gradsq_at_0(diag_z, weights, matrix_H, ndim)
    1934              :          ELSE
    1935              :             CALL cp_fm_set_all(matrix_H, 1.0_dp)
    1936              :          END IF
    1937              : 
    1938              :          ! compute B
    1939         1020 :          DO icol = 1, ncol_local
    1940         2652 :             DO irow = 1, nrow_local
    1941         1632 :                ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow))
    1942         1632 :                lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol))
    1943         2448 :                IF (ABS(ll - lk) < 0.5_dp) THEN ! use a series expansion to avoid loss of precision
    1944          622 :                   tmp = 1.0_dp
    1945          622 :                   cmat_B%local_data(irow, icol) = 0.0_dp
    1946        10574 :                   DO i = 1, 16
    1947         9952 :                      cmat_B%local_data(irow, icol) = cmat_B%local_data(irow, icol) + tmp
    1948        10574 :                      tmp = tmp*(ll - lk)/(i + 1)
    1949              :                   END DO
    1950          622 :                   cmat_B%local_data(irow, icol) = cmat_B%local_data(irow, icol)*EXP(lk)
    1951              :                ELSE
    1952         1010 :                   cmat_B%local_data(irow, icol) = (EXP(lk) - EXP(ll))/(lk - ll)
    1953              :                END IF
    1954              :             END DO
    1955              :          END DO
    1956              :          ! compute gradient matrix_G
    1957              : 
    1958          204 :          CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_M, cmat_R, czero, cmat_t1) ! t1=(M^T)(R^T)
    1959          204 :          CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_R, cmat_t1, czero, cmat_t2) ! t2=(R)t1
    1960          204 :          CALL cp_cfm_schur_product(cmat_t2, cmat_B, cmat_t1)
    1961          204 :          CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_R, czero, cmat_t2)
    1962          204 :          CALL parallel_gemm('N', 'N', n, n, n, cone, cmat_R, cmat_t2, czero, cmat_t1)
    1963         2652 :          matrix_G%local_data = REAL(cmat_t1%local_data, KIND=dp)
    1964          204 :          CALL cp_fm_transpose(matrix_G, matrix_T)
    1965          204 :          CALL cp_fm_scale_and_add(-1.0_dp, matrix_G, 1.0_dp, matrix_T)
    1966          204 :          CALL cp_fm_maxabsval(matrix_G, tol)
    1967              : 
    1968              :          ! from here on, minimizing technology
    1969          204 :          IF (new_direction) THEN
    1970              :             ! energy converged up to machine precision ?
    1971           10 :             line_searches = line_searches + 1
    1972              :             ! DO i=1,line_search_count
    1973              :             !   write(15,*) pos(i),energy(i)
    1974              :             ! ENDDO
    1975              :             ! write(15,*) ""
    1976              :             ! CALL m_flush(15)
    1977              :             !write(16,*) evals(:)
    1978              :             !write(17,*) matrix_A%local_data(:,:)
    1979              :             !write(18,*) matrix_G%local_data(:,:)
    1980           10 :             IF (output_unit > 0) THEN
    1981            5 :                WRITE (output_unit, '(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, Iterations, Omega, tol, ds_min
    1982            5 :                CALL m_flush(output_unit)
    1983              :             END IF
    1984           10 :             IF (tol < eps_localization .OR. iterations > max_iter) EXIT
    1985              : 
    1986              :             IF (.TRUE.) THEN ! do conjugate gradient CG
    1987            8 :                CALL cp_fm_trace(matrix_G, matrix_G_old, normg_cross)
    1988            8 :                normg_cross = normg_cross*0.5_dp ! takes into account the fact that A is antisymmetric
    1989              :                ! apply the preconditioner
    1990           40 :                DO icol = 1, ncol_local
    1991          104 :                   DO irow = 1, nrow_local
    1992           96 :                      matrix_G_old%local_data(irow, icol) = matrix_G%local_data(irow, icol)/matrix_H%local_data(irow, icol)
    1993              :                   END DO
    1994              :                END DO
    1995            8 :                CALL cp_fm_trace(matrix_G, matrix_G_old, normg)
    1996            8 :                normg = normg*0.5_dp
    1997            8 :                beta_pr = (normg - normg_cross)/normg_old
    1998            8 :                normg_old = normg
    1999            8 :                beta_pr = MAX(beta_pr, 0.0_dp)
    2000            8 :                CALL cp_fm_scale_and_add(beta_pr, matrix_G_search, -1.0_dp, matrix_G_old)
    2001            8 :                CALL cp_fm_trace(matrix_G_search, matrix_G_old, normg_cross)
    2002            8 :                IF (normg_cross >= 0) THEN ! back to SD
    2003            0 :                   IF (matrix_A%matrix_struct%para_env%is_source()) THEN
    2004            0 :                      WRITE (cp_logger_get_default_unit_nr(), *) "!"
    2005              :                   END IF
    2006            0 :                   beta_pr = 0.0_dp
    2007            0 :                   CALL cp_fm_scale_and_add(beta_pr, matrix_G_search, -1.0_dp, matrix_G_old)
    2008              :                END IF
    2009              :             ELSE ! SD
    2010              :                CALL cp_fm_scale_and_add(0.0_dp, matrix_G_search, -1.0_dp, matrix_G)
    2011              :             END IF
    2012              :             ! ds_min=1.0E-4_dp
    2013              :             line_search_count = 0
    2014              :          END IF
    2015          202 :          line_search_count = line_search_count + 1
    2016          202 :          energy(line_search_count) = Omega
    2017              : 
    2018              :          ! line search section
    2019              :          SELECT CASE (3)
    2020              :          CASE (1) ! two point line search
    2021              :             SELECT CASE (line_search_count)
    2022              :             CASE (1)
    2023              :                pos(1) = 0.0_dp
    2024              :                pos(2) = ds_min
    2025              :                CALL cp_fm_trace(matrix_G, matrix_G_search, grad(1))
    2026              :                grad(1) = grad(1)/2.0_dp
    2027              :                new_direction = .FALSE.
    2028              :             CASE (2)
    2029              :                new_direction = .TRUE.
    2030              :                x0 = pos(1) ! 0.0_dp
    2031              :                c = energy(1)
    2032              :                b = grad(1)
    2033              :                x1 = pos(2)
    2034              :                a = (energy(2) - b*x1 - c)/(x1**2)
    2035              :                IF (a <= 0.0_dp) a = 1.0E-15_dp
    2036              :                npos = -b/(2.0_dp*a)
    2037              :                val = a*npos**2 + b*npos + c
    2038              :                IF (val < energy(1) .AND. val <= energy(2)) THEN
    2039              :                   ! we go to a minimum, but ...
    2040              :                   ! we take a guard against too large steps
    2041              :                   pos(3) = MIN(npos, MAXVAL(pos(1:2))*4.0_dp)
    2042              :                ELSE ! just take an extended step
    2043              :                   pos(3) = MAXVAL(pos(1:2))*2.0_dp
    2044              :                END IF
    2045              :             END SELECT
    2046              :          CASE (2) ! 3 point line search
    2047              :             SELECT CASE (line_search_count)
    2048              :             CASE (1)
    2049              :                new_direction = .FALSE.
    2050              :                pos(1) = 0.0_dp
    2051              :                pos(2) = ds_min*0.8_dp
    2052              :             CASE (2)
    2053              :                new_direction = .FALSE.
    2054              :                IF (energy(2) > energy(1)) THEN
    2055              :                   pos(3) = ds_min*0.7_dp
    2056              :                ELSE
    2057              :                   pos(3) = ds_min*1.4_dp
    2058              :                END IF
    2059              :             CASE (3)
    2060              :                new_direction = .TRUE.
    2061              :                xa = pos(1)
    2062              :                xb = pos(2)
    2063              :                xc = pos(3)
    2064              :                fa = energy(1)
    2065              :                fb = energy(2)
    2066              :                fc = energy(3)
    2067              :                nom = (xb - xa)**2*(fb - fc) - (xb - xc)**2*(fb - fa)
    2068              :                denom = (xb - xa)*(fb - fc) - (xb - xc)*(fb - fa)
    2069              :                IF (ABS(denom) <= 1.0E-18_dp*MAX(ABS(fb - fc), ABS(fb - fa))) THEN
    2070              :                   npos = xb
    2071              :                ELSE
    2072              :                   npos = xb - 0.5_dp*nom/denom ! position of the stationary point
    2073              :                END IF
    2074              :                val = (npos - xa)*(npos - xb)*fc/((xc - xa)*(xc - xb)) + &
    2075              :                      (npos - xb)*(npos - xc)*fa/((xa - xb)*(xa - xc)) + &
    2076              :                      (npos - xc)*(npos - xa)*fb/((xb - xc)*(xb - xa))
    2077              :                IF (val < fa .AND. val <= fb .AND. val <= fc) THEN ! OK, we go to a minimum
    2078              :                   ! we take a guard against too large steps
    2079              :                   pos(4) = MAX(MAXVAL(pos(1:3))*0.01_dp, &
    2080              :                                MIN(npos, MAXVAL(pos(1:3))*4.0_dp))
    2081              :                ELSE ! just take an extended step
    2082              :                   pos(4) = MAXVAL(pos(1:3))*2.0_dp
    2083              :                END IF
    2084              :             END SELECT
    2085              :          CASE (3) ! golden section hunt
    2086          202 :             new_direction = .FALSE.
    2087          202 :             IF (line_search_count == 1) THEN
    2088            8 :                lsl = 1
    2089            8 :                lsr = 0
    2090            8 :                lsm = 1
    2091            8 :                pos(1) = 0.0_dp
    2092            8 :                pos(2) = ds_min/gold_sec
    2093              :             ELSE
    2094          194 :                IF (line_search_count == 150) CPABORT("Too many")
    2095          194 :                IF (lsr == 0) THEN
    2096           14 :                   IF (energy(line_search_count - 1) < energy(line_search_count)) THEN
    2097            8 :                      lsr = line_search_count
    2098            8 :                      pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
    2099              :                   ELSE
    2100            6 :                      lsl = lsm
    2101            6 :                      lsm = line_search_count
    2102            6 :                      pos(line_search_count + 1) = pos(line_search_count)/gold_sec
    2103              :                   END IF
    2104              :                ELSE
    2105          180 :                   IF (pos(line_search_count) < pos(lsm)) THEN
    2106           92 :                      IF (energy(line_search_count) < energy(lsm)) THEN
    2107              :                         lsr = lsm
    2108              :                         lsm = line_search_count
    2109              :                      ELSE
    2110           76 :                         lsl = line_search_count
    2111              :                      END IF
    2112              :                   ELSE
    2113           88 :                      IF (energy(line_search_count) < energy(lsm)) THEN
    2114              :                         lsl = lsm
    2115              :                         lsm = line_search_count
    2116              :                      ELSE
    2117           60 :                         lsr = line_search_count
    2118              :                      END IF
    2119              :                   END IF
    2120          180 :                   IF (pos(lsr) - pos(lsm) > pos(lsm) - pos(lsl)) THEN
    2121           86 :                      pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
    2122              :                   ELSE
    2123           94 :                      pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
    2124              :                   END IF
    2125          180 :                   IF ((pos(lsr) - pos(lsl)) < 1.0E-3_dp*pos(lsr)) THEN
    2126            8 :                      new_direction = .TRUE.
    2127              :                   END IF
    2128              :                END IF ! lsr .eq. 0
    2129              :             END IF ! first step
    2130              :          END SELECT
    2131              :          ! now go to the suggested point
    2132          202 :          ds_min = pos(line_search_count + 1)
    2133          202 :          ds = pos(line_search_count + 1) - pos(line_search_count)
    2134          204 :          CALL cp_fm_scale_and_add(1.0_dp, matrix_A, ds, matrix_G_search)
    2135              :       END DO
    2136              : 
    2137              :       ! collect rotation matrix
    2138           26 :       matrix_H%local_data = REAL(cmat_U%local_data, KIND=dp)
    2139            2 :       CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_R, matrix_H, 0.0_dp, matrix_T)
    2140            2 :       CALL cp_fm_to_fm(matrix_T, matrix_R)
    2141            2 :       CALL rotate_orbitals(matrix_R, vectors)
    2142            2 :       CALL cp_fm_release(matrix_R)
    2143              : 
    2144            2 :       CALL cp_fm_release(matrix_A)
    2145            2 :       CALL cp_fm_release(matrix_G)
    2146            2 :       CALL cp_fm_release(matrix_H)
    2147            2 :       CALL cp_fm_release(matrix_T)
    2148            2 :       CALL cp_fm_release(matrix_G_search)
    2149            2 :       CALL cp_fm_release(matrix_G_old)
    2150            2 :       CALL cp_cfm_release(cmat_A)
    2151            2 :       CALL cp_cfm_release(cmat_U)
    2152            2 :       CALL cp_cfm_release(cmat_R)
    2153            2 :       CALL cp_cfm_release(cmat_t1)
    2154            2 :       CALL cp_cfm_release(cmat_t2)
    2155            2 :       CALL cp_cfm_release(cmat_B)
    2156            2 :       CALL cp_cfm_release(cmat_M)
    2157              : 
    2158            2 :       DEALLOCATE (evals, evals_exp, fval, fvald)
    2159              : 
    2160           14 :       DO idim = 1, SIZE(c_zij)
    2161          156 :          zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
    2162          156 :          zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
    2163           14 :          CALL cp_cfm_release(c_zij(idim))
    2164              :       END DO
    2165            2 :       DEALLOCATE (c_zij)
    2166            2 :       DEALLOCATE (diag_z)
    2167              : 
    2168            2 :       CALL timestop(handle)
    2169              : 
    2170            6 :    END SUBROUTINE direct_mini
    2171              : 
    2172              : ! **************************************************************************************************
    2173              : !> \brief Parallel algorithm for jacobi rotations
    2174              : !> \param weights ...
    2175              : !> \param zij ...
    2176              : !> \param vectors ...
    2177              : !> \param para_env ...
    2178              : !> \param max_iter ...
    2179              : !> \param eps_localization ...
    2180              : !> \param sweeps ...
    2181              : !> \param out_each ...
    2182              : !> \param target_time ...
    2183              : !> \param start_time ...
    2184              : !> \param restricted ...
    2185              : !> \par History
    2186              : !>      use allgather for improved performance
    2187              : !> \author MI (11.2009)
    2188              : ! **************************************************************************************************
    2189          390 :    SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
    2190              :                               sweeps, out_each, target_time, start_time, restricted)
    2191              : 
    2192              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    2193              :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :), vectors
    2194              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2195              :       INTEGER, INTENT(IN)                                :: max_iter
    2196              :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
    2197              :       INTEGER                                            :: sweeps
    2198              :       INTEGER, INTENT(IN)                                :: out_each
    2199              :       REAL(dp)                                           :: target_time, start_time
    2200              :       INTEGER                                            :: restricted
    2201              : 
    2202              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'jacobi_rot_para'
    2203              : 
    2204              :       INTEGER                                            :: dim2, handle, i, idim, ii, ilow1, ip, j, &
    2205              :                                                             nblock, nblock_max, ns_me, nstate, &
    2206              :                                                             output_unit
    2207          390 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: ns_bound
    2208          390 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: rotmat, z_ij_loc_im, z_ij_loc_re
    2209              :       REAL(KIND=dp)                                      :: xstate
    2210              :       TYPE(cp_fm_type)                                   :: rmat
    2211          390 :       TYPE(set_c_2d_type), DIMENSION(:), POINTER         :: cz_ij_loc
    2212              : 
    2213          390 :       CALL timeset(routineN, handle)
    2214              : 
    2215          390 :       output_unit = cp_logger_get_default_io_unit()
    2216              : 
    2217          390 :       NULLIFY (cz_ij_loc)
    2218              : 
    2219          390 :       dim2 = SIZE(zij, 2)
    2220              : 
    2221          390 :       CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
    2222          390 :       CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
    2223              : 
    2224          390 :       CALL cp_fm_get_info(rmat, nrow_global=nstate)
    2225              : 
    2226          390 :       IF (restricted > 0) THEN
    2227            0 :          IF (output_unit > 0) THEN
    2228            0 :             WRITE (output_unit, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
    2229              :          END IF
    2230            0 :          nstate = nstate - restricted
    2231              :       END IF
    2232              : 
    2233              :       ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
    2234          390 :       xstate = REAL(nstate, dp)/REAL(para_env%num_pe, dp)
    2235         1560 :       ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
    2236         1170 :       DO ip = 1, para_env%num_pe
    2237          780 :          ns_bound(ip - 1, 1) = MIN(nstate, NINT(xstate*(ip - 1))) + 1
    2238         1170 :          ns_bound(ip - 1, 2) = MIN(nstate, NINT(xstate*ip))
    2239              :       END DO
    2240          390 :       nblock_max = 0
    2241         1170 :       DO ip = 0, para_env%num_pe - 1
    2242          780 :          nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2243         1170 :          nblock_max = MAX(nblock_max, nblock)
    2244              :       END DO
    2245              : 
    2246              :       ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
    2247         1560 :       ALLOCATE (z_ij_loc_re(nstate, nblock_max))
    2248         1170 :       ALLOCATE (z_ij_loc_im(nstate, nblock_max))
    2249         2364 :       ALLOCATE (cz_ij_loc(dim2))
    2250         1584 :       DO idim = 1, dim2
    2251         3972 :          DO ip = 0, para_env%num_pe - 1
    2252         2388 :             nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2253         2388 :             CALL cp_fm_get_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
    2254         2388 :             CALL cp_fm_get_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
    2255         3582 :             IF (para_env%mepos == ip) THEN
    2256         4752 :                ALLOCATE (cz_ij_loc(idim)%c_array(nstate, nblock))
    2257         5034 :                DO i = 1, nblock
    2258        37830 :                   DO j = 1, nstate
    2259        36636 :                      cz_ij_loc(idim)%c_array(j, i) = CMPLX(z_ij_loc_re(j, i), z_ij_loc_im(j, i), dp)
    2260              :                   END DO
    2261              :                END DO
    2262              :             END IF
    2263              :          END DO ! ip
    2264              :       END DO
    2265          390 :       DEALLOCATE (z_ij_loc_re)
    2266          390 :       DEALLOCATE (z_ij_loc_im)
    2267              : 
    2268         1560 :       ALLOCATE (rotmat(nstate, 2*nblock_max))
    2269              : 
    2270              :       CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, ns_bound, &
    2271              :                                 cz_ij_loc, rotmat, output_unit, eps_localization=eps_localization, &
    2272          390 :                                 target_time=target_time, start_time=start_time)
    2273              : 
    2274          390 :       ilow1 = ns_bound(para_env%mepos, 1)
    2275          390 :       ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
    2276         1170 :       ALLOCATE (z_ij_loc_re(nstate, nblock_max))
    2277         1170 :       ALLOCATE (z_ij_loc_im(nstate, nblock_max))
    2278         1584 :       DO idim = 1, dim2
    2279         3972 :          DO ip = 0, para_env%num_pe - 1
    2280        79824 :             z_ij_loc_re = 0.0_dp
    2281        79824 :             z_ij_loc_im = 0.0_dp
    2282         2388 :             nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2283         2388 :             IF (ip == para_env%mepos) THEN
    2284         5034 :                ns_me = nblock
    2285         5034 :                DO i = 1, ns_me
    2286        36636 :                   ii = ilow1 + i - 1
    2287        37830 :                   DO j = 1, nstate
    2288        32796 :                      z_ij_loc_re(j, i) = REAL(cz_ij_loc(idim)%c_array(j, i), dp)
    2289        36636 :                      z_ij_loc_im(j, i) = AIMAG(cz_ij_loc(idim)%c_array(j, i))
    2290              :                   END DO
    2291              :                END DO
    2292              :             END IF
    2293         2388 :             CALL para_env%bcast(z_ij_loc_re, ip)
    2294         2388 :             CALL para_env%bcast(z_ij_loc_im, ip)
    2295         2388 :             CALL cp_fm_set_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
    2296         3582 :             CALL cp_fm_set_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
    2297              :          END DO ! ip
    2298              :       END DO
    2299              : 
    2300         1170 :       DO ip = 0, para_env%num_pe - 1
    2301        25704 :          z_ij_loc_re = 0.0_dp
    2302          780 :          nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2303          780 :          IF (ip == para_env%mepos) THEN
    2304         1633 :             ns_me = nblock
    2305         1633 :             DO i = 1, ns_me
    2306        11774 :                ii = ilow1 + i - 1
    2307        12164 :                DO j = 1, nstate
    2308        11774 :                   z_ij_loc_re(j, i) = rotmat(j, i)
    2309              :                END DO
    2310              :             END DO
    2311              :          END IF
    2312          780 :          CALL para_env%bcast(z_ij_loc_re, ip)
    2313         1170 :          CALL cp_fm_set_submatrix(rmat, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
    2314              :       END DO
    2315              : 
    2316          390 :       DEALLOCATE (z_ij_loc_re)
    2317          390 :       DEALLOCATE (z_ij_loc_im)
    2318         1584 :       DO idim = 1, dim2
    2319         1584 :          DEALLOCATE (cz_ij_loc(idim)%c_array)
    2320              :       END DO
    2321          390 :       DEALLOCATE (cz_ij_loc)
    2322              : 
    2323          390 :       CALL para_env%sync()
    2324          390 :       CALL rotate_orbitals(rmat, vectors)
    2325          390 :       CALL cp_fm_release(rmat)
    2326              : 
    2327          390 :       DEALLOCATE (rotmat)
    2328          390 :       DEALLOCATE (ns_bound)
    2329              : 
    2330          390 :       CALL timestop(handle)
    2331              : 
    2332         1170 :    END SUBROUTINE jacobi_rot_para
    2333              : 
    2334              : ! **************************************************************************************************
    2335              : !> \brief almost identical to 'jacobi_rot_para' but with different inout variables
    2336              : !> \param weights ...
    2337              : !> \param czij ...
    2338              : !> \param para_env ...
    2339              : !> \param max_iter ...
    2340              : !> \param rmat ...
    2341              : !> \param tol_out ...
    2342              : !> \author Soumya Ghosh (08/21)
    2343              : ! **************************************************************************************************
    2344           32 :    SUBROUTINE jacobi_rot_para_1(weights, czij, para_env, max_iter, rmat, tol_out)
    2345              : 
    2346              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    2347              :       TYPE(cp_cfm_type), INTENT(IN)                      :: czij(:)
    2348              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2349              :       INTEGER, INTENT(IN)                                :: max_iter
    2350              :       TYPE(cp_cfm_type), INTENT(IN)                      :: rmat
    2351              :       REAL(dp), INTENT(OUT), OPTIONAL                    :: tol_out
    2352              : 
    2353              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'jacobi_rot_para_1'
    2354              : 
    2355           32 :       COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :)     :: czij_array
    2356              :       INTEGER                                            :: dim2, handle, i, idim, ii, ilow1, ip, j, &
    2357              :                                                             nblock, nblock_max, ns_me, nstate, &
    2358              :                                                             sweeps
    2359              :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: ns_bound
    2360           32 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: rotmat, z_ij_loc_re
    2361              :       REAL(KIND=dp)                                      :: xstate
    2362           32 :       TYPE(set_c_2d_type), DIMENSION(:), POINTER         :: cz_ij_loc
    2363              : 
    2364           32 :       CALL timeset(routineN, handle)
    2365              : 
    2366           32 :       dim2 = SIZE(czij)
    2367              : 
    2368           32 :       CALL cp_cfm_set_all(rmat, CMPLX(0._dp, 0._dp, dp), CMPLX(1._dp, 0._dp, dp))
    2369              : 
    2370           32 :       CALL cp_cfm_get_info(rmat, nrow_global=nstate)
    2371              : 
    2372              :       ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
    2373           32 :       xstate = REAL(nstate, dp)/REAL(para_env%num_pe, dp)
    2374          128 :       ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
    2375           96 :       DO ip = 1, para_env%num_pe
    2376           64 :          ns_bound(ip - 1, 1) = MIN(nstate, NINT(xstate*(ip - 1))) + 1
    2377           96 :          ns_bound(ip - 1, 2) = MIN(nstate, NINT(xstate*ip))
    2378              :       END DO
    2379           32 :       nblock_max = 0
    2380           96 :       DO ip = 0, para_env%num_pe - 1
    2381           64 :          nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2382           96 :          nblock_max = MAX(nblock_max, nblock)
    2383              :       END DO
    2384              : 
    2385              :       ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
    2386          128 :       ALLOCATE (czij_array(nstate, nblock_max))
    2387          192 :       ALLOCATE (cz_ij_loc(dim2))
    2388          128 :       DO idim = 1, dim2
    2389          320 :          DO ip = 0, para_env%num_pe - 1
    2390          192 :             nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2391              :             ! cfm --> allocatable
    2392          192 :             CALL cp_cfm_get_submatrix(czij(idim), czij_array, 1, ns_bound(ip, 1), nstate, nblock)
    2393          288 :             IF (para_env%mepos == ip) THEN
    2394           96 :                ns_me = nblock
    2395          384 :                ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me))
    2396         2160 :                DO i = 1, ns_me
    2397        90912 :                   DO j = 1, nstate
    2398        90816 :                      cz_ij_loc(idim)%c_array(j, i) = czij_array(j, i)
    2399              :                   END DO
    2400              :                END DO
    2401              :             END IF
    2402              :          END DO ! ip
    2403              :       END DO
    2404           32 :       DEALLOCATE (czij_array)
    2405              : 
    2406          128 :       ALLOCATE (rotmat(nstate, 2*nblock_max))
    2407              : 
    2408              :       CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, 1, dim2, nstate, nblock_max, ns_bound, &
    2409           32 :                                 cz_ij_loc, rotmat, 0, tol_out=tol_out)
    2410              : 
    2411           32 :       ilow1 = ns_bound(para_env%mepos, 1)
    2412           32 :       ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
    2413          128 :       ALLOCATE (z_ij_loc_re(nstate, nblock_max))
    2414              : 
    2415           96 :       DO ip = 0, para_env%num_pe - 1
    2416        62016 :          z_ij_loc_re = 0.0_dp
    2417           64 :          nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2418           64 :          IF (ip == para_env%mepos) THEN
    2419          720 :             ns_me = nblock
    2420          720 :             DO i = 1, ns_me
    2421        30272 :                ii = ilow1 + i - 1
    2422        30304 :                DO j = 1, nstate
    2423        30272 :                   z_ij_loc_re(j, i) = rotmat(j, i)
    2424              :                END DO
    2425              :             END DO
    2426              :          END IF
    2427           64 :          CALL para_env%bcast(z_ij_loc_re, ip)
    2428        62048 :          CALL cp_cfm_set_submatrix(rmat, CMPLX(z_ij_loc_re, 0.0_dp, dp), 1, ns_bound(ip, 1), nstate, nblock)
    2429              :       END DO
    2430              : 
    2431           32 :       DEALLOCATE (z_ij_loc_re)
    2432          128 :       DO idim = 1, dim2
    2433          128 :          DEALLOCATE (cz_ij_loc(idim)%c_array)
    2434              :       END DO
    2435           32 :       DEALLOCATE (cz_ij_loc)
    2436              : 
    2437           32 :       CALL para_env%sync()
    2438              : 
    2439           32 :       DEALLOCATE (rotmat)
    2440           32 :       DEALLOCATE (ns_bound)
    2441              : 
    2442           32 :       CALL timestop(handle)
    2443              : 
    2444           64 :    END SUBROUTINE jacobi_rot_para_1
    2445              : 
    2446              : ! **************************************************************************************************
    2447              : !> \brief Parallel algorithm for jacobi rotations
    2448              : !> \param weights ...
    2449              : !> \param para_env ...
    2450              : !> \param max_iter ...
    2451              : !> \param sweeps ...
    2452              : !> \param out_each ...
    2453              : !> \param dim2 ...
    2454              : !> \param nstate ...
    2455              : !> \param nblock_max ...
    2456              : !> \param ns_bound ...
    2457              : !> \param cz_ij_loc ...
    2458              : !> \param rotmat ...
    2459              : !> \param output_unit ...
    2460              : !> \param tol_out ...
    2461              : !> \param eps_localization ...
    2462              : !> \param target_time ...
    2463              : !> \param start_time ...
    2464              : !> \par History
    2465              : !>      split out to reuse with different input types
    2466              : !> \author HF (05.2022)
    2467              : ! **************************************************************************************************
    2468         1266 :    SUBROUTINE jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, &
    2469          422 :                                    ns_bound, cz_ij_loc, rotmat, output_unit, tol_out, eps_localization, target_time, start_time)
    2470              : 
    2471              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    2472              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2473              :       INTEGER, INTENT(IN)                                :: max_iter
    2474              :       INTEGER, INTENT(OUT)                               :: sweeps
    2475              :       INTEGER, INTENT(IN)                                :: out_each, dim2, nstate, nblock_max
    2476              :       INTEGER, DIMENSION(0:, :), INTENT(IN)              :: ns_bound
    2477              :       TYPE(set_c_2d_type), DIMENSION(:), POINTER         :: cz_ij_loc
    2478              :       REAL(dp), DIMENSION(:, :), INTENT(OUT)             :: rotmat
    2479              :       INTEGER, INTENT(IN)                                :: output_unit
    2480              :       REAL(dp), INTENT(OUT), OPTIONAL                    :: tol_out
    2481              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: eps_localization
    2482              :       REAL(dp), OPTIONAL                                 :: target_time, start_time
    2483              : 
    2484              :       COMPLEX(KIND=dp)                                   :: zi, zj
    2485          422 :       COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)  :: c_array_me, c_array_partner
    2486              :       COMPLEX(KIND=dp), POINTER                          :: mii(:), mij(:), mjj(:)
    2487              :       INTEGER :: i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, ip, ip_has_i, &
    2488              :          ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, iu1, iu2, iup1, &
    2489              :          iup2, j, jj, jstate, k, kk, lsweep, n1, n2, npair, nperm, ns_me, ns_partner, &
    2490              :          ns_recv_from, ns_recv_partner
    2491          422 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: rcount, rdispl
    2492          422 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: list_pair
    2493              :       LOGICAL                                            :: should_stop
    2494          422 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: gmat, rmat_loc, rmat_recv, rmat_send
    2495          422 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: rmat_recv_all
    2496              :       REAL(KIND=dp)                                      :: ct, func, gmax, grad, ri, rj, st, t1, &
    2497              :                                                             t2, theta, tolerance, zc, zr
    2498          422 :       TYPE(set_c_1d_type), DIMENSION(:), POINTER         :: zdiag_all, zdiag_me
    2499          422 :       TYPE(set_c_2d_type), DIMENSION(:), POINTER         :: xyz_mix, xyz_mix_ns
    2500              : 
    2501          422 :       NULLIFY (zdiag_all, zdiag_me)
    2502          422 :       NULLIFY (xyz_mix, xyz_mix_ns)
    2503              :       NULLIFY (mii, mij, mjj)
    2504              : 
    2505         2110 :       ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
    2506              : 
    2507         1266 :       ALLOCATE (rcount(para_env%num_pe), STAT=istat)
    2508          844 :       ALLOCATE (rdispl(para_env%num_pe), STAT=istat)
    2509              : 
    2510          422 :       tolerance = 1.0e10_dp
    2511          422 :       sweeps = 0
    2512              : 
    2513              :       ! number of processor pairs and number of permutations
    2514          422 :       npair = (para_env%num_pe + 1)/2
    2515          422 :       nperm = para_env%num_pe - MOD(para_env%num_pe + 1, 2)
    2516         1266 :       ALLOCATE (list_pair(2, npair))
    2517              : 
    2518              :       ! initialize rotation matrix
    2519        87298 :       rotmat = 0.0_dp
    2520         2353 :       DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
    2521         1931 :          ii = i - ns_bound(para_env%mepos, 1) + 1
    2522         2353 :          rotmat(i, ii) = 1.0_dp
    2523              :       END DO
    2524              : 
    2525         2556 :       ALLOCATE (xyz_mix(dim2))
    2526         2134 :       ALLOCATE (xyz_mix_ns(dim2))
    2527         2556 :       ALLOCATE (zdiag_me(dim2))
    2528         2134 :       ALLOCATE (zdiag_all(dim2))
    2529              : 
    2530          422 :       ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
    2531          422 :       IF (ns_me /= 0) THEN
    2532         2070 :          ALLOCATE (c_array_me(nstate, ns_me, dim2))
    2533         1680 :          DO idim = 1, dim2
    2534         5478 :             ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me))
    2535              :          END DO
    2536         1656 :          ALLOCATE (gmat(nstate, ns_me))
    2537              :       END IF
    2538              : 
    2539         1712 :       DO idim = 1, dim2
    2540         3870 :          ALLOCATE (zdiag_me(idim)%c_array(nblock_max))
    2541         7530 :          zdiag_me(idim)%c_array = z_zero
    2542         3870 :          ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max))
    2543        14192 :          zdiag_all(idim)%c_array = z_zero
    2544              :       END DO
    2545         1688 :       ALLOCATE (rmat_recv(nblock_max*2, nblock_max))
    2546         1266 :       ALLOCATE (rmat_send(nblock_max*2, nblock_max))
    2547              : 
    2548              :       ! buffer for message passing
    2549         2110 :       ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1))
    2550              : 
    2551          422 :       IF (output_unit > 0) THEN
    2552          195 :          WRITE (output_unit, '(T4,A )') " Localization by iterative distributed Jacobi rotation"
    2553          195 :          WRITE (output_unit, '(T20,A12,T32, A22,T60, A12,A8 )') "Iteration", "Functional", "Tolerance", " Time "
    2554              :       END IF
    2555              : 
    2556        87204 :       DO lsweep = 1, max_iter + 1
    2557        87204 :          sweeps = lsweep
    2558        87204 :          IF (sweeps == max_iter + 1) THEN
    2559          156 :             IF (output_unit > 0) THEN
    2560           62 :                WRITE (output_unit, *) ' LOCALIZATION! loop did not converge within the maximum number of iterations.'
    2561           62 :                WRITE (output_unit, *) '               Present  Max. gradient = ', tolerance
    2562              :             END IF
    2563              :             EXIT
    2564              :          END IF
    2565        87048 :          t1 = m_walltime()
    2566              : 
    2567       174096 :          DO iperm = 1, nperm
    2568              : 
    2569              :             ! fix partners for this permutation, and get the number of states
    2570        87048 :             CALL eberlein(iperm, para_env, list_pair)
    2571        87048 :             ip_partner = -1
    2572        87048 :             ns_partner = 0
    2573        87048 :             DO ipair = 1, npair
    2574        87048 :                IF (list_pair(1, ipair) == para_env%mepos) THEN
    2575        43524 :                   ip_partner = list_pair(2, ipair)
    2576        43524 :                   EXIT
    2577        43524 :                ELSE IF (list_pair(2, ipair) == para_env%mepos) THEN
    2578        43524 :                   ip_partner = list_pair(1, ipair)
    2579        43524 :                   EXIT
    2580              :                END IF
    2581              :             END DO
    2582        87048 :             IF (ip_partner >= 0) THEN
    2583        87048 :                ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1
    2584              :             ELSE
    2585              :                ns_partner = 0
    2586              :             END IF
    2587              : 
    2588              :             ! if there is a non-zero block connecting two partners, jacobi-sweep it.
    2589        87048 :             IF (ns_partner*ns_me /= 0) THEN
    2590              : 
    2591       348128 :                ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner))
    2592      5089000 :                rmat_loc = 0.0_dp
    2593       698518 :                DO i = 1, ns_me + ns_partner
    2594       698518 :                   rmat_loc(i, i) = 1.0_dp
    2595              :                END DO
    2596              : 
    2597       435160 :                ALLOCATE (c_array_partner(nstate, ns_partner, dim2))
    2598              : 
    2599       349730 :                DO idim = 1, dim2
    2600      1050792 :                   ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner))
    2601      1276400 :                   DO i = 1, ns_me
    2602      7889886 :                      c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i)
    2603              :                   END DO
    2604              :                END DO
    2605              : 
    2606              :                CALL para_env%sendrecv(msgin=c_array_me, dest=ip_partner, &
    2607        87032 :                                       msgout=c_array_partner, source=ip_partner)
    2608              : 
    2609        87032 :                n1 = ns_me
    2610        87032 :                n2 = ns_partner
    2611        87032 :                ilow1 = ns_bound(para_env%mepos, 1)
    2612        87032 :                iup1 = ns_bound(para_env%mepos, 1) + n1 - 1
    2613        87032 :                ilow2 = ns_bound(ip_partner, 1)
    2614        87032 :                iup2 = ns_bound(ip_partner, 1) + n2 - 1
    2615        87032 :                IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1)) THEN
    2616        43516 :                   il1 = 1
    2617              :                   iu1 = n1
    2618        43516 :                   iu1 = n1
    2619        43516 :                   il2 = 1 + n1
    2620        43516 :                   iu2 = n1 + n2
    2621              :                ELSE
    2622        43516 :                   il1 = 1 + n2
    2623              :                   iu1 = n1 + n2
    2624        43516 :                   iu1 = n1 + n2
    2625        43516 :                   il2 = 1
    2626        43516 :                   iu2 = n2
    2627              :                END IF
    2628              : 
    2629       349730 :                DO idim = 1, dim2
    2630      1189368 :                   DO i = 1, n1
    2631      4337346 :                      xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim)
    2632      4479210 :                      xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim)
    2633              :                   END DO
    2634      1276400 :                   DO i = 1, n2
    2635      4337346 :                      xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim)
    2636      4479210 :                      xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim)
    2637              :                   END DO
    2638              :                END DO
    2639              : 
    2640       698518 :                DO istate = 1, n1 + n2
    2641      2588016 :                   DO jstate = istate + 1, n1 + n2
    2642      7663346 :                      DO idim = 1, dim2
    2643      5773848 :                         mii(idim) = xyz_mix(idim)%c_array(istate, istate)
    2644      5773848 :                         mij(idim) = xyz_mix(idim)%c_array(istate, jstate)
    2645      7663346 :                         mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate)
    2646              :                      END DO
    2647      1889498 :                      CALL get_angle(mii, mjj, mij, weights, theta)
    2648      1889498 :                      st = SIN(theta)
    2649      1889498 :                      ct = COS(theta)
    2650      7663346 :                      DO idim = 1, dim2
    2651     51281802 :                         DO i = 1, n1 + n2
    2652     45507954 :                            zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate)
    2653     45507954 :                            zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate)
    2654     45507954 :                            xyz_mix(idim)%c_array(i, istate) = zi
    2655     51281802 :                            xyz_mix(idim)%c_array(i, jstate) = zj
    2656              :                         END DO
    2657     53171300 :                         DO i = 1, n1 + n2
    2658     45507954 :                            zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i)
    2659     45507954 :                            zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i)
    2660     45507954 :                            xyz_mix(idim)%c_array(istate, i) = zi
    2661     51281802 :                            xyz_mix(idim)%c_array(jstate, i) = zj
    2662              :                         END DO
    2663              :                      END DO
    2664              : 
    2665     17235398 :                      DO i = 1, n1 + n2
    2666     14734414 :                         ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate)
    2667     14734414 :                         rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate)
    2668     14734414 :                         rmat_loc(i, istate) = ri
    2669     16623912 :                         rmat_loc(i, jstate) = rj
    2670              :                      END DO
    2671              :                   END DO
    2672              :                END DO
    2673              : 
    2674        87032 :                k = nblock_max + 1
    2675              :                CALL para_env%sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, &
    2676      5089000 :                                       rotmat(1:nstate, k:k + n2 - 1), ip_partner)
    2677              : 
    2678        87032 :                IF (ilow1 < ilow2) THEN
    2679              :                   ! no longer compiles in official sdgb:
    2680              :                   !CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1, k), nstate, rmat_loc(1 + n1, 1), n1 + n2, 0.0_dp, gmat, nstate)
    2681              :                   ! probably inefficient:
    2682              :                   CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, rmat_loc(1 + n1:, 1:n1), &
    2683      1467012 :                              n2, 0.0_dp, gmat(:, :), nstate)
    2684              :                   CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(1:, 1:), &
    2685        43516 :                              n1 + n2, 1.0_dp, gmat(:, :), nstate)
    2686              :                ELSE
    2687              :                   CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, &
    2688        43516 :                              rmat_loc(1:, n2 + 1:), n1 + n2, 0.0_dp, gmat(:, :), nstate)
    2689              :                   ! no longer compiles in official sdgb:
    2690              :                   !CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1, 1), nstate, rmat_loc(n2 + 1, n2 + 1), n1 + n2, 1.0_dp, gmat, nstate)
    2691              :                   ! probably inefficient:
    2692              :                   CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(n2 + 1:, n2 + 1:), &
    2693      1147004 :                              n1, 1.0_dp, gmat(:, :), nstate)
    2694              :                END IF
    2695              : 
    2696        87032 :                CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1)
    2697              : 
    2698       349730 :                DO idim = 1, dim2
    2699      1189368 :                   DO i = 1, n1
    2700      7889886 :                      xyz_mix_ns(idim)%c_array(1:nstate, i) = z_zero
    2701              :                   END DO
    2702              : 
    2703      1189368 :                   DO istate = 1, n1
    2704      7889886 :                      DO jstate = 1, nstate
    2705     33311712 :                         DO i = 1, n2
    2706              :                            xyz_mix_ns(idim)%c_array(jstate, istate) = &
    2707              :                               xyz_mix_ns(idim)%c_array(jstate, istate) + &
    2708     32385042 :                               c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1)
    2709              :                         END DO
    2710              :                      END DO
    2711              :                   END DO
    2712      1276400 :                   DO istate = 1, n1
    2713      7889886 :                      DO jstate = 1, nstate
    2714     34151136 :                         DO i = 1, n1
    2715              :                            xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + &
    2716     33224466 :                                                                  c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1)
    2717              :                         END DO
    2718              :                      END DO
    2719              :                   END DO
    2720              :                END DO ! idim
    2721              : 
    2722        87032 :                DEALLOCATE (c_array_partner)
    2723              : 
    2724              :             ELSE ! save my data
    2725           64 :                DO idim = 1, dim2
    2726           88 :                   DO i = 1, ns_me
    2727          120 :                      xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i)
    2728              :                   END DO
    2729              :                END DO
    2730              :             END IF
    2731              : 
    2732       349794 :             DO idim = 1, dim2
    2733      1276488 :                DO i = 1, ns_me
    2734      7889982 :                   cz_ij_loc(idim)%c_array(1:nstate, i) = z_zero
    2735              :                END DO
    2736              :             END DO
    2737              : 
    2738        87048 :             IF (ns_partner*ns_me /= 0) THEN
    2739              :                ! transpose rotation matrix rmat_loc
    2740       698518 :                DO i = 1, ns_me + ns_partner
    2741      2588016 :                   DO j = i + 1, ns_me + ns_partner
    2742      1889498 :                      ri = rmat_loc(i, j)
    2743      1889498 :                      rmat_loc(i, j) = rmat_loc(j, i)
    2744      2500984 :                      rmat_loc(j, i) = ri
    2745              :                   END DO
    2746              :                END DO
    2747              : 
    2748              :                ! prepare for distribution
    2749       392775 :                DO i = 1, n1
    2750      1510528 :                   rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1)
    2751              :                END DO
    2752        87032 :                ik = nblock_max
    2753       392775 :                DO i = 1, n2
    2754      1470263 :                   rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1)
    2755              :                END DO
    2756              :             ELSE
    2757           64 :                rmat_send = 0.0_dp
    2758              :             END IF
    2759              : 
    2760              :             ! collect data from all tasks (this takes some significant time)
    2761        87048 :             CALL para_env%allgather(rmat_send, rmat_recv_all)
    2762              : 
    2763              :             ! update blocks everywhere
    2764       261144 :             DO ip = 0, para_env%num_pe - 1
    2765              : 
    2766       174096 :                ip_recv_from = MOD(para_env%mepos - IP + para_env%num_pe, para_env%num_pe)
    2767      6456192 :                rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from)
    2768              : 
    2769       174096 :                ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1
    2770              : 
    2771       261144 :                IF (ns_me /= 0) THEN
    2772       174080 :                   IF (ns_recv_from /= 0) THEN
    2773              :                      !look for the partner of ip_recv_from
    2774       174072 :                      ip_recv_partner = -1
    2775       174072 :                      ns_recv_partner = 0
    2776       174072 :                      DO ipair = 1, npair
    2777       174072 :                         IF (list_pair(1, ipair) == ip_recv_from) THEN
    2778        87040 :                            ip_recv_partner = list_pair(2, ipair)
    2779        87040 :                            EXIT
    2780        87032 :                         ELSE IF (list_pair(2, ipair) == ip_recv_from) THEN
    2781              :                            ip_recv_partner = list_pair(1, ipair)
    2782              :                            EXIT
    2783              :                         END IF
    2784              :                      END DO
    2785              : 
    2786       174072 :                      IF (ip_recv_partner >= 0) THEN
    2787       174072 :                         ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1
    2788              :                      END IF
    2789       174072 :                      IF (ns_recv_partner > 0) THEN
    2790       174064 :                         il1 = ns_bound(para_env%mepos, 1)
    2791       174064 :                         il_recv = ns_bound(ip_recv_from, 1)
    2792       174064 :                         il_recv_partner = ns_bound(ip_recv_partner, 1)
    2793       174064 :                         ik = nblock_max
    2794              : 
    2795       699460 :                         DO idim = 1, dim2
    2796      2378736 :                            DO i = 1, ns_recv_from
    2797      1853340 :                               ii = il_recv + i - 1
    2798      9079254 :                               DO j = 1, ns_me
    2799     33224466 :                                  jj = j
    2800     35077806 :                                  DO k = 1, ns_recv_from
    2801     26523948 :                                     kk = il_recv + k - 1
    2802              :                                     cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
    2803     33224466 :                                                                       rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j)
    2804              :                                  END DO
    2805              :                               END DO
    2806              :                            END DO
    2807      2552800 :                            DO i = 1, ns_recv_from
    2808      1853340 :                               ii = il_recv + i - 1
    2809      9079254 :                               DO j = 1, ns_me
    2810     32385042 :                                  jj = j
    2811     34238382 :                                  DO k = 1, ns_recv_partner
    2812     25684524 :                                     kk = il_recv_partner + k - 1
    2813              :                                     cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
    2814     32385042 :                                                                       rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j)
    2815              :                                  END DO
    2816              :                               END DO
    2817              :                            END DO
    2818              :                         END DO ! idim
    2819              :                      ELSE
    2820            8 :                         il1 = ns_bound(para_env%mepos, 1)
    2821            8 :                         il_recv = ns_bound(ip_recv_from, 1)
    2822           32 :                         DO idim = 1, dim2
    2823           56 :                            DO j = 1, ns_me
    2824           48 :                               jj = j
    2825           72 :                               DO i = 1, ns_recv_from
    2826           24 :                                  ii = il_recv + i - 1
    2827           48 :                                  cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j)
    2828              :                               END DO
    2829              :                            END DO
    2830              :                         END DO ! idim
    2831              :                      END IF
    2832              :                   END IF
    2833              :                END IF ! ns_me
    2834              :             END DO ! ip
    2835              : 
    2836       174096 :             IF (ns_partner*ns_me /= 0) THEN
    2837        87032 :                DEALLOCATE (rmat_loc)
    2838       349730 :                DO idim = 1, dim2
    2839       349730 :                   DEALLOCATE (xyz_mix(idim)%c_array)
    2840              :                END DO
    2841              :             END IF
    2842              : 
    2843              :          END DO ! iperm
    2844              : 
    2845              :          ! calculate the max gradient
    2846       349794 :          DO idim = 1, dim2
    2847      1189440 :             DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
    2848       926694 :                ii = i - ns_bound(para_env%mepos, 1) + 1
    2849       926694 :                zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
    2850      1189440 :                zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
    2851              :             END DO
    2852       788238 :             rcount(:) = SIZE(zdiag_me(idim)%c_array)
    2853       262746 :             rdispl(1) = 0
    2854       525492 :             DO ip = 2, para_env%num_pe
    2855       525492 :                rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
    2856              :             END DO
    2857              :             ! collect all the diagonal elements in a replicated 1d array
    2858      3492450 :             CALL para_env%allgatherv(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl)
    2859              :          END DO
    2860              : 
    2861        87048 :          gmax = 0.0_dp
    2862       392799 :          DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
    2863       305751 :             k = j - ns_bound(para_env%mepos, 1) + 1
    2864      1337548 :             DO i = 1, j - 1
    2865              :                ! find the location of the diagonal element (i,i)
    2866      1087882 :                DO ip = 0, para_env%num_pe - 1
    2867      1087882 :                   IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2)) THEN
    2868              :                      ip_has_i = ip
    2869              :                      EXIT
    2870              :                   END IF
    2871              :                END DO
    2872       944749 :                ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1
    2873              :                ! mepos has the diagonal element (j,j), as well as the off diagonal (i,j)
    2874       944749 :                jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1
    2875       944749 :                grad = 0.0_dp
    2876      3831673 :                DO idim = 1, dim2
    2877      2886924 :                   zi = zdiag_all(idim)%c_array(ii)
    2878      2886924 :                   zj = zdiag_all(idim)%c_array(jj)
    2879      3831673 :                   grad = grad + weights(idim)*REAL(4.0_dp*CONJG(cz_ij_loc(idim)%c_array(i, k))*(zj - zi), dp)
    2880              :                END DO
    2881      1250500 :                gmax = MAX(gmax, ABS(grad))
    2882              :             END DO
    2883              :          END DO
    2884              : 
    2885        87048 :          CALL para_env%max(gmax)
    2886        87048 :          tolerance = gmax
    2887        87048 :          IF (PRESENT(tol_out)) tol_out = tolerance
    2888              : 
    2889        87048 :          func = 0.0_dp
    2890       392799 :          DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
    2891       305751 :             k = i - ns_bound(para_env%mepos, 1) + 1
    2892      1319493 :             DO idim = 1, dim2
    2893       926694 :                zr = REAL(cz_ij_loc(idim)%c_array(i, k), dp)
    2894       926694 :                zc = AIMAG(cz_ij_loc(idim)%c_array(i, k))
    2895      1232445 :                func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/twopi/twopi
    2896              :             END DO
    2897              :          END DO
    2898        87048 :          CALL para_env%sum(func)
    2899        87048 :          t2 = m_walltime()
    2900              : 
    2901        87048 :          IF (output_unit > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
    2902          440 :             WRITE (output_unit, '(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1
    2903          440 :             CALL m_flush(output_unit)
    2904              :          END IF
    2905        87048 :          IF (PRESENT(eps_localization)) THEN
    2906        87016 :             IF (tolerance < eps_localization) EXIT
    2907              :          END IF
    2908        87048 :          IF (PRESENT(target_time) .AND. PRESENT(start_time)) THEN
    2909        86750 :             CALL external_control(should_stop, "LOC", target_time=target_time, start_time=start_time)
    2910        86750 :             IF (should_stop) EXIT
    2911              :          END IF
    2912              : 
    2913              :       END DO ! lsweep
    2914              : 
    2915              :       ! buffer for message passing
    2916          422 :       DEALLOCATE (rmat_recv_all)
    2917              : 
    2918          422 :       DEALLOCATE (rmat_recv)
    2919          422 :       DEALLOCATE (rmat_send)
    2920          422 :       IF (ns_me > 0) THEN
    2921          414 :          DEALLOCATE (c_array_me)
    2922              :       END IF
    2923         1712 :       DO idim = 1, dim2
    2924         1290 :          DEALLOCATE (zdiag_me(idim)%c_array)
    2925         1712 :          DEALLOCATE (zdiag_all(idim)%c_array)
    2926              :       END DO
    2927          422 :       DEALLOCATE (zdiag_me)
    2928          422 :       DEALLOCATE (zdiag_all)
    2929          422 :       DEALLOCATE (xyz_mix)
    2930         1712 :       DO idim = 1, dim2
    2931         1712 :          IF (ns_me /= 0) THEN
    2932         1266 :             DEALLOCATE (xyz_mix_ns(idim)%c_array)
    2933              :          END IF
    2934              :       END DO
    2935          422 :       DEALLOCATE (xyz_mix_ns)
    2936          422 :       IF (ns_me /= 0) THEN
    2937          414 :          DEALLOCATE (gmat)
    2938              :       END IF
    2939          422 :       DEALLOCATE (mii)
    2940          422 :       DEALLOCATE (mij)
    2941          422 :       DEALLOCATE (mjj)
    2942          422 :       DEALLOCATE (list_pair)
    2943              : 
    2944          844 :    END SUBROUTINE jacobi_rot_para_core
    2945              : 
    2946              : ! **************************************************************************************************
    2947              : !> \brief ...
    2948              : !> \param iperm ...
    2949              : !> \param para_env ...
    2950              : !> \param list_pair ...
    2951              : ! **************************************************************************************************
    2952        87048 :    SUBROUTINE eberlein(iperm, para_env, list_pair)
    2953              :       INTEGER, INTENT(IN)                                :: iperm
    2954              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2955              :       INTEGER, DIMENSION(:, :)                           :: list_pair
    2956              : 
    2957              :       INTEGER                                            :: i, ii, jj, npair
    2958              : 
    2959        87048 :       npair = (para_env%num_pe + 1)/2
    2960        87048 :       IF (iperm == 1) THEN
    2961              : !..set up initial ordering
    2962       261144 :          DO I = 0, para_env%num_pe - 1
    2963       174096 :             II = ((i + 1) + 1)/2
    2964       174096 :             JJ = MOD((i + 1) + 1, 2) + 1
    2965       261144 :             list_pair(JJ, II) = i
    2966              :          END DO
    2967        87048 :          IF (MOD(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1
    2968            0 :       ELSEIF (MOD(iperm, 2) == 0) THEN
    2969              : !..a type shift
    2970            0 :          jj = list_pair(1, npair)
    2971            0 :          DO I = npair, 3, -1
    2972            0 :             list_pair(1, I) = list_pair(1, I - 1)
    2973              :          END DO
    2974            0 :          list_pair(1, 2) = list_pair(2, 1)
    2975            0 :          list_pair(2, 1) = jj
    2976              :       ELSE
    2977              : !..b type shift
    2978            0 :          jj = list_pair(2, 1)
    2979            0 :          DO I = 1, npair - 1
    2980            0 :             list_pair(2, I) = list_pair(2, I + 1)
    2981              :          END DO
    2982            0 :          list_pair(2, npair) = jj
    2983              :       END IF
    2984              : 
    2985        87048 :    END SUBROUTINE eberlein
    2986              : 
    2987              : ! **************************************************************************************************
    2988              : !> \brief ...
    2989              : !> \param vectors ...
    2990              : !> \param op_sm_set ...
    2991              : !> \param zij_fm_set ...
    2992              : ! **************************************************************************************************
    2993          490 :    SUBROUTINE zij_matrix(vectors, op_sm_set, zij_fm_set)
    2994              : 
    2995              :       TYPE(cp_fm_type), INTENT(IN)                       :: vectors
    2996              :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: op_sm_set
    2997              :       TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN)      :: zij_fm_set
    2998              : 
    2999              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'zij_matrix'
    3000              : 
    3001              :       INTEGER                                            :: handle, i, j, nao, nmoloc
    3002              :       TYPE(cp_fm_type)                                   :: opvec
    3003              : 
    3004          490 :       CALL timeset(routineN, handle)
    3005              : 
    3006              :       ! get rows and cols of the input
    3007          490 :       CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc)
    3008              :       ! replicate the input kind of matrix
    3009          490 :       CALL cp_fm_create(opvec, vectors%matrix_struct)
    3010              : 
    3011              :       ! Compute zij here
    3012         1996 :       DO i = 1, SIZE(zij_fm_set, 2)
    3013         5008 :          DO j = 1, SIZE(zij_fm_set, 1)
    3014         3012 :             CALL cp_fm_set_all(zij_fm_set(j, i), 0.0_dp)
    3015         3012 :             CALL cp_dbcsr_sm_fm_multiply(op_sm_set(j, i)%matrix, vectors, opvec, ncol=nmoloc)
    3016              :             CALL parallel_gemm("T", "N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, &
    3017         4518 :                                zij_fm_set(j, i))
    3018              :          END DO
    3019              :       END DO
    3020              : 
    3021          490 :       CALL cp_fm_release(opvec)
    3022          490 :       CALL timestop(handle)
    3023              : 
    3024          490 :    END SUBROUTINE zij_matrix
    3025              : 
    3026              : ! **************************************************************************************************
    3027              : !> \brief ...
    3028              : !> \param vectors ...
    3029              : ! **************************************************************************************************
    3030           36 :    SUBROUTINE scdm_qrfact(vectors)
    3031              : 
    3032              :       TYPE(cp_fm_type), INTENT(IN)                       :: vectors
    3033              : 
    3034              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'scdm_qrfact'
    3035              : 
    3036              :       INTEGER                                            :: handle, ncolT, nrowT
    3037           36 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: tau
    3038              :       TYPE(cp_fm_struct_type), POINTER                   :: cstruct
    3039              :       TYPE(cp_fm_type)                                   :: CTp, Qf, tmp
    3040              : 
    3041           36 :       CALL timeset(routineN, handle)
    3042              : 
    3043              :       ! Create Transpose of Coefficient Matrix vectors
    3044           36 :       nrowT = vectors%matrix_struct%ncol_global
    3045           36 :       ncolT = vectors%matrix_struct%nrow_global
    3046              : 
    3047              :       CALL cp_fm_struct_create(cstruct, template_fmstruct=vectors%matrix_struct, &
    3048           36 :                                nrow_global=nrowT, ncol_global=ncolT)
    3049           36 :       CALL cp_fm_create(CTp, cstruct)
    3050           36 :       CALL cp_fm_struct_release(cstruct)
    3051              : 
    3052          108 :       ALLOCATE (tau(nrowT))
    3053              : 
    3054           36 :       CALL cp_fm_transpose(vectors, CTp)
    3055              : 
    3056              :       ! Get QR decomposition of CTs
    3057           36 :       CALL cp_fm_pdgeqpf(CTp, tau, nrowT, ncolT, 1, 1)
    3058              : 
    3059              :       ! Construction of Q from the scalapack output
    3060              :       CALL cp_fm_struct_create(cstruct, para_env=CTp%matrix_struct%para_env, &
    3061              :                                context=CTp%matrix_struct%context, nrow_global=CTp%matrix_struct%nrow_global, &
    3062           36 :                                ncol_global=CTp%matrix_struct%nrow_global)
    3063           36 :       CALL cp_fm_create(Qf, cstruct)
    3064           36 :       CALL cp_fm_struct_release(cstruct)
    3065           36 :       CALL cp_fm_to_fm_submat(CTp, Qf, nrowT, nrowT, 1, 1, 1, 1)
    3066              : 
    3067              :       ! Get Q
    3068           36 :       CALL cp_fm_pdorgqr(Qf, tau, nrowT, 1, 1)
    3069              : 
    3070              :       ! Transform original coefficient matrix vectors
    3071           36 :       CALL cp_fm_create(tmp, vectors%matrix_struct)
    3072           36 :       CALL cp_fm_set_all(tmp, 0.0_dp, 1.0_dp)
    3073           36 :       CALL cp_fm_to_fm(vectors, tmp)
    3074           36 :       CALL parallel_gemm('N', 'N', ncolT, nrowT, nrowT, 1.0_dp, tmp, Qf, 0.0_dp, vectors)
    3075              : 
    3076              :       ! Cleanup
    3077           36 :       CALL cp_fm_release(CTp)
    3078           36 :       CALL cp_fm_release(tmp)
    3079           36 :       CALL cp_fm_release(Qf)
    3080           36 :       DEALLOCATE (tau)
    3081              : 
    3082           36 :       CALL timestop(handle)
    3083              : 
    3084          144 :    END SUBROUTINE scdm_qrfact
    3085              : 
    3086            0 : END MODULE qs_localization_methods
        

Generated by: LCOV version 2.0-1