LCOV - code coverage report
Current view: top level - src - qs_localization_methods.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:c5411e0) Lines: 1232 1507 81.8 %
Date: 2024-05-16 06:48:40 Functions: 18 26 69.2 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Localization methods such as 2x2 Jacobi rotations
      10             : !>                                   Steepest Decents
      11             : !>                                   Conjugate Gradient
      12             : !> \par History
      13             : !>      Initial parallellization of jacobi (JVDV 07.2003)
      14             : !>      direct minimization using exponential parametrization (JVDV 09.2003)
      15             : !>      crazy rotations go fast (JVDV 10.2003)
      16             : !> \author CJM (04.2003)
      17             : ! **************************************************************************************************
      18             : MODULE qs_localization_methods
      19             :    USE cell_types,                      ONLY: cell_type
      20             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      21             :    USE cp_cfm_basic_linalg,             ONLY: cp_cfm_column_scale,&
      22             :                                               cp_cfm_rot_cols,&
      23             :                                               cp_cfm_rot_rows,&
      24             :                                               cp_cfm_scale,&
      25             :                                               cp_cfm_scale_and_add,&
      26             :                                               cp_cfm_schur_product,&
      27             :                                               cp_cfm_trace
      28             :    USE cp_cfm_diag,                     ONLY: cp_cfm_heevd
      29             :    USE cp_cfm_types,                    ONLY: &
      30             :         cp_cfm_create, cp_cfm_get_element, cp_cfm_get_info, cp_cfm_get_submatrix, cp_cfm_release, &
      31             :         cp_cfm_set_all, cp_cfm_set_submatrix, cp_cfm_to_cfm, cp_cfm_to_fm, cp_cfm_type, &
      32             :         cp_fm_to_cfm
      33             :    USE cp_dbcsr_operations,             ONLY: cp_dbcsr_sm_fm_multiply
      34             :    USE cp_external_control,             ONLY: external_control
      35             :    USE cp_fm_basic_linalg,              ONLY: cp_fm_frobenius_norm,&
      36             :                                               cp_fm_pdgeqpf,&
      37             :                                               cp_fm_pdorgqr,&
      38             :                                               cp_fm_scale,&
      39             :                                               cp_fm_scale_and_add,&
      40             :                                               cp_fm_trace,&
      41             :                                               cp_fm_transpose,&
      42             :                                               cp_fm_triangular_multiply
      43             :    USE cp_fm_cholesky,                  ONLY: cp_fm_cholesky_decompose
      44             :    USE cp_fm_diag,                      ONLY: cp_fm_syevd
      45             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      46             :                                               cp_fm_struct_get,&
      47             :                                               cp_fm_struct_release,&
      48             :                                               cp_fm_struct_type
      49             :    USE cp_fm_types,                     ONLY: &
      50             :         cp_fm_create, cp_fm_get_element, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_init_random, &
      51             :         cp_fm_maxabsrownorm, cp_fm_maxabsval, cp_fm_release, cp_fm_set_all, cp_fm_set_submatrix, &
      52             :         cp_fm_to_fm, cp_fm_to_fm_submat, cp_fm_type
      53             :    USE cp_log_handling,                 ONLY: cp_logger_get_default_io_unit,&
      54             :                                               cp_logger_get_default_unit_nr
      55             :    USE dbcsr_api,                       ONLY: dbcsr_p_type
      56             :    USE kahan_sum,                       ONLY: accurate_sum
      57             :    USE kinds,                           ONLY: dp
      58             :    USE machine,                         ONLY: m_flush,&
      59             :                                               m_walltime
      60             :    USE mathconstants,                   ONLY: pi,&
      61             :                                               twopi
      62             :    USE matrix_exp,                      ONLY: exp_pade_real,&
      63             :                                               get_nsquare_norder
      64             :    USE message_passing,                 ONLY: mp_para_env_type
      65             :    USE parallel_gemm_api,               ONLY: parallel_gemm
      66             : #include "./base/base_uses.f90"
      67             : 
      68             :    IMPLICIT NONE
      69             :    PUBLIC :: initialize_weights, crazy_rotations, &
      70             :              direct_mini, rotate_orbitals, approx_l1_norm_sd, jacobi_rotations, scdm_qrfact, zij_matrix, &
      71             :              jacobi_cg_edf_ls
      72             : 
      73             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_localization_methods'
      74             : 
      75             :    PRIVATE
      76             : 
      77             :    TYPE set_c_1d_type
      78             :       COMPLEX(KIND=dp), POINTER, DIMENSION(:) :: c_array
      79             :    END TYPE
      80             :    TYPE set_c_2d_type
      81             :       COMPLEX(KIND=dp), POINTER, DIMENSION(:, :) :: c_array
      82             :    END TYPE
      83             : 
      84             : CONTAINS
      85             : ! **************************************************************************************************
      86             : !> \brief ...
      87             : !> \param C ...
      88             : !> \param iterations ...
      89             : !> \param eps ...
      90             : !> \param converged ...
      91             : !> \param sweeps ...
      92             : ! **************************************************************************************************
      93         210 :    SUBROUTINE approx_l1_norm_sd(C, iterations, eps, converged, sweeps)
      94             :       TYPE(cp_fm_type), INTENT(IN)                       :: C
      95             :       INTEGER, INTENT(IN)                                :: iterations
      96             :       REAL(KIND=dp), INTENT(IN)                          :: eps
      97             :       LOGICAL, INTENT(INOUT)                             :: converged
      98             :       INTEGER, INTENT(INOUT)                             :: sweeps
      99             : 
     100             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'approx_l1_norm_sd'
     101             :       INTEGER, PARAMETER                                 :: taylor_order = 100
     102             :       REAL(KIND=dp), PARAMETER                           :: alpha = 0.1_dp, f2_eps = 0.01_dp
     103             : 
     104             :       INTEGER                                            :: handle, i, istep, k, n, ncol_local, &
     105             :                                                             nrow_local, output_unit, p
     106             :       REAL(KIND=dp)                                      :: expfactor, f2, f2old, gnorm, tnorm
     107             :       TYPE(cp_blacs_env_type), POINTER                   :: context
     108             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_k_k
     109             :       TYPE(cp_fm_type)                                   :: CTmp, G, Gp1, Gp2, U
     110             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     111             : 
     112          30 :       CALL timeset(routineN, handle)
     113             : 
     114          30 :       NULLIFY (context, para_env, fm_struct_k_k)
     115             : 
     116          30 :       output_unit = cp_logger_get_default_io_unit()
     117             : 
     118             :       CALL cp_fm_struct_get(C%matrix_struct, nrow_global=n, ncol_global=k, &
     119             :                             nrow_local=nrow_local, ncol_local=ncol_local, &
     120          30 :                             para_env=para_env, context=context)
     121             :       CALL cp_fm_struct_create(fm_struct_k_k, para_env=para_env, context=context, &
     122          30 :                                nrow_global=k, ncol_global=k)
     123          30 :       CALL cp_fm_create(CTmp, C%matrix_struct)
     124          30 :       CALL cp_fm_create(U, fm_struct_k_k)
     125          30 :       CALL cp_fm_create(G, fm_struct_k_k)
     126          30 :       CALL cp_fm_create(Gp1, fm_struct_k_k)
     127          30 :       CALL cp_fm_create(Gp2, fm_struct_k_k)
     128             :       !
     129             :       ! printing
     130          30 :       IF (output_unit > 0) THEN
     131          15 :          WRITE (output_unit, '(1X)')
     132          15 :          WRITE (output_unit, '(2X,A)') '-----------------------------------------------------------------------------'
     133          15 :          WRITE (output_unit, '(A,I5)') '      Nbr iterations =', iterations
     134          15 :          WRITE (output_unit, '(A,E10.2)') '     eps convergence =', eps
     135          15 :          WRITE (output_unit, '(A,I5)') '    Max Taylor order =', taylor_order
     136          15 :          WRITE (output_unit, '(A,E10.2)') '              f2 eps =', f2_eps
     137          15 :          WRITE (output_unit, '(A,E10.2)') '               alpha =', alpha
     138          15 :          WRITE (output_unit, '(A)') '     iteration    approx_l1_norm    g_norm   rel_err'
     139             :       END IF
     140             :       !
     141          30 :       f2old = 0.0_dp
     142          30 :       converged = .FALSE.
     143             :       !
     144             :       ! Start the steepest descent
     145        1496 :       DO istep = 1, iterations
     146             :          !
     147             :          !-------------------------------------------------------------------
     148             :          ! compute f_2
     149             :          ! f_2(x)=(x^2+eps)^1/2
     150        1486 :          f2 = 0.0_dp
     151       23004 :          DO p = 1, ncol_local ! p
     152     1113692 :             DO i = 1, nrow_local ! i
     153     1112206 :                f2 = f2 + SQRT(C%local_data(i, p)**2 + f2_eps)
     154             :             END DO
     155             :          END DO
     156        1486 :          CALL C%matrix_struct%para_env%sum(f2)
     157             :          !write(*,*) 'qs_localize: f_2=',f2
     158             :          !-------------------------------------------------------------------
     159             :          ! compute the derivative of f_2
     160             :          ! f_2(x)=(x^2+eps)^1/2
     161       23004 :          DO p = 1, ncol_local ! p
     162     1113692 :             DO i = 1, nrow_local ! i
     163     1112206 :                CTmp%local_data(i, p) = C%local_data(i, p)/SQRT(C%local_data(i, p)**2 + f2_eps)
     164             :             END DO
     165             :          END DO
     166        1486 :          CALL parallel_gemm('T', 'N', k, k, n, 1.0_dp, CTmp, C, 0.0_dp, G)
     167             :          ! antisymmetrize
     168        1486 :          CALL cp_fm_transpose(G, U)
     169        1486 :          CALL cp_fm_scale_and_add(-0.5_dp, G, 0.5_dp, U)
     170             :          !
     171             :          !-------------------------------------------------------------------
     172             :          !
     173        1486 :          gnorm = cp_fm_frobenius_norm(G)
     174             :          !write(*,*) 'qs_localize: norm(G)=',gnorm
     175             :          !
     176             :          ! rescale for steepest descent
     177        1486 :          CALL cp_fm_scale(-alpha, G)
     178             :          !
     179             :          ! compute unitary transform
     180             :          ! zeroth order
     181        1486 :          CALL cp_fm_set_all(U, 0.0_dp, 1.0_dp)
     182             :          ! first order
     183        1486 :          expfactor = 1.0_dp
     184        1486 :          CALL cp_fm_scale_and_add(1.0_dp, U, expfactor, G)
     185        1486 :          tnorm = cp_fm_frobenius_norm(G)
     186             :          !write(*,*) 'Taylor expansion i=',1,' norm(X^i)/i!=',tnorm
     187        1486 :          IF (tnorm .GT. 1.0E-10_dp) THEN
     188             :             ! other orders
     189        1486 :             CALL cp_fm_to_fm(G, Gp1)
     190        4938 :             DO i = 2, taylor_order
     191             :                ! new power of G
     192        4938 :                CALL parallel_gemm('N', 'N', k, k, k, 1.0_dp, G, Gp1, 0.0_dp, Gp2)
     193        4938 :                CALL cp_fm_to_fm(Gp2, Gp1)
     194             :                ! add to the taylor expansion so far
     195        4938 :                expfactor = expfactor/REAL(i, KIND=dp)
     196        4938 :                CALL cp_fm_scale_and_add(1.0_dp, U, expfactor, Gp1)
     197        4938 :                tnorm = cp_fm_frobenius_norm(Gp1)
     198             :                !write(*,*) 'Taylor expansion i=',i,' norm(X^i)/i!=',tnorm*expfactor
     199        4938 :                IF (tnorm*expfactor .LT. 1.0E-10_dp) EXIT
     200             :             END DO
     201             :          END IF
     202             :          !
     203             :          ! incrementaly rotate the MOs
     204        1486 :          CALL parallel_gemm('N', 'N', n, k, k, 1.0_dp, C, U, 0.0_dp, CTmp)
     205        1486 :          CALL cp_fm_to_fm(CTmp, C)
     206             :          !
     207             :          ! printing
     208        1486 :          IF (output_unit .GT. 0) THEN
     209         743 :             WRITE (output_unit, '(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, ABS((f2 - f2old)/f2)
     210             :          END IF
     211             :          !
     212             :          ! Are we done?
     213        1486 :          sweeps = istep
     214             :          !IF(gnorm.LE.grad_thresh.AND.ABS((f2-f2old)/f2).LE.f2_thresh.AND.istep.GT.1) THEN
     215        1486 :          IF (ABS((f2 - f2old)/f2) .LE. eps .AND. istep .GT. 1) THEN
     216          20 :             converged = .TRUE.
     217          20 :             EXIT
     218             :          END IF
     219        1476 :          f2old = f2
     220             :       END DO
     221             :       !
     222             :       ! here we should do one refine step to enforce C'*S*C=1 for any case
     223             :       !
     224             :       ! Print the final result
     225          30 :       IF (output_unit .GT. 0) WRITE (output_unit, '(A,E16.10)') ' sparseness function f2 = ', f2
     226             :       !
     227             :       ! sparsity
     228             :       !DO i=1,size(thresh,1)
     229             :       !   gnorm = 0.0_dp
     230             :       !   DO o=1,ncol_local
     231             :       !      DO p=1,nrow_local
     232             :       !         IF(ABS(C%local_data(p,o)).GT.thresh(i)) THEN
     233             :       !            gnorm = gnorm + 1.0_dp
     234             :       !         ENDIF
     235             :       !      ENDDO
     236             :       !   ENDDO
     237             :       !   CALL C%matrix_struct%para_env%sum(gnorm)
     238             :       !   IF(output_unit.GT.0) THEN
     239             :       !      WRITE(output_unit,*) 'qs_localize: ratio2=',gnorm / ( REAL(k,KIND=dp)*REAL(n,KIND=dp) ),thresh(i)
     240             :       !   ENDIF
     241             :       !ENDDO
     242             :       !
     243             :       ! deallocate
     244          30 :       CALL cp_fm_struct_release(fm_struct_k_k)
     245          30 :       CALL cp_fm_release(CTmp)
     246          30 :       CALL cp_fm_release(U)
     247          30 :       CALL cp_fm_release(G)
     248          30 :       CALL cp_fm_release(Gp1)
     249          30 :       CALL cp_fm_release(Gp2)
     250             : 
     251          30 :       CALL timestop(handle)
     252             : 
     253          30 :    END SUBROUTINE approx_l1_norm_sd
     254             : ! **************************************************************************************************
     255             : !> \brief ...
     256             : !> \param cell ...
     257             : !> \param weights ...
     258             : ! **************************************************************************************************
     259         458 :    SUBROUTINE initialize_weights(cell, weights)
     260             : 
     261             :       TYPE(cell_type), POINTER                           :: cell
     262             :       REAL(KIND=dp), DIMENSION(:)                        :: weights
     263             : 
     264             :       REAL(KIND=dp), DIMENSION(3, 3)                     :: metric
     265             : 
     266         458 :       CPASSERT(ASSOCIATED(cell))
     267             : 
     268         458 :       metric = 0.0_dp
     269         458 :       CALL dgemm('T', 'N', 3, 3, 3, 1._dp, cell%hmat(:, :), 3, cell%hmat(:, :), 3, 0.0_dp, metric(:, :), 3)
     270             : 
     271         458 :       weights(1) = METRIC(1, 1) - METRIC(1, 2) - METRIC(1, 3)
     272         458 :       weights(2) = METRIC(2, 2) - METRIC(1, 2) - METRIC(2, 3)
     273         458 :       weights(3) = METRIC(3, 3) - METRIC(1, 3) - METRIC(2, 3)
     274         458 :       weights(4) = METRIC(1, 2)
     275         458 :       weights(5) = METRIC(1, 3)
     276         458 :       weights(6) = METRIC(2, 3)
     277             : 
     278         458 :    END SUBROUTINE initialize_weights
     279             : 
     280             : ! **************************************************************************************************
     281             : !> \brief wrapper for the jacobi routines, should be removed if jacobi_rot_para
     282             : !>        can deal with serial para_envs.
     283             : !> \param weights ...
     284             : !> \param zij ...
     285             : !> \param vectors ...
     286             : !> \param para_env ...
     287             : !> \param max_iter ...
     288             : !> \param eps_localization ...
     289             : !> \param sweeps ...
     290             : !> \param out_each ...
     291             : !> \param target_time ...
     292             : !> \param start_time ...
     293             : !> \param restricted ...
     294             : !> \par History
     295             : !> \author Joost VandeVondele (02.2010)
     296             : ! **************************************************************************************************
     297         388 :    SUBROUTINE jacobi_rotations(weights, zij, vectors, para_env, max_iter, &
     298             :                                eps_localization, sweeps, out_each, target_time, start_time, restricted)
     299             : 
     300             :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
     301             :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :), vectors
     302             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     303             :       INTEGER, INTENT(IN)                                :: max_iter
     304             :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
     305             :       INTEGER                                            :: sweeps
     306             :       INTEGER, INTENT(IN)                                :: out_each
     307             :       REAL(dp)                                           :: target_time, start_time
     308             :       INTEGER                                            :: restricted
     309             : 
     310         388 :       IF (para_env%num_pe == 1) THEN
     311             :          CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, &
     312           0 :                                       sweeps, out_each, restricted=restricted)
     313             :       ELSE
     314             :          CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
     315         388 :                               sweeps, out_each, target_time, start_time, restricted=restricted)
     316             :       END IF
     317             : 
     318         388 :    END SUBROUTINE jacobi_rotations
     319             : 
     320             : ! **************************************************************************************************
     321             : !> \brief this routine, private to the module is a serial backup, till we have jacobi_rot_para to work in serial
     322             : !>        while the routine below works in parallel, it is too slow to be useful
     323             : !> \param weights ...
     324             : !> \param zij ...
     325             : !> \param vectors ...
     326             : !> \param max_iter ...
     327             : !> \param eps_localization ...
     328             : !> \param sweeps ...
     329             : !> \param out_each ...
     330             : !> \param restricted ...
     331             : ! **************************************************************************************************
     332           0 :    SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, &
     333             :                                       out_each, restricted)
     334             :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
     335             :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :), vectors
     336             :       INTEGER, INTENT(IN)                                :: max_iter
     337             :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
     338             :       INTEGER                                            :: sweeps
     339             :       INTEGER, INTENT(IN)                                :: out_each
     340             :       INTEGER                                            :: restricted
     341             : 
     342             :       CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rotations_serial'
     343             : 
     344           0 :       COMPLEX(KIND=dp), POINTER                          :: mii(:), mij(:), mjj(:)
     345             :       INTEGER                                            :: dim2, handle, idim, istate, jstate, &
     346             :                                                             nstate, unit_nr
     347             :       REAL(KIND=dp)                                      :: ct, st, t1, t2, theta, tolerance
     348             :       TYPE(cp_cfm_type)                                  :: c_rmat
     349             :       TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:)       :: c_zij
     350             :       TYPE(cp_fm_type)                                   :: rmat
     351             : 
     352           0 :       CALL timeset(routineN, handle)
     353             : 
     354           0 :       dim2 = SIZE(zij, 2)
     355           0 :       ALLOCATE (c_zij(dim2))
     356             :       NULLIFY (mii, mij, mjj)
     357           0 :       ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
     358             : 
     359           0 :       CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
     360           0 :       CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
     361             : 
     362           0 :       CALL cp_cfm_create(c_rmat, zij(1, 1)%matrix_struct)
     363           0 :       CALL cp_cfm_set_all(c_rmat, (0._dp, 0._dp), (1._dp, 0._dp))
     364           0 :       DO idim = 1, dim2
     365           0 :          CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
     366             :          c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
     367           0 :                                         zij(2, idim)%local_data, dp)
     368             :       END DO
     369             : 
     370           0 :       CALL cp_fm_get_info(rmat, nrow_global=nstate)
     371           0 :       tolerance = 1.0e10_dp
     372             : 
     373           0 :       sweeps = 0
     374           0 :       unit_nr = -1
     375           0 :       IF (rmat%matrix_struct%para_env%is_source()) THEN
     376           0 :          unit_nr = cp_logger_get_default_unit_nr()
     377           0 :          WRITE (unit_nr, '(T4,A )') " Localization by iterative Jacobi rotation"
     378             :       END IF
     379             : 
     380           0 :       IF (restricted > 0) THEN
     381           0 :          unit_nr = cp_logger_get_default_unit_nr()
     382           0 :          WRITE (unit_nr, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
     383           0 :          nstate = nstate - restricted
     384             :       END IF
     385             : 
     386             :       ! do jacobi sweeps until converged
     387           0 :       DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
     388           0 :          sweeps = sweeps + 1
     389           0 :          t1 = m_walltime()
     390             : 
     391           0 :          DO istate = 1, nstate
     392           0 :             DO jstate = istate + 1, nstate
     393           0 :                DO idim = 1, dim2
     394           0 :                   CALL cp_cfm_get_element(c_zij(idim), istate, istate, mii(idim))
     395           0 :                   CALL cp_cfm_get_element(c_zij(idim), istate, jstate, mij(idim))
     396           0 :                   CALL cp_cfm_get_element(c_zij(idim), jstate, jstate, mjj(idim))
     397             :                END DO
     398           0 :                CALL get_angle(mii, mjj, mij, weights, theta)
     399           0 :                st = SIN(theta)
     400           0 :                ct = COS(theta)
     401           0 :                CALL rotate_zij(istate, jstate, st, ct, c_zij)
     402             : 
     403           0 :                CALL rotate_rmat(istate, jstate, st, ct, c_rmat)
     404             :             END DO
     405             :          END DO
     406             : 
     407           0 :          CALL check_tolerance(c_zij, weights, tolerance)
     408             : 
     409           0 :          t2 = m_walltime()
     410           0 :          IF (unit_nr > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
     411             :             WRITE (unit_nr, '(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') &
     412           0 :                "Iteration:", sweeps, "Tolerance:", tolerance, "Time:", t2 - t1
     413           0 :             CALL m_flush(unit_nr)
     414             :          END IF
     415             : 
     416             :       END DO
     417             : 
     418           0 :       DO idim = 1, dim2
     419           0 :          zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
     420           0 :          zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
     421           0 :          CALL cp_cfm_release(c_zij(idim))
     422             :       END DO
     423           0 :       DEALLOCATE (c_zij)
     424           0 :       DEALLOCATE (mii, mij, mjj)
     425           0 :       rmat%local_data = REAL(c_rmat%local_data, dp)
     426           0 :       CALL cp_cfm_release(c_rmat)
     427           0 :       CALL rotate_orbitals(rmat, vectors)
     428           0 :       CALL cp_fm_release(rmat)
     429             : 
     430           0 :       CALL timestop(handle)
     431             : 
     432           0 :    END SUBROUTINE jacobi_rotations_serial
     433             : ! **************************************************************************************************
     434             : !> \brief very similar to jacobi_rotations_serial with some extra output options
     435             : !> \param weights ...
     436             : !> \param c_zij ...
     437             : !> \param max_iter ...
     438             : !> \param c_rmat ...
     439             : !> \param eps_localization ...
     440             : !> \param tol_out ...
     441             : !> \param jsweeps ...
     442             : !> \param out_each ...
     443             : !> \param c_zij_out ...
     444             : !> \param grad_final ...
     445             : ! **************************************************************************************************
     446           0 :    SUBROUTINE jacobi_rotations_serial_1(weights, c_zij, max_iter, c_rmat, eps_localization, &
     447           0 :                                         tol_out, jsweeps, out_each, c_zij_out, grad_final)
     448             :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
     449             :       TYPE(cp_cfm_type), INTENT(IN)                      :: c_zij(:)
     450             :       INTEGER, INTENT(IN)                                :: max_iter
     451             :       TYPE(cp_cfm_type), INTENT(IN)                      :: c_rmat
     452             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: eps_localization
     453             :       REAL(KIND=dp), INTENT(OUT), OPTIONAL               :: tol_out
     454             :       INTEGER, INTENT(OUT), OPTIONAL                     :: jsweeps
     455             :       INTEGER, INTENT(IN), OPTIONAL                      :: out_each
     456             :       TYPE(cp_cfm_type), INTENT(IN), OPTIONAL            :: c_zij_out(:)
     457             :       TYPE(cp_fm_type), INTENT(OUT), OPTIONAL, POINTER   :: grad_final
     458             : 
     459             :       CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rotations_serial_1'
     460             : 
     461             :       COMPLEX(KIND=dp)                                   :: mzii
     462             :       COMPLEX(KIND=dp), POINTER                          :: mii(:), mij(:), mjj(:)
     463             :       INTEGER                                            :: dim2, handle, idim, istate, jstate, &
     464             :                                                             nstate, sweeps, unit_nr
     465             :       REAL(KIND=dp)                                      :: alpha, avg_spread_ii, ct, spread_ii, st, &
     466             :                                                             sum_spread_ii, t1, t2, theta, tolerance
     467             :       TYPE(cp_cfm_type)                                  :: c_rmat_local
     468             :       TYPE(cp_cfm_type), ALLOCATABLE                     :: c_zij_local(:)
     469             : 
     470           0 :       CALL timeset(routineN, handle)
     471             : 
     472           0 :       dim2 = SIZE(c_zij)
     473             :       NULLIFY (mii, mij, mjj)
     474           0 :       ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
     475             : 
     476           0 :       ALLOCATE (c_zij_local(dim2))
     477           0 :       CALL cp_cfm_create(c_rmat_local, c_rmat%matrix_struct)
     478           0 :       CALL cp_cfm_set_all(c_rmat_local, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
     479           0 :       DO idim = 1, dim2
     480           0 :          CALL cp_cfm_create(c_zij_local(idim), c_zij(idim)%matrix_struct)
     481           0 :          c_zij_local(idim)%local_data = c_zij(idim)%local_data
     482             :       END DO
     483             : 
     484           0 :       CALL cp_cfm_get_info(c_rmat_local, nrow_global=nstate)
     485           0 :       tolerance = 1.0e10_dp
     486             : 
     487           0 :       IF (PRESENT(grad_final)) CALL cp_fm_set_all(grad_final, 0.0_dp)
     488             : 
     489           0 :       sweeps = 0
     490           0 :       IF (PRESENT(out_each)) THEN
     491           0 :          unit_nr = -1
     492           0 :          IF (c_rmat_local%matrix_struct%para_env%is_source()) THEN
     493           0 :             unit_nr = cp_logger_get_default_unit_nr()
     494             :          END IF
     495           0 :          alpha = 0.0_dp
     496           0 :          DO idim = 1, dim2
     497           0 :             alpha = alpha + weights(idim)
     498             :          END DO
     499             :       END IF
     500             : 
     501             :       ! do jacobi sweeps until converged
     502           0 :       DO WHILE (sweeps < max_iter)
     503           0 :          sweeps = sweeps + 1
     504           0 :          IF (PRESENT(eps_localization)) THEN
     505           0 :             IF (tolerance < eps_localization) EXIT
     506             :          END IF
     507           0 :          IF (PRESENT(out_each)) t1 = m_walltime()
     508             : 
     509           0 :          DO istate = 1, nstate
     510           0 :             DO jstate = istate + 1, nstate
     511           0 :                DO idim = 1, dim2
     512           0 :                   CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mii(idim))
     513           0 :                   CALL cp_cfm_get_element(c_zij_local(idim), istate, jstate, mij(idim))
     514           0 :                   CALL cp_cfm_get_element(c_zij_local(idim), jstate, jstate, mjj(idim))
     515             :                END DO
     516           0 :                CALL get_angle(mii, mjj, mij, weights, theta)
     517           0 :                st = SIN(theta)
     518           0 :                ct = COS(theta)
     519           0 :                CALL rotate_zij(istate, jstate, st, ct, c_zij_local)
     520             : 
     521           0 :                CALL rotate_rmat(istate, jstate, st, ct, c_rmat_local)
     522             :             END DO
     523             :          END DO
     524             : 
     525           0 :          IF (PRESENT(grad_final)) THEN
     526           0 :             CALL check_tolerance(c_zij_local, weights, tolerance, grad=grad_final)
     527             :          ELSE
     528           0 :             CALL check_tolerance(c_zij_local, weights, tolerance)
     529             :          END IF
     530           0 :          IF (PRESENT(tol_out)) tol_out = tolerance
     531             : 
     532           0 :          IF (PRESENT(out_each)) THEN
     533           0 :             t2 = m_walltime()
     534           0 :             IF (unit_nr > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
     535           0 :                sum_spread_ii = 0.0_dp
     536           0 :                DO istate = 1, nstate
     537             :                   spread_ii = 0.0_dp
     538           0 :                   DO idim = 1, dim2
     539           0 :                      CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mzii)
     540             :                      spread_ii = spread_ii + weights(idim)* &
     541           0 :                                  ABS(mzii)**2/twopi/twopi
     542             :                   END DO
     543           0 :                   sum_spread_ii = sum_spread_ii + spread_ii
     544             :                END DO
     545           0 :                sum_spread_ii = alpha*nstate/twopi/twopi - sum_spread_ii
     546           0 :                avg_spread_ii = sum_spread_ii/nstate
     547             :                WRITE (unit_nr, '(T4,A,T26,A,T48,A,T64,A)') &
     548           0 :                   "Iteration", "Avg. Spread_ii", "Tolerance", "Time"
     549             :                WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4,T60,F8.3)') &
     550           0 :                   sweeps, avg_spread_ii, tolerance, t2 - t1
     551           0 :                CALL m_flush(unit_nr)
     552             :             END IF
     553           0 :             IF (PRESENT(jsweeps)) jsweeps = sweeps
     554             :          END IF
     555             : 
     556             :       END DO
     557             : 
     558           0 :       IF (PRESENT(c_zij_out)) THEN
     559           0 :          DO idim = 1, dim2
     560           0 :             CALL cp_cfm_to_cfm(c_zij_local(idim), c_zij_out(idim))
     561             :          END DO
     562             :       END IF
     563           0 :       CALL cp_cfm_to_cfm(c_rmat_local, c_rmat)
     564             : 
     565           0 :       DEALLOCATE (mii, mij, mjj)
     566           0 :       DO idim = 1, dim2
     567           0 :          CALL cp_cfm_release(c_zij_local(idim))
     568             :       END DO
     569           0 :       DEALLOCATE (c_zij_local)
     570           0 :       CALL cp_cfm_release(c_rmat_local)
     571             : 
     572           0 :       CALL timestop(handle)
     573             : 
     574           0 :    END SUBROUTINE jacobi_rotations_serial_1
     575             : ! **************************************************************************************************
     576             : !> \brief combine jacobi rotations (serial) and conjugate gradient with golden section line search
     577             : !>        for partially occupied wannier functions
     578             : !> \param para_env ...
     579             : !> \param weights ...
     580             : !> \param zij ...
     581             : !> \param vectors ...
     582             : !> \param max_iter ...
     583             : !> \param eps_localization ...
     584             : !> \param iter ...
     585             : !> \param out_each ...
     586             : !> \param nextra ...
     587             : !> \param do_cg ...
     588             : !> \param nmo ...
     589             : !> \param vectors_2 ...
     590             : !> \param mos_guess ...
     591             : ! **************************************************************************************************
     592           2 :    SUBROUTINE jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, &
     593             :                                iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
     594             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     595             :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
     596             :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :), vectors
     597             :       INTEGER, INTENT(IN)                                :: max_iter
     598             :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
     599             :       INTEGER                                            :: iter
     600             :       INTEGER, INTENT(IN)                                :: out_each, nextra
     601             :       LOGICAL, INTENT(IN)                                :: do_cg
     602             :       INTEGER, INTENT(IN), OPTIONAL                      :: nmo
     603             :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: vectors_2, mos_guess
     604             : 
     605             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'jacobi_cg_edf_ls'
     606             :       COMPLEX(KIND=dp), PARAMETER                        :: cone = (1.0_dp, 0.0_dp), &
     607             :                                                             czero = (0.0_dp, 0.0_dp)
     608             :       REAL(KIND=dp), PARAMETER                           :: gold_sec = 0.3819_dp
     609             : 
     610             :       COMPLEX(KIND=dp)                                   :: cnorm2_Gct, cnorm2_Gct_cross, mzii
     611           2 :       COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :)     :: tmp_cmat
     612           2 :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: arr_zii
     613           2 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: matrix_zii
     614             :       INTEGER :: dim2, handle, icinit, idim, istate, line_search_count, line_searches, lsl, lsm, &
     615             :          lsr, miniter, nao, ndummy, nocc, norextra, northo, nstate, unit_nr
     616             :       INTEGER, DIMENSION(1)                              :: iloc
     617             :       LOGICAL                                            :: do_cinit_mo, do_cinit_random, &
     618             :                                                             do_U_guess_mo, new_direction
     619             :       REAL(KIND=dp) :: alpha, avg_spread_ii, beta, beta_pr, ds, ds_min, mintol, norm, norm2_Gct, &
     620             :          norm2_Gct_cross, norm2_old, spread_ii, spread_sum, sum_spread_ii, t1, tol, tolc, weight
     621           2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: sum_spread
     622           2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: tmp_mat, tmp_mat_1
     623             :       REAL(KIND=dp), DIMENSION(50)                       :: energy, pos
     624           2 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: tmp_arr
     625             :       TYPE(cp_blacs_env_type), POINTER                   :: context
     626             :       TYPE(cp_cfm_type)                                  :: c_tilde, ctrans_lambda, Gct_old, &
     627             :                                                             grad_ctilde, skc, tmp_cfm, tmp_cfm_1, &
     628             :                                                             tmp_cfm_2, U, UL, V, VL, zdiag
     629           2 :       TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:)       :: c_zij, zij_0
     630             :       TYPE(cp_fm_struct_type), POINTER                   :: tmp_fm_struct
     631             :       TYPE(cp_fm_type)                                   :: id_nextra, matrix_U, matrix_V, &
     632             :                                                             matrix_V_all, rmat, tmp_fm, vectors_all
     633             : 
     634           2 :       CALL timeset(routineN, handle)
     635             : 
     636           2 :       dim2 = SIZE(zij, 2)
     637           2 :       NULLIFY (context)
     638           2 :       NULLIFY (matrix_zii, arr_zii)
     639           2 :       NULLIFY (tmp_fm_struct)
     640           2 :       NULLIFY (tmp_arr)
     641             : 
     642          12 :       ALLOCATE (c_zij(dim2))
     643             : 
     644           2 :       CALL cp_fm_get_info(zij(1, 1), nrow_global=nstate)
     645             : 
     646           6 :       ALLOCATE (sum_spread(nstate))
     647           8 :       ALLOCATE (matrix_zii(nstate, dim2))
     648         266 :       matrix_zii = czero
     649          88 :       sum_spread = 0.0_dp
     650             : 
     651             :       alpha = 0.0_dp
     652           8 :       DO idim = 1, dim2
     653           6 :          alpha = alpha + weights(idim)
     654           6 :          CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
     655             :          c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
     656        5813 :                                         zij(2, idim)%local_data, dp)
     657             :       END DO
     658             : 
     659          10 :       ALLOCATE (zij_0(dim2))
     660             : 
     661           2 :       CALL cp_cfm_create(U, zij(1, 1)%matrix_struct)
     662           2 :       CALL cp_fm_create(matrix_U, zij(1, 1)%matrix_struct)
     663             : 
     664           2 :       CALL cp_cfm_set_all(U, czero, cone)
     665           2 :       CALL cp_fm_set_all(matrix_U, 0.0_dp, 1.0_dp)
     666             : 
     667           2 :       CALL cp_fm_get_info(vectors, nrow_global=nao)
     668           2 :       IF (nextra > 0) THEN
     669           2 :          IF (PRESENT(mos_guess)) THEN
     670           2 :             do_cinit_random = .FALSE.
     671           2 :             do_cinit_mo = .TRUE.
     672           2 :             CALL cp_fm_get_info(mos_guess, ncol_global=ndummy)
     673             :          ELSE
     674           0 :             do_cinit_random = .TRUE.
     675           0 :             do_cinit_mo = .FALSE.
     676           0 :             ndummy = nstate
     677             :          END IF
     678             : 
     679             :          IF (do_cinit_random) THEN
     680             :             icinit = 1
     681             :             do_U_guess_mo = .FALSE.
     682             :          ELSEIF (do_cinit_mo) THEN
     683           2 :             icinit = 2
     684           2 :             do_U_guess_mo = .TRUE.
     685             :          END IF
     686             : 
     687           2 :          nocc = nstate - nextra
     688           2 :          northo = nmo - nocc
     689           2 :          norextra = nmo - nstate
     690           2 :          CALL cp_fm_struct_get(zij(1, 1)%matrix_struct, context=context)
     691             : 
     692           8 :          ALLOCATE (tmp_cmat(nstate, nstate))
     693             :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nmo, &
     694           2 :                                   para_env=para_env, context=context)
     695           8 :          DO idim = 1, dim2
     696           6 :             CALL cp_cfm_create(zij_0(idim), tmp_fm_struct)
     697           6 :             CALL cp_cfm_set_all(zij_0(idim), czero, cone)
     698           6 :             CALL cp_cfm_get_submatrix(c_zij(idim), tmp_cmat)
     699           8 :             CALL cp_cfm_set_submatrix(zij_0(idim), tmp_cmat)
     700             :          END DO
     701           2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     702           2 :          DEALLOCATE (tmp_cmat)
     703             : 
     704             :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nstate, &
     705           2 :                                   para_env=para_env, context=context)
     706           2 :          CALL cp_cfm_create(V, tmp_fm_struct)
     707           2 :          CALL cp_fm_create(matrix_V, tmp_fm_struct)
     708           2 :          CALL cp_cfm_create(zdiag, tmp_fm_struct)
     709           2 :          CALL cp_fm_create(rmat, tmp_fm_struct)
     710           2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     711           2 :          CALL cp_cfm_set_all(V, czero, cone)
     712           2 :          CALL cp_fm_set_all(matrix_V, 0.0_dp, 1.0_dp)
     713             : 
     714             :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=ndummy, &
     715           2 :                                   para_env=para_env, context=context)
     716           2 :          CALL cp_fm_create(matrix_V_all, tmp_fm_struct)
     717           2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     718           2 :          CALL cp_fm_set_all(matrix_V_all, 0._dp, 1._dp)
     719             : 
     720           6 :          ALLOCATE (arr_zii(nstate))
     721             : 
     722             :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
     723           2 :                                   para_env=para_env, context=context)
     724           2 :          CALL cp_cfm_create(c_tilde, tmp_fm_struct)
     725           2 :          CALL cp_cfm_create(grad_ctilde, tmp_fm_struct)
     726           2 :          CALL cp_cfm_create(Gct_old, tmp_fm_struct)
     727           2 :          CALL cp_cfm_create(skc, tmp_fm_struct)
     728           2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     729           2 :          CALL cp_cfm_set_all(c_tilde, czero)
     730           2 :          CALL cp_cfm_set_all(Gct_old, czero)
     731           2 :          CALL cp_cfm_set_all(skc, czero)
     732             : 
     733             :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nstate, &
     734           2 :                                   para_env=para_env, context=context)
     735           2 :          CALL cp_cfm_create(VL, tmp_fm_struct)
     736           2 :          CALL cp_cfm_set_all(VL, czero)
     737           2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     738             : 
     739             :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nextra, &
     740           2 :                                   para_env=para_env, context=context)
     741           2 :          CALL cp_fm_create(id_nextra, tmp_fm_struct)
     742           2 :          CALL cp_cfm_create(ctrans_lambda, tmp_fm_struct)
     743           2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     744           2 :          CALL cp_cfm_set_all(ctrans_lambda, czero)
     745           2 :          CALL cp_fm_set_all(id_nextra, 0.0_dp, 1.0_dp)
     746             : 
     747             :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nstate, &
     748           2 :                                   para_env=para_env, context=context)
     749           2 :          CALL cp_cfm_create(UL, tmp_fm_struct)
     750           2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     751           2 :          CALL cp_cfm_set_all(UL, czero)
     752             : 
     753             :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nao, ncol_global=nmo, &
     754           2 :                                   para_env=para_env, context=context)
     755           2 :          CALL cp_fm_create(vectors_all, tmp_fm_struct)
     756           2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     757           8 :          ALLOCATE (tmp_mat(nao, nstate))
     758           2 :          CALL cp_fm_get_submatrix(vectors, tmp_mat)
     759           2 :          CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, 1, nao, nstate)
     760           2 :          DEALLOCATE (tmp_mat)
     761           8 :          ALLOCATE (tmp_mat(nao, norextra))
     762           2 :          CALL cp_fm_get_submatrix(vectors_2, tmp_mat)
     763           2 :          CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, nstate + 1, nao, norextra)
     764           2 :          DEALLOCATE (tmp_mat)
     765             : 
     766             :          ! initialize c_tilde
     767          14 :          SELECT CASE (icinit)
     768             :          CASE (1) ! random coefficients
     769             :             !WRITE (*, *) "RANDOM INITIAL GUESS FOR C"
     770           0 :             CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
     771           0 :             CALL cp_fm_init_random(tmp_fm, nextra)
     772           0 :             CALL ortho_vectors(tmp_fm)
     773           0 :             c_tilde%local_data = tmp_fm%local_data
     774           0 :             CALL cp_fm_release(tmp_fm)
     775           0 :             ALLOCATE (tmp_cmat(northo, nextra))
     776           0 :             CALL cp_cfm_get_submatrix(c_tilde, tmp_cmat)
     777           0 :             CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, nocc + 1, northo, nextra)
     778           0 :             DEALLOCATE (tmp_cmat)
     779             :          CASE (2) ! MO based coeffs
     780           2 :             CALL parallel_gemm("T", "N", nmo, ndummy, nao, 1.0_dp, vectors_all, mos_guess, 0.0_dp, matrix_V_all)
     781           6 :             ALLOCATE (tmp_arr(nmo))
     782           8 :             ALLOCATE (tmp_mat(nmo, ndummy))
     783           8 :             ALLOCATE (tmp_mat_1(nmo, nstate))
     784             :             ! normalize matrix_V_all
     785           2 :             CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat)
     786          88 :             DO istate = 1, ndummy
     787        4558 :                tmp_arr(:) = tmp_mat(:, istate)
     788        4558 :                norm = norm2(tmp_arr)
     789        4558 :                tmp_arr(:) = tmp_arr(:)/norm
     790        4560 :                tmp_mat(:, istate) = tmp_arr(:)
     791             :             END DO
     792           2 :             CALL cp_fm_set_submatrix(matrix_V_all, tmp_mat)
     793           2 :             CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat_1, 1, 1, nmo, nstate)
     794           2 :             CALL cp_fm_set_submatrix(matrix_V, tmp_mat_1)
     795           2 :             DEALLOCATE (tmp_arr, tmp_mat, tmp_mat_1)
     796           2 :             CALL cp_fm_to_cfm(msourcer=matrix_V, mtarget=V)
     797           8 :             ALLOCATE (tmp_mat(northo, ndummy))
     798           8 :             ALLOCATE (tmp_mat_1(northo, nextra))
     799           2 :             CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat, nocc + 1, 1, northo, ndummy)
     800           6 :             ALLOCATE (tmp_arr(ndummy))
     801          88 :             tmp_arr = 0.0_dp
     802          88 :             DO istate = 1, ndummy
     803        1034 :                tmp_arr(istate) = norm2(tmp_mat(:, istate))
     804             :             END DO
     805             :             ! find edfs
     806           6 :             DO istate = 1, nextra
     807         180 :                iloc = MAXLOC(tmp_arr)
     808          48 :                tmp_mat_1(:, istate) = tmp_mat(:, iloc(1))
     809           6 :                tmp_arr(iloc(1)) = 0.0_dp
     810             :             END DO
     811             : 
     812           2 :             DEALLOCATE (tmp_arr, tmp_mat)
     813             : 
     814             :             CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
     815           2 :                                      para_env=para_env, context=context)
     816           2 :             CALL cp_fm_create(tmp_fm, tmp_fm_struct)
     817           2 :             CALL cp_fm_struct_release(tmp_fm_struct)
     818           2 :             CALL cp_fm_set_submatrix(tmp_fm, tmp_mat_1)
     819           2 :             DEALLOCATE (tmp_mat_1)
     820           2 :             CALL ortho_vectors(tmp_fm)
     821           2 :             CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
     822           2 :             CALL cp_fm_release(tmp_fm)
     823             :             ! initialize U
     824           2 :             IF (do_U_guess_mo) THEN
     825           8 :                ALLOCATE (tmp_cmat(nocc, nstate))
     826           2 :                CALL cp_cfm_get_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
     827           2 :                CALL cp_cfm_set_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
     828           2 :                DEALLOCATE (tmp_cmat)
     829           8 :                ALLOCATE (tmp_cmat(northo, nstate))
     830           2 :                CALL cp_cfm_get_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
     831           2 :                CALL cp_cfm_set_submatrix(VL, tmp_cmat, 1, 1, northo, nstate)
     832           2 :                DEALLOCATE (tmp_cmat)
     833           2 :                CALL parallel_gemm("C", "N", nextra, nstate, northo, cone, c_tilde, VL, czero, UL)
     834           8 :                ALLOCATE (tmp_cmat(nextra, nstate))
     835           2 :                CALL cp_cfm_get_submatrix(UL, tmp_cmat, 1, 1, nextra, nstate)
     836           2 :                CALL cp_cfm_set_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
     837           2 :                DEALLOCATE (tmp_cmat)
     838           2 :                CALL cp_fm_create(tmp_fm, U%matrix_struct)
     839        1937 :                tmp_fm%local_data = REAL(U%local_data, KIND=dp)
     840           2 :                CALL ortho_vectors(tmp_fm)
     841           2 :                CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=U)
     842           2 :                CALL cp_fm_release(tmp_fm)
     843           4 :                CALL cp_cfm_to_fm(U, matrix_U)
     844             :             END IF
     845             :             ! reevaluate V
     846           8 :             ALLOCATE (tmp_cmat(nocc, nstate))
     847           2 :             CALL cp_cfm_get_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
     848           2 :             CALL cp_cfm_set_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
     849           2 :             DEALLOCATE (tmp_cmat)
     850           8 :             ALLOCATE (tmp_cmat(nextra, nstate))
     851           2 :             CALL cp_cfm_get_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
     852           2 :             CALL cp_cfm_set_submatrix(UL, tmp_cmat, 1, 1, nextra, nstate)
     853           2 :             DEALLOCATE (tmp_cmat)
     854           2 :             CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, UL, czero, VL)
     855           8 :             ALLOCATE (tmp_cmat(northo, nstate))
     856           2 :             CALL cp_cfm_get_submatrix(VL, tmp_cmat)
     857           2 :             CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
     858           6 :             DEALLOCATE (tmp_cmat)
     859             :          END SELECT
     860             :       ELSE
     861           0 :          DO idim = 1, dim2
     862           0 :             CALL cp_cfm_create(zij_0(idim), zij(1, 1)%matrix_struct)
     863           0 :             CALL cp_cfm_to_cfm(c_zij(idim), zij_0(idim))
     864             :          END DO
     865           0 :          CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
     866           0 :          CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
     867             :       END IF
     868             : 
     869           2 :       unit_nr = -1
     870           2 :       IF (rmat%matrix_struct%para_env%is_source()) THEN
     871           1 :          unit_nr = cp_logger_get_default_unit_nr()
     872           1 :          WRITE (unit_nr, '(T4,A )') " Localization by combined Jacobi rotations and Non-Linear Conjugate Gradient"
     873             :       END IF
     874             : 
     875           2 :       norm2_old = 1.0E30_dp
     876           2 :       ds_min = 1.0_dp
     877           2 :       new_direction = .TRUE.
     878           2 :       iter = 0
     879           2 :       line_searches = 0
     880           2 :       line_search_count = 0
     881           2 :       tol = 1.0E+20_dp
     882           2 :       mintol = 1.0E+10_dp
     883           2 :       miniter = 0
     884             : 
     885             :       !IF (nextra > 0) WRITE(*,*) 'random_guess, MO_guess, U_guess, conjugate_gradient: ', &
     886             :       !                            do_cinit_random, do_cinit_mo, do_U_guess_mo, do_cg
     887             : 
     888             :       ! do conjugate gradient until converged
     889          34 :       DO WHILE (iter < max_iter)
     890          34 :          iter = iter + 1
     891             :          !WRITE(*,*) 'iter = ', iter
     892          34 :          t1 = m_walltime()
     893             : 
     894          34 :          IF (iter > 1) THEN
     895             :             ! comput U
     896          32 :             CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
     897          32 :             CALL cp_cfm_create(tmp_cfm_2, zij(1, 1)%matrix_struct)
     898          32 :             IF (para_env%num_pe == 1) THEN
     899           0 :                CALL jacobi_rotations_serial_1(weights, c_zij, 1, tmp_cfm_2, tol_out=tol)
     900             :             ELSE
     901          32 :                CALL jacobi_rot_para_1(weights, c_zij, para_env, 1, tmp_cfm_2, tol_out=tol)
     902             :             END IF
     903          32 :             CALL parallel_gemm('N', 'N', nstate, nstate, nstate, cone, U, tmp_cfm_2, czero, tmp_cfm)
     904          32 :             CALL cp_cfm_to_cfm(tmp_cfm, U)
     905          32 :             CALL cp_cfm_release(tmp_cfm)
     906          32 :             CALL cp_cfm_release(tmp_cfm_2)
     907             :          END IF
     908             : 
     909          34 :          IF (nextra > 0) THEN
     910         136 :             ALLOCATE (tmp_cmat(nextra, nstate))
     911          34 :             CALL cp_cfm_get_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
     912          34 :             CALL cp_cfm_set_submatrix(UL, tmp_cmat)
     913          34 :             DEALLOCATE (tmp_cmat)
     914          34 :             IF (iter > 1) THEN
     915             :                ! orthonormalize c_tilde
     916          32 :                CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
     917         448 :                tmp_fm%local_data = REAL(c_tilde%local_data, KIND=dp)
     918          32 :                CALL ortho_vectors(tmp_fm)
     919          32 :                CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
     920          32 :                CALL cp_fm_release(tmp_fm)
     921             : 
     922         128 :                ALLOCATE (tmp_cmat(nocc, nstate))
     923          32 :                CALL cp_cfm_get_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
     924          32 :                CALL cp_cfm_set_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
     925          32 :                DEALLOCATE (tmp_cmat)
     926          32 :                CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, UL, czero, VL)
     927         128 :                ALLOCATE (tmp_cmat(northo, nstate))
     928          32 :                CALL cp_cfm_get_submatrix(VL, tmp_cmat)
     929          32 :                CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
     930          64 :                DEALLOCATE (tmp_cmat)
     931             :             END IF
     932             : 
     933             :             ! reset if new_direction
     934          34 :             IF (new_direction .AND. MOD(line_searches, 20) .EQ. 5) THEN
     935           0 :                CALL cp_cfm_set_all(skc, czero)
     936           0 :                CALL cp_cfm_set_all(Gct_old, czero)
     937           0 :                norm2_old = 1.0E30_dp
     938             :             END IF
     939             : 
     940          34 :             CALL cp_cfm_create(tmp_cfm, V%matrix_struct)
     941          34 :             CALL cp_cfm_to_cfm(V, tmp_cfm)
     942          34 :             CALL cp_cfm_create(tmp_cfm_1, V%matrix_struct)
     943          68 :             ndummy = nmo
     944             :          ELSE
     945           0 :             CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
     946           0 :             CALL cp_cfm_to_cfm(U, tmp_cfm)
     947           0 :             CALL cp_cfm_create(tmp_cfm_1, zij(1, 1)%matrix_struct)
     948           0 :             ndummy = nstate
     949             :          END IF
     950             :          ! update z_ij
     951         136 :          DO idim = 1, dim2
     952             :             ! 'tmp_cfm_1 = zij_0*tmp_cfm'
     953             :             CALL parallel_gemm("N", "N", ndummy, nstate, ndummy, cone, zij_0(idim), &
     954         102 :                                tmp_cfm, czero, tmp_cfm_1)
     955             :             ! 'c_zij = tmp_cfm_dagg*tmp_cfm_1'
     956             :             CALL parallel_gemm("C", "N", nstate, nstate, ndummy, cone, tmp_cfm, tmp_cfm_1, &
     957         136 :                                czero, c_zij(idim))
     958             :          END DO
     959          34 :          CALL cp_cfm_release(tmp_cfm)
     960          34 :          CALL cp_cfm_release(tmp_cfm_1)
     961             :          ! compute spread
     962        1496 :          DO istate = 1, nstate
     963        1462 :             spread_ii = 0.0_dp
     964        5848 :             DO idim = 1, dim2
     965        4386 :                CALL cp_cfm_get_element(c_zij(idim), istate, istate, mzii)
     966             :                spread_ii = spread_ii + weights(idim)* &
     967        4386 :                            ABS(mzii)**2/twopi/twopi
     968        5848 :                matrix_zii(istate, idim) = mzii
     969             :             END DO
     970             :             !WRITE(*,*) 'spread_ii', spread_ii
     971        1496 :             sum_spread(istate) = spread_ii
     972             :          END DO
     973          34 :          CALL c_zij(1)%matrix_struct%para_env%sum(spread_ii)
     974          34 :          spread_sum = accurate_sum(sum_spread)
     975             : 
     976          34 :          IF (nextra > 0) THEN
     977             :             ! update c_tilde
     978          34 :             CALL cp_cfm_set_all(zdiag, czero)
     979          34 :             CALL cp_cfm_set_all(grad_ctilde, czero)
     980          34 :             CALL cp_cfm_create(tmp_cfm, V%matrix_struct)
     981          34 :             CALL cp_cfm_set_all(tmp_cfm, czero)
     982          34 :             CALL cp_cfm_create(tmp_cfm_1, V%matrix_struct)
     983          34 :             CALL cp_cfm_set_all(tmp_cfm_1, czero)
     984         136 :             ALLOCATE (tmp_cmat(northo, nstate))
     985         136 :             DO idim = 1, dim2
     986         102 :                weight = weights(idim)
     987        8874 :                arr_zii = matrix_zii(:, idim)
     988             :                ! tmp_cfm = zij_0*V
     989             :                CALL parallel_gemm("N", "N", nmo, nstate, nmo, cone, &
     990         102 :                                   zij_0(idim), V, czero, tmp_cfm)
     991             :                ! tmp_cfm = tmp_cfm*diag_zij_dagg
     992        4488 :                CALL cp_cfm_column_scale(tmp_cfm, CONJG(arr_zii))
     993             :                ! tmp_cfm_1 = tmp_cfm*U_dagg
     994             :                CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
     995         102 :                                   U, czero, tmp_cfm_1)
     996         102 :                CALL cp_cfm_scale(weight, tmp_cfm_1)
     997             :                ! zdiag = zdiag + tmp_cfm_1'
     998         102 :                CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
     999             : 
    1000             :                ! tmp_cfm = zij_0_dagg*V
    1001             :                CALL parallel_gemm("C", "N", nmo, nstate, nmo, cone, &
    1002         102 :                                   zij_0(idim), V, czero, tmp_cfm)
    1003             : 
    1004             :                ! tmp_cfm = tmp_cfm*diag_zij
    1005         102 :                CALL cp_cfm_column_scale(tmp_cfm, arr_zii)
    1006             :                ! tmp_cfm_1 = tmp_cfm*U_dagg
    1007             :                CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
    1008         102 :                                   U, czero, tmp_cfm_1)
    1009         102 :                CALL cp_cfm_scale(weight, tmp_cfm_1)
    1010             :                ! zdiag = zdiag + tmp_cfm_1'
    1011         136 :                CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
    1012             :             END DO ! idim
    1013          34 :             CALL cp_cfm_release(tmp_cfm)
    1014          34 :             CALL cp_cfm_release(tmp_cfm_1)
    1015          34 :             DEALLOCATE (tmp_cmat)
    1016         136 :             ALLOCATE (tmp_cmat(northo, nextra))
    1017             :             CALL cp_cfm_get_submatrix(zdiag, tmp_cmat, nocc + 1, nocc + 1, &
    1018          34 :                                       northo, nextra, .FALSE.)
    1019             :             ! 'grad_ctilde'
    1020          34 :             CALL cp_cfm_set_submatrix(grad_ctilde, tmp_cmat)
    1021          34 :             DEALLOCATE (tmp_cmat)
    1022             :             ! ctrans_lambda = c_tilde_dagg*grad_ctilde
    1023          34 :             CALL parallel_gemm("C", "N", nextra, nextra, northo, cone, c_tilde, grad_ctilde, czero, ctrans_lambda)
    1024             :             !WRITE(*,*) "norm(ctrans_lambda) = ", cp_cfm_norm(ctrans_lambda, "F")
    1025             :             ! 'grad_ctilde = - c_tilde*ctrans_lambda + grad_ctilde'
    1026         102 :             CALL parallel_gemm("N", "N", northo, nextra, nextra, -cone, c_tilde, ctrans_lambda, cone, grad_ctilde)
    1027             :          END IF ! nextra > 0
    1028             : 
    1029             :          ! tolerance
    1030          34 :          IF (nextra > 0) THEN
    1031             :             tolc = 0.0_dp
    1032          34 :             CALL cp_fm_create(tmp_fm, grad_ctilde%matrix_struct)
    1033          34 :             CALL cp_cfm_to_fm(grad_ctilde, tmp_fm)
    1034          34 :             CALL cp_fm_maxabsval(tmp_fm, tolc)
    1035          34 :             CALL cp_fm_release(tmp_fm)
    1036             :             !WRITE(*,*) 'tolc = ', tolc
    1037          34 :             tol = tol + tolc
    1038             :          END IF
    1039             :          !WRITE(*,*) 'tol = ', tol
    1040             : 
    1041          36 :          IF (nextra > 0) THEN
    1042             :             !WRITE(*,*) 'new_direction: ', new_direction
    1043          34 :             IF (new_direction) THEN
    1044           6 :                line_searches = line_searches + 1
    1045           6 :                IF (mintol > tol) THEN
    1046           4 :                   mintol = tol
    1047           4 :                   miniter = iter
    1048             :                END IF
    1049             : 
    1050           6 :                IF (unit_nr > 0 .AND. MODULO(iter, out_each) == 0) THEN
    1051           0 :                   sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
    1052           0 :                   avg_spread_ii = sum_spread_ii/nstate
    1053             :                   WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
    1054           0 :                      "Iteration", "Avg. Spread_ii", "Tolerance"
    1055             :                   WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
    1056           0 :                      iter, avg_spread_ii, tol
    1057           0 :                   CALL m_flush(unit_nr)
    1058             :                END IF
    1059           6 :                IF (tol < eps_localization) EXIT
    1060             : 
    1061           4 :                IF (do_cg) THEN
    1062             :                   cnorm2_Gct = czero
    1063             :                   cnorm2_Gct_cross = czero
    1064           4 :                   CALL cp_cfm_trace(grad_ctilde, Gct_old, cnorm2_Gct_cross)
    1065           4 :                   norm2_Gct_cross = REAL(cnorm2_Gct_cross, KIND=dp)
    1066          56 :                   Gct_old%local_data = grad_ctilde%local_data
    1067           4 :                   CALL cp_cfm_trace(grad_ctilde, Gct_old, cnorm2_Gct)
    1068           4 :                   norm2_Gct = REAL(cnorm2_Gct, KIND=dp)
    1069             :                   ! compute beta_pr
    1070           4 :                   beta_pr = (norm2_Gct - norm2_Gct_cross)/norm2_old
    1071           4 :                   norm2_old = norm2_Gct
    1072           4 :                   beta = MAX(0.0_dp, beta_pr)
    1073             :                   !WRITE(*,*) 'beta = ', beta
    1074             :                   ! compute skc / ska = beta * skc / ska + grad_ctilde / G
    1075           4 :                   CALL cp_cfm_scale(beta, skc)
    1076           4 :                   CALL cp_cfm_scale_and_add(cone, skc, cone, Gct_old)
    1077           4 :                   CALL cp_cfm_trace(skc, Gct_old, cnorm2_Gct_cross)
    1078           4 :                   norm2_Gct_cross = REAL(cnorm2_Gct_cross, KIND=dp)
    1079           4 :                   IF (norm2_Gct_cross .LE. 0.0_dp) THEN ! back to steepest ascent
    1080           0 :                      CALL cp_cfm_scale_and_add(czero, skc, cone, Gct_old)
    1081             :                   END IF
    1082             :                ELSE
    1083           0 :                   CALL cp_cfm_scale_and_add(czero, skc, cone, grad_ctilde)
    1084             :                END IF
    1085             :                line_search_count = 0
    1086             :             END IF
    1087             : 
    1088          32 :             line_search_count = line_search_count + 1
    1089             :             !WRITE(*,*) 'line_search_count = ', line_search_count
    1090          32 :             energy(line_search_count) = spread_sum
    1091             : 
    1092             :             ! gold line search
    1093          32 :             new_direction = .FALSE.
    1094          32 :             IF (line_search_count .EQ. 1) THEN
    1095           4 :                lsl = 1
    1096           4 :                lsr = 0
    1097           4 :                lsm = 1
    1098           4 :                pos(1) = 0.0_dp
    1099           4 :                pos(2) = ds_min/gold_sec
    1100           4 :                ds = pos(2)
    1101             :             ELSE
    1102          28 :                IF (line_search_count .EQ. 50) THEN
    1103           0 :                   IF (ABS(energy(line_search_count) - energy(line_search_count - 1)) < 1.0E-4_dp) THEN
    1104           0 :                      CPWARN("Line search failed to converge properly")
    1105           0 :                      ds_min = 0.1_dp
    1106           0 :                      new_direction = .TRUE.
    1107             :                      ds = pos(line_search_count)
    1108           0 :                      line_search_count = 0
    1109             :                   ELSE
    1110           0 :                      CPABORT("No. of line searches exceeds 50")
    1111             :                   END IF
    1112             :                ELSE
    1113          28 :                   IF (lsr .EQ. 0) THEN
    1114          28 :                      IF (energy(line_search_count - 1) .GT. energy(line_search_count)) THEN
    1115           0 :                         lsr = line_search_count
    1116           0 :                         pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
    1117             :                      ELSE
    1118          28 :                         lsl = lsm
    1119          28 :                         lsm = line_search_count
    1120          28 :                         pos(line_search_count + 1) = pos(line_search_count)/gold_sec
    1121             :                      END IF
    1122             :                   ELSE
    1123           0 :                      IF (pos(line_search_count) .LT. pos(lsm)) THEN
    1124           0 :                         IF (energy(line_search_count) .GT. energy(lsm)) THEN
    1125             :                            lsr = lsm
    1126             :                            lsm = line_search_count
    1127             :                         ELSE
    1128           0 :                            lsl = line_search_count
    1129             :                         END IF
    1130             :                      ELSE
    1131           0 :                         IF (energy(line_search_count) .GT. energy(lsm)) THEN
    1132             :                            lsl = lsm
    1133             :                            lsm = line_search_count
    1134             :                         ELSE
    1135           0 :                            lsr = line_search_count
    1136             :                         END IF
    1137             :                      END IF
    1138           0 :                      IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl)) THEN
    1139           0 :                         pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
    1140             :                      ELSE
    1141           0 :                         pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
    1142             :                      END IF
    1143           0 :                      IF ((pos(lsr) - pos(lsl)) .LT. 1.0E-3_dp*pos(lsr)) THEN
    1144           0 :                         new_direction = .TRUE.
    1145             :                      END IF
    1146             :                   END IF ! lsr .eq. 0
    1147             :                END IF ! line_search_count .eq. 50
    1148             :                ! now go to the suggested point
    1149          28 :                ds = pos(line_search_count + 1) - pos(line_search_count)
    1150             :                !WRITE(*,*) 'lsl, lsr, lsm, ds = ', lsl, lsr, lsm, ds
    1151          28 :                IF ((ABS(ds) < 1.0E-10_dp) .AND. (lsl == 1)) THEN
    1152           0 :                   new_direction = .TRUE.
    1153           0 :                   ds_min = 0.5_dp/alpha
    1154          28 :                ELSEIF (ABS(ds) > 10.0_dp) THEN
    1155           4 :                   new_direction = .TRUE.
    1156           4 :                   ds_min = 0.5_dp/alpha
    1157             :                ELSE
    1158             :                   ds_min = pos(line_search_count + 1)
    1159             :                END IF
    1160             :             END IF ! first step
    1161             :             ! 'c_tilde = c_tilde + d*skc'
    1162          32 :             CALL cp_cfm_scale(ds, skc)
    1163          32 :             CALL cp_cfm_scale_and_add(cone, c_tilde, cone, skc)
    1164             :          ELSE
    1165           0 :             IF (mintol > tol) THEN
    1166           0 :                mintol = tol
    1167           0 :                miniter = iter
    1168             :             END IF
    1169           0 :             IF (unit_nr > 0 .AND. MODULO(iter, out_each) == 0) THEN
    1170           0 :                sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
    1171           0 :                avg_spread_ii = sum_spread_ii/nstate
    1172             :                WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
    1173           0 :                   "Iteration", "Avg. Spread_ii", "Tolerance"
    1174             :                WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
    1175           0 :                   iter, avg_spread_ii, tol
    1176           0 :                CALL m_flush(unit_nr)
    1177             :             END IF
    1178           0 :             IF (tol < eps_localization) EXIT
    1179             :          END IF ! nextra > 0
    1180             : 
    1181             :       END DO ! iteration
    1182             : 
    1183           2 :       IF ((unit_nr > 0) .AND. (iter == max_iter)) THEN
    1184           0 :          WRITE (unit_nr, '(T4,A,T4,A)') "Min. Itr.", "Min. Tol."
    1185           0 :          WRITE (unit_nr, '(T4,I7,T4,E12.4)') miniter, mintol
    1186           0 :          CALL m_flush(unit_nr)
    1187             :       END IF
    1188             : 
    1189           2 :       CALL cp_cfm_to_fm(U, matrix_U)
    1190             : 
    1191           2 :       IF (nextra > 0) THEN
    1192        2324 :          rmat%local_data = REAL(V%local_data, KIND=dp)
    1193           2 :          CALL rotate_orbitals_edf(rmat, vectors_all, vectors)
    1194             : 
    1195           2 :          CALL cp_cfm_release(c_tilde)
    1196           2 :          CALL cp_cfm_release(grad_ctilde)
    1197           2 :          CALL cp_cfm_release(Gct_old)
    1198           2 :          CALL cp_cfm_release(skc)
    1199           2 :          CALL cp_cfm_release(UL)
    1200           2 :          CALL cp_cfm_release(zdiag)
    1201           2 :          CALL cp_cfm_release(ctrans_lambda)
    1202           2 :          CALL cp_fm_release(id_nextra)
    1203           2 :          CALL cp_fm_release(vectors_all)
    1204           2 :          CALL cp_cfm_release(V)
    1205           2 :          CALL cp_fm_release(matrix_V)
    1206           2 :          CALL cp_fm_release(matrix_V_all)
    1207           2 :          CALL cp_cfm_release(VL)
    1208           2 :          DEALLOCATE (arr_zii)
    1209             :       ELSE
    1210           0 :          rmat%local_data = matrix_U%local_data
    1211           0 :          CALL rotate_orbitals(rmat, vectors)
    1212             :       END IF
    1213           8 :       DO idim = 1, dim2
    1214           8 :          CALL cp_cfm_release(zij_0(idim))
    1215             :       END DO
    1216           2 :       DEALLOCATE (zij_0)
    1217             : 
    1218           8 :       DO idim = 1, dim2
    1219        5811 :          zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
    1220        5811 :          zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
    1221           8 :          CALL cp_cfm_release(c_zij(idim))
    1222             :       END DO
    1223           2 :       DEALLOCATE (c_zij)
    1224           2 :       CALL cp_fm_release(rmat)
    1225           2 :       CALL cp_cfm_release(U)
    1226           2 :       CALL cp_fm_release(matrix_U)
    1227           2 :       DEALLOCATE (matrix_zii, sum_spread)
    1228             : 
    1229           2 :       CALL timestop(handle)
    1230             : 
    1231          10 :    END SUBROUTINE jacobi_cg_edf_ls
    1232             : 
    1233             : ! **************************************************************************************************
    1234             : !> \brief ...
    1235             : !> \param vmatrix ...
    1236             : ! **************************************************************************************************
    1237         108 :    SUBROUTINE ortho_vectors(vmatrix)
    1238             : 
    1239             :       TYPE(cp_fm_type), INTENT(IN)                       :: vmatrix
    1240             : 
    1241             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'ortho_vectors'
    1242             : 
    1243             :       INTEGER                                            :: handle, n, ncol
    1244             :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
    1245             :       TYPE(cp_fm_type)                                   :: overlap_vv
    1246             : 
    1247          36 :       CALL timeset(routineN, handle)
    1248             : 
    1249          36 :       NULLIFY (fm_struct_tmp)
    1250             : 
    1251          36 :       CALL cp_fm_get_info(matrix=vmatrix, nrow_global=n, ncol_global=ncol)
    1252             : 
    1253             :       CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ncol, ncol_global=ncol, &
    1254             :                                para_env=vmatrix%matrix_struct%para_env, &
    1255          36 :                                context=vmatrix%matrix_struct%context)
    1256          36 :       CALL cp_fm_create(overlap_vv, fm_struct_tmp, "overlap_vv")
    1257          36 :       CALL cp_fm_struct_release(fm_struct_tmp)
    1258             : 
    1259          36 :       CALL parallel_gemm('T', 'N', ncol, ncol, n, 1.0_dp, vmatrix, vmatrix, 0.0_dp, overlap_vv)
    1260          36 :       CALL cp_fm_cholesky_decompose(overlap_vv)
    1261          36 :       CALL cp_fm_triangular_multiply(overlap_vv, vmatrix, n_cols=ncol, side='R', invert_tr=.TRUE.)
    1262             : 
    1263          36 :       CALL cp_fm_release(overlap_vv)
    1264             : 
    1265          36 :       CALL timestop(handle)
    1266             : 
    1267          36 :    END SUBROUTINE ortho_vectors
    1268             : 
    1269             : ! **************************************************************************************************
    1270             : !> \brief ...
    1271             : !> \param istate ...
    1272             : !> \param jstate ...
    1273             : !> \param st ...
    1274             : !> \param ct ...
    1275             : !> \param zij ...
    1276             : ! **************************************************************************************************
    1277           0 :    SUBROUTINE rotate_zij(istate, jstate, st, ct, zij)
    1278             :       INTEGER, INTENT(IN)                                :: istate, jstate
    1279             :       REAL(KIND=dp), INTENT(IN)                          :: st, ct
    1280             :       TYPE(cp_cfm_type)                                  :: zij(:)
    1281             : 
    1282             :       INTEGER                                            :: id
    1283             : 
    1284             : ! Locals
    1285             : 
    1286           0 :       DO id = 1, SIZE(zij, 1)
    1287           0 :          CALL cp_cfm_rot_cols(zij(id), istate, jstate, ct, st)
    1288           0 :          CALL cp_cfm_rot_rows(zij(id), istate, jstate, ct, st)
    1289             :       END DO
    1290             : 
    1291           0 :    END SUBROUTINE rotate_zij
    1292             : ! **************************************************************************************************
    1293             : !> \brief ...
    1294             : !> \param istate ...
    1295             : !> \param jstate ...
    1296             : !> \param st ...
    1297             : !> \param ct ...
    1298             : !> \param rmat ...
    1299             : ! **************************************************************************************************
    1300           0 :    SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat)
    1301             :       INTEGER, INTENT(IN)                                :: istate, jstate
    1302             :       REAL(KIND=dp), INTENT(IN)                          :: st, ct
    1303             :       TYPE(cp_cfm_type), INTENT(IN)                      :: rmat
    1304             : 
    1305           0 :       CALL cp_cfm_rot_cols(rmat, istate, jstate, ct, st)
    1306             : 
    1307           0 :    END SUBROUTINE rotate_rmat
    1308             : ! **************************************************************************************************
    1309             : !> \brief ...
    1310             : !> \param mii ...
    1311             : !> \param mjj ...
    1312             : !> \param mij ...
    1313             : !> \param weights ...
    1314             : !> \param theta ...
    1315             : !> \param grad_ij ...
    1316             : !> \param step ...
    1317             : ! **************************************************************************************************
    1318     2328754 :    SUBROUTINE get_angle(mii, mjj, mij, weights, theta, grad_ij, step)
    1319             :       COMPLEX(KIND=dp), POINTER                          :: mii(:), mjj(:), mij(:)
    1320             :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1321             :       REAL(KIND=dp), INTENT(OUT)                         :: theta
    1322             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: grad_ij, step
    1323             : 
    1324             :       COMPLEX(KIND=dp)                                   :: z11, z12, z22
    1325             :       INTEGER                                            :: dim_m, idim
    1326             :       REAL(KIND=dp)                                      :: a12, b12, d2, ratio
    1327             : 
    1328     2328754 :       a12 = 0.0_dp
    1329     2328754 :       b12 = 0.0_dp
    1330     2328754 :       dim_m = SIZE(mii)
    1331     9421510 :       DO idim = 1, dim_m
    1332     7092756 :          z11 = mii(idim)
    1333     7092756 :          z22 = mjj(idim)
    1334     7092756 :          z12 = mij(idim)
    1335     7092756 :          a12 = a12 + weights(idim)*REAL(CONJG(z12)*(z11 - z22), KIND=dp)
    1336             :          b12 = b12 + weights(idim)*REAL((z12*CONJG(z12) - &
    1337     9421510 :                                          0.25_dp*(z11 - z22)*(CONJG(z11) - CONJG(z22))), KIND=dp)
    1338             :       END DO
    1339     2328754 :       IF (ABS(b12) > 1.e-10_dp) THEN
    1340     2328754 :          ratio = -a12/b12
    1341     2328754 :          theta = 0.25_dp*ATAN(ratio)
    1342           0 :       ELSEIF (ABS(b12) < 1.e-10_dp) THEN
    1343           0 :          b12 = 0.0_dp
    1344           0 :          theta = 0.0_dp
    1345             :       ELSE
    1346           0 :          theta = 0.25_dp*pi
    1347             :       END IF
    1348     2328754 :       IF (PRESENT(grad_ij)) theta = theta + step*grad_ij
    1349             : ! Check second derivative info
    1350     2328754 :       d2 = a12*SIN(4._dp*theta) - b12*COS(4._dp*theta)
    1351     2328754 :       IF (d2 <= 0._dp) THEN ! go to the maximum, not the minimum
    1352        3226 :          IF (theta > 0.0_dp) THEN ! make theta as small as possible
    1353        1666 :             theta = theta - 0.25_dp*pi
    1354             :          ELSE
    1355        1560 :             theta = theta + 0.25_dp*pi
    1356             :          END IF
    1357             :       END IF
    1358     2328754 :    END SUBROUTINE get_angle
    1359             : ! **************************************************************************************************
    1360             : !> \brief ...
    1361             : !> \param zij ...
    1362             : !> \param weights ...
    1363             : !> \param tolerance ...
    1364             : !> \param grad ...
    1365             : ! **************************************************************************************************
    1366           0 :    SUBROUTINE check_tolerance(zij, weights, tolerance, grad)
    1367             :       TYPE(cp_cfm_type)                                  :: zij(:)
    1368             :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1369             :       REAL(KIND=dp), INTENT(OUT)                         :: tolerance
    1370             :       TYPE(cp_fm_type), INTENT(OUT), OPTIONAL            :: grad
    1371             : 
    1372             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'check_tolerance'
    1373             : 
    1374             :       INTEGER                                            :: handle
    1375             :       TYPE(cp_fm_type)                                   :: force
    1376             : 
    1377           0 :       CALL timeset(routineN, handle)
    1378             : 
    1379             : ! compute gradient at t=0
    1380             : 
    1381           0 :       CALL cp_fm_create(force, zij(1)%matrix_struct)
    1382           0 :       CALL cp_fm_set_all(force, 0._dp)
    1383           0 :       CALL grad_at_0(zij, weights, force)
    1384           0 :       CALL cp_fm_maxabsval(force, tolerance)
    1385           0 :       IF (PRESENT(grad)) CALL cp_fm_to_fm(force, grad)
    1386           0 :       CALL cp_fm_release(force)
    1387             : 
    1388           0 :       CALL timestop(handle)
    1389             : 
    1390           0 :    END SUBROUTINE check_tolerance
    1391             : ! **************************************************************************************************
    1392             : !> \brief ...
    1393             : !> \param rmat ...
    1394             : !> \param vectors ...
    1395             : ! **************************************************************************************************
    1396        1048 :    SUBROUTINE rotate_orbitals(rmat, vectors)
    1397             :       TYPE(cp_fm_type), INTENT(IN)                       :: rmat, vectors
    1398             : 
    1399             :       INTEGER                                            :: k, n
    1400             :       TYPE(cp_fm_type)                                   :: wf
    1401             : 
    1402         524 :       CALL cp_fm_create(wf, vectors%matrix_struct)
    1403         524 :       CALL cp_fm_get_info(vectors, nrow_global=n, ncol_global=k)
    1404         524 :       CALL parallel_gemm("N", "N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf)
    1405         524 :       CALL cp_fm_to_fm(wf, vectors)
    1406         524 :       CALL cp_fm_release(wf)
    1407         524 :    END SUBROUTINE rotate_orbitals
    1408             : ! **************************************************************************************************
    1409             : !> \brief ...
    1410             : !> \param rmat ...
    1411             : !> \param vec_all ...
    1412             : !> \param vectors ...
    1413             : ! **************************************************************************************************
    1414           6 :    SUBROUTINE rotate_orbitals_edf(rmat, vec_all, vectors)
    1415             :       TYPE(cp_fm_type), INTENT(IN)                       :: rmat, vec_all, vectors
    1416             : 
    1417             :       INTEGER                                            :: k, l, n
    1418             :       TYPE(cp_fm_type)                                   :: wf
    1419             : 
    1420           2 :       CALL cp_fm_create(wf, vectors%matrix_struct)
    1421           2 :       CALL cp_fm_get_info(vec_all, nrow_global=n, ncol_global=k)
    1422           2 :       CALL cp_fm_get_info(rmat, ncol_global=l)
    1423             : 
    1424           2 :       CALL parallel_gemm("N", "N", n, l, k, 1.0_dp, vec_all, rmat, 0.0_dp, wf)
    1425           2 :       CALL cp_fm_to_fm(wf, vectors)
    1426           2 :       CALL cp_fm_release(wf)
    1427           2 :    END SUBROUTINE rotate_orbitals_edf
    1428             : ! **************************************************************************************************
    1429             : !> \brief ...
    1430             : !> \param diag ...
    1431             : !> \param weights ...
    1432             : !> \param matrix ...
    1433             : !> \param ndim ...
    1434             : ! **************************************************************************************************
    1435         208 :    SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim)
    1436             :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag
    1437             :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1438             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1439             :       INTEGER, INTENT(IN)                                :: ndim
    1440             : 
    1441             :       COMPLEX(KIND=dp)                                   :: zii, zjj
    1442             :       INTEGER                                            :: idim, istate, jstate, ncol_local, &
    1443             :                                                             nrow_global, nrow_local
    1444         208 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1445             :       REAL(KIND=dp)                                      :: gradsq_ij
    1446             : 
    1447             :       CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
    1448             :                           ncol_local=ncol_local, nrow_global=nrow_global, &
    1449         208 :                           row_indices=row_indices, col_indices=col_indices)
    1450             : 
    1451         624 :       DO istate = 1, nrow_local
    1452        2288 :          DO jstate = 1, ncol_local
    1453             : ! get real and imaginary parts
    1454        1664 :             gradsq_ij = 0.0_dp
    1455       11648 :             DO idim = 1, ndim
    1456        9984 :                zii = diag(row_indices(istate), idim)
    1457        9984 :                zjj = diag(col_indices(jstate), idim)
    1458             :                gradsq_ij = gradsq_ij + weights(idim)* &
    1459       11648 :                            4.0_dp*REAL((CONJG(zii)*zii + CONJG(zjj)*zjj), KIND=dp)
    1460             :             END DO
    1461        2080 :             matrix%local_data(istate, jstate) = gradsq_ij
    1462             :          END DO
    1463             :       END DO
    1464         208 :    END SUBROUTINE gradsq_at_0
    1465             : ! **************************************************************************************************
    1466             : !> \brief ...
    1467             : !> \param matrix_p ...
    1468             : !> \param weights ...
    1469             : !> \param matrix ...
    1470             : ! **************************************************************************************************
    1471           0 :    SUBROUTINE grad_at_0(matrix_p, weights, matrix)
    1472             :       TYPE(cp_cfm_type)                                  :: matrix_p(:)
    1473             :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1474             :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1475             : 
    1476             :       COMPLEX(KIND=dp)                                   :: zii, zij, zjj
    1477           0 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag
    1478             :       INTEGER                                            :: dim_m, idim, istate, jstate, ncol_local, &
    1479             :                                                             nrow_global, nrow_local
    1480           0 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1481             :       REAL(KIND=dp)                                      :: grad_ij
    1482             : 
    1483           0 :       NULLIFY (diag)
    1484             :       CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
    1485             :                           ncol_local=ncol_local, nrow_global=nrow_global, &
    1486           0 :                           row_indices=row_indices, col_indices=col_indices)
    1487           0 :       dim_m = SIZE(matrix_p, 1)
    1488           0 :       ALLOCATE (diag(nrow_global, dim_m))
    1489             : 
    1490           0 :       DO idim = 1, dim_m
    1491           0 :          DO istate = 1, nrow_global
    1492           0 :             CALL cp_cfm_get_element(matrix_p(idim), istate, istate, diag(istate, idim))
    1493             :          END DO
    1494             :       END DO
    1495             : 
    1496           0 :       DO istate = 1, nrow_local
    1497           0 :          DO jstate = 1, ncol_local
    1498             : ! get real and imaginary parts
    1499             :             grad_ij = 0.0_dp
    1500           0 :             DO idim = 1, dim_m
    1501           0 :                zii = diag(row_indices(istate), idim)
    1502           0 :                zjj = diag(col_indices(jstate), idim)
    1503           0 :                zij = matrix_p(idim)%local_data(istate, jstate)
    1504             :                grad_ij = grad_ij + weights(idim)* &
    1505           0 :                          REAL(4.0_dp*CONJG(zij)*(zjj - zii), dp)
    1506             :             END DO
    1507           0 :             matrix%local_data(istate, jstate) = grad_ij
    1508             :          END DO
    1509             :       END DO
    1510           0 :       DEALLOCATE (diag)
    1511           0 :    END SUBROUTINE grad_at_0
    1512             : 
    1513             : ! return energy and maximum gradient in the current point
    1514             : ! **************************************************************************************************
    1515             : !> \brief ...
    1516             : !> \param weights ...
    1517             : !> \param zij ...
    1518             : !> \param tolerance ...
    1519             : !> \param value ...
    1520             : ! **************************************************************************************************
    1521        4296 :    SUBROUTINE check_tolerance_new(weights, zij, tolerance, value)
    1522             :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1523             :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :)
    1524             :       REAL(KIND=dp)                                      :: tolerance, value
    1525             : 
    1526             :       COMPLEX(KIND=dp)                                   :: kii, kij, kjj
    1527        4296 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag
    1528             :       INTEGER                                            :: idim, istate, jstate, ncol_local, &
    1529             :                                                             nrow_global, nrow_local
    1530        4296 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1531             :       REAL(KIND=dp)                                      :: grad_ij, ra, rb
    1532             : 
    1533        4296 :       NULLIFY (diag)
    1534             :       CALL cp_fm_get_info(zij(1, 1), nrow_local=nrow_local, &
    1535             :                           ncol_local=ncol_local, nrow_global=nrow_global, &
    1536        4296 :                           row_indices=row_indices, col_indices=col_indices)
    1537       17184 :       ALLOCATE (diag(nrow_global, SIZE(zij, 2)))
    1538        4296 :       value = 0.0_dp
    1539       17298 :       DO idim = 1, SIZE(zij, 2)
    1540      193614 :          DO istate = 1, nrow_global
    1541      176316 :             CALL cp_fm_get_element(zij(1, idim), istate, istate, ra)
    1542      176316 :             CALL cp_fm_get_element(zij(2, idim), istate, istate, rb)
    1543      176316 :             diag(istate, idim) = CMPLX(ra, rb, dp)
    1544      189318 :             value = value + weights(idim) - weights(idim)*ABS(diag(istate, idim))**2
    1545             :          END DO
    1546             :       END DO
    1547        4296 :       tolerance = 0.0_dp
    1548       33587 :       DO istate = 1, nrow_local
    1549      501896 :          DO jstate = 1, ncol_local
    1550     1874661 :             grad_ij = 0.0_dp
    1551     1874661 :             DO idim = 1, SIZE(zij, 2)
    1552     1406352 :                kii = diag(row_indices(istate), idim)
    1553     1406352 :                kjj = diag(col_indices(jstate), idim)
    1554     1406352 :                ra = zij(1, idim)%local_data(istate, jstate)
    1555     1406352 :                rb = zij(2, idim)%local_data(istate, jstate)
    1556     1406352 :                kij = CMPLX(ra, rb, dp)
    1557             :                grad_ij = grad_ij + weights(idim)* &
    1558     1874661 :                          REAL(4.0_dp*CONJG(kij)*(kjj - kii), dp)
    1559             :             END DO
    1560      497600 :             tolerance = MAX(ABS(grad_ij), tolerance)
    1561             :          END DO
    1562             :       END DO
    1563        4296 :       CALL zij(1, 1)%matrix_struct%para_env%max(tolerance)
    1564             : 
    1565        4296 :       DEALLOCATE (diag)
    1566             : 
    1567        4296 :    END SUBROUTINE check_tolerance_new
    1568             : 
    1569             : ! **************************************************************************************************
    1570             : !> \brief yet another crazy try, computes the angles needed to rotate the orbitals first
    1571             : !>        and rotates them all at the same time (hoping for the best of course)
    1572             : !> \param weights ...
    1573             : !> \param zij ...
    1574             : !> \param vectors ...
    1575             : !> \param max_iter ...
    1576             : !> \param max_crazy_angle ...
    1577             : !> \param crazy_scale ...
    1578             : !> \param crazy_use_diag ...
    1579             : !> \param eps_localization ...
    1580             : !> \param iterations ...
    1581             : !> \param converged ...
    1582             : ! **************************************************************************************************
    1583         100 :    SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, &
    1584             :                               eps_localization, iterations, converged)
    1585             :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1586             :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :), vectors
    1587             :       INTEGER, INTENT(IN)                                :: max_iter
    1588             :       REAL(KIND=dp), INTENT(IN)                          :: max_crazy_angle
    1589             :       REAL(KIND=dp)                                      :: crazy_scale
    1590             :       LOGICAL                                            :: crazy_use_diag
    1591             :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
    1592             :       INTEGER                                            :: iterations
    1593             :       LOGICAL, INTENT(out), OPTIONAL                     :: converged
    1594             : 
    1595             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'crazy_rotations'
    1596             :       COMPLEX(KIND=dp), PARAMETER                        :: cone = (1.0_dp, 0.0_dp), &
    1597             :                                                             czero = (0.0_dp, 0.0_dp)
    1598             : 
    1599             :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: evals_exp
    1600         100 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag_z
    1601             :       COMPLEX(KIND=dp), POINTER                          :: mii(:), mij(:), mjj(:)
    1602             :       INTEGER                                            :: dim2, handle, i, icol, idim, irow, &
    1603             :                                                             method, ncol_global, ncol_local, &
    1604             :                                                             norder, nrow_global, nrow_local, &
    1605             :                                                             nsquare, unit_nr
    1606         100 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1607             :       LOGICAL                                            :: do_emd
    1608             :       REAL(KIND=dp)                                      :: eps_exp, limit_crazy_angle, maxeval, &
    1609             :                                                             norm, ra, rb, theta, tolerance, value
    1610             :       REAL(KIND=dp), DIMENSION(:), POINTER               :: evals
    1611             :       TYPE(cp_cfm_type)                                  :: cmat_A, cmat_R, cmat_t1
    1612             :       TYPE(cp_fm_type)                                   :: mat_R, mat_t, mat_theta, mat_U
    1613             : 
    1614         100 :       CALL timeset(routineN, handle)
    1615         100 :       NULLIFY (row_indices, col_indices)
    1616             :       CALL cp_fm_get_info(zij(1, 1), nrow_global=nrow_global, &
    1617             :                           ncol_global=ncol_global, &
    1618             :                           row_indices=row_indices, col_indices=col_indices, &
    1619         100 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1620             : 
    1621         100 :       limit_crazy_angle = max_crazy_angle
    1622             : 
    1623         100 :       NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj)
    1624         100 :       dim2 = SIZE(zij, 2)
    1625         400 :       ALLOCATE (diag_z(nrow_global, dim2))
    1626         300 :       ALLOCATE (evals(nrow_global))
    1627         300 :       ALLOCATE (evals_exp(nrow_global))
    1628             : 
    1629         100 :       CALL cp_cfm_create(cmat_A, zij(1, 1)%matrix_struct)
    1630         100 :       CALL cp_cfm_create(cmat_R, zij(1, 1)%matrix_struct)
    1631         100 :       CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
    1632             : 
    1633         100 :       CALL cp_fm_create(mat_U, zij(1, 1)%matrix_struct)
    1634         100 :       CALL cp_fm_create(mat_t, zij(1, 1)%matrix_struct)
    1635         100 :       CALL cp_fm_create(mat_R, zij(1, 1)%matrix_struct)
    1636             : 
    1637         100 :       CALL cp_fm_create(mat_theta, zij(1, 1)%matrix_struct)
    1638             : 
    1639         100 :       CALL cp_fm_set_all(mat_R, 0.0_dp, 1.0_dp)
    1640         100 :       CALL cp_fm_set_all(mat_t, 0.0_dp)
    1641         500 :       ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
    1642         406 :       DO idim = 1, dim2
    1643         306 :          CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(1, idim))
    1644         406 :          CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(2, idim))
    1645             :       END DO
    1646         100 :       CALL cp_fm_syevd(mat_t, mat_U, evals)
    1647         406 :       DO idim = 1, dim2
    1648             :          ! rotate z's
    1649         306 :          CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_U, 0.0_dp, mat_t)
    1650         306 :          CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(1, idim))
    1651         306 :          CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_U, 0.0_dp, mat_t)
    1652         406 :          CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(2, idim))
    1653             :       END DO
    1654             :       ! collect rotation matrix
    1655         100 :       CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_R, mat_U, 0.0_dp, mat_t)
    1656         100 :       CALL cp_fm_to_fm(mat_t, mat_R)
    1657             : 
    1658         100 :       unit_nr = -1
    1659         100 :       IF (cmat_A%matrix_struct%para_env%is_source()) THEN
    1660          50 :          unit_nr = cp_logger_get_default_unit_nr()
    1661             :          WRITE (unit_nr, '(T2,A7,A6,1X,A20,A12,A12,A12)') &
    1662          50 :             "CRAZY| ", "Iter", "value    ", "gradient", "Max. eval", "limit"
    1663             :       END IF
    1664             : 
    1665         100 :       iterations = 0
    1666         100 :       tolerance = 1.0_dp
    1667             : 
    1668             :       DO
    1669        4296 :          iterations = iterations + 1
    1670       17298 :          DO idim = 1, dim2
    1671      193614 :             DO i = 1, nrow_global
    1672      176316 :                CALL cp_fm_get_element(zij(1, idim), i, i, ra)
    1673      176316 :                CALL cp_fm_get_element(zij(2, idim), i, i, rb)
    1674      189318 :                diag_z(i, idim) = CMPLX(ra, rb, dp)
    1675             :             END DO
    1676             :          END DO
    1677       33587 :          DO irow = 1, nrow_local
    1678      501896 :             DO icol = 1, ncol_local
    1679     1874661 :                DO idim = 1, dim2
    1680     1406352 :                   ra = zij(1, idim)%local_data(irow, icol)
    1681     1406352 :                   rb = zij(2, idim)%local_data(irow, icol)
    1682     1406352 :                   mij(idim) = CMPLX(ra, rb, dp)
    1683     1406352 :                   mii(idim) = diag_z(row_indices(irow), idim)
    1684     1874661 :                   mjj(idim) = diag_z(col_indices(icol), idim)
    1685             :                END DO
    1686      497600 :                IF (row_indices(irow) .NE. col_indices(icol)) THEN
    1687      439018 :                   CALL get_angle(mii, mjj, mij, weights, theta)
    1688      439018 :                   theta = crazy_scale*theta
    1689      439018 :                   IF (theta .GT. limit_crazy_angle) theta = limit_crazy_angle
    1690      439018 :                   IF (theta .LT. -limit_crazy_angle) theta = -limit_crazy_angle
    1691      439018 :                   IF (crazy_use_diag) THEN
    1692           0 :                      cmat_A%local_data(irow, icol) = -CMPLX(0.0_dp, theta, dp)
    1693             :                   ELSE
    1694      439018 :                      mat_theta%local_data(irow, icol) = -theta
    1695             :                   END IF
    1696             :                ELSE
    1697       29291 :                   IF (crazy_use_diag) THEN
    1698           0 :                      cmat_A%local_data(irow, icol) = czero
    1699             :                   ELSE
    1700       29291 :                      mat_theta%local_data(irow, icol) = 0.0_dp
    1701             :                   END IF
    1702             :                END IF
    1703             :             END DO
    1704             :          END DO
    1705             : 
    1706             :          ! construct rotation matrix U based on A using diagonalization
    1707             :          ! alternatively, exp based on repeated squaring could be faster
    1708        4296 :          IF (crazy_use_diag) THEN
    1709           0 :             CALL cp_cfm_heevd(cmat_A, cmat_R, evals)
    1710           0 :             maxeval = MAXVAL(ABS(evals))
    1711           0 :             evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
    1712           0 :             CALL cp_cfm_to_cfm(cmat_R, cmat_t1)
    1713           0 :             CALL cp_cfm_column_scale(cmat_t1, evals_exp)
    1714             :             CALL parallel_gemm('N', 'C', nrow_global, nrow_global, nrow_global, cone, &
    1715           0 :                                cmat_t1, cmat_R, czero, cmat_A)
    1716           0 :             mat_U%local_data = REAL(cmat_A%local_data, KIND=dp) ! U is a real matrix
    1717             :          ELSE
    1718        4296 :             do_emd = .FALSE.
    1719        4296 :             method = 2
    1720        4296 :             eps_exp = 1.0_dp*EPSILON(eps_exp)
    1721        4296 :             CALL cp_fm_maxabsrownorm(mat_theta, norm)
    1722        4296 :             maxeval = norm ! an upper bound
    1723        4296 :             CALL get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
    1724        4296 :             CALL exp_pade_real(mat_U, mat_theta, nsquare, norder)
    1725             :          END IF
    1726             : 
    1727       17298 :          DO idim = 1, dim2
    1728             :             ! rotate z's
    1729       13002 :             CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_U, 0.0_dp, mat_t)
    1730       13002 :             CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(1, idim))
    1731       13002 :             CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_U, 0.0_dp, mat_t)
    1732       17298 :             CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(2, idim))
    1733             :          END DO
    1734             :          ! collect rotation matrix
    1735        4296 :          CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_R, mat_U, 0.0_dp, mat_t)
    1736        4296 :          CALL cp_fm_to_fm(mat_t, mat_R)
    1737             : 
    1738        4296 :          CALL check_tolerance_new(weights, zij, tolerance, value)
    1739             : 
    1740        4296 :          IF (unit_nr > 0) THEN
    1741             :             WRITE (unit_nr, '(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') &
    1742        2148 :                "CRAZY| ", iterations, value, tolerance, maxeval, limit_crazy_angle
    1743        2148 :             CALL m_flush(unit_nr)
    1744             :          END IF
    1745        4296 :          IF (tolerance .LT. eps_localization .OR. iterations .GE. max_iter) EXIT
    1746             :       END DO
    1747             : 
    1748         100 :       IF (PRESENT(converged)) converged = (tolerance .LT. eps_localization)
    1749             : 
    1750         100 :       CALL cp_cfm_release(cmat_A)
    1751         100 :       CALL cp_cfm_release(cmat_R)
    1752         100 :       CALL cp_cfm_release(cmat_T1)
    1753             : 
    1754         100 :       CALL cp_fm_release(mat_U)
    1755         100 :       CALL cp_fm_release(mat_T)
    1756         100 :       CALL cp_fm_release(mat_theta)
    1757             : 
    1758         100 :       CALL rotate_orbitals(mat_R, vectors)
    1759             : 
    1760         100 :       CALL cp_fm_release(mat_R)
    1761         100 :       DEALLOCATE (evals_exp, evals, diag_z)
    1762         100 :       DEALLOCATE (mii, mij, mjj)
    1763             : 
    1764         100 :       CALL timestop(handle)
    1765             : 
    1766         200 :    END SUBROUTINE crazy_rotations
    1767             : 
    1768             : ! **************************************************************************************************
    1769             : !> \brief use the exponential parametrization as described in to perform a direct mini
    1770             : !>        Gerd Berghold et al. PRB 61 (15), pag. 10040 (2000)
    1771             : !> none of the input is modified for the time being, just finds the rotations
    1772             : !> that minimizes, and throws it away afterwards :-)
    1773             : !> apart from being expensive and not cleaned, this works fine
    1774             : !> useful to try different spread functionals
    1775             : !> \param weights ...
    1776             : !> \param zij ...
    1777             : !> \param vectors ...
    1778             : !> \param max_iter ...
    1779             : !> \param eps_localization ...
    1780             : !> \param iterations ...
    1781             : ! **************************************************************************************************
    1782           2 :    SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
    1783             :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1784             :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :), vectors
    1785             :       INTEGER, INTENT(IN)                                :: max_iter
    1786             :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
    1787             :       INTEGER                                            :: iterations
    1788             : 
    1789             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'direct_mini'
    1790             :       COMPLEX(KIND=dp), PARAMETER                        :: cone = (1.0_dp, 0.0_dp), &
    1791             :                                                             czero = (0.0_dp, 0.0_dp)
    1792             :       REAL(KIND=dp), PARAMETER                           :: gold_sec = 0.3819_dp
    1793             : 
    1794             :       COMPLEX(KIND=dp)                                   :: lk, ll, tmp
    1795           2 :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: evals_exp
    1796           2 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag_z
    1797             :       INTEGER                                            :: handle, i, icol, idim, irow, &
    1798             :                                                             line_search_count, line_searches, lsl, &
    1799             :                                                             lsm, lsr, n, ncol_local, ndim, &
    1800             :                                                             nrow_local, output_unit
    1801           2 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1802             :       LOGICAL                                            :: new_direction
    1803             :       REAL(KIND=dp)                                      :: a, b, beta_pr, c, denom, ds, ds_min, fa, &
    1804             :                                                             fb, fc, nom, normg, normg_cross, &
    1805             :                                                             normg_old, npos, omega, tol, val, x0, &
    1806             :                                                             x1, xa, xb, xc
    1807             :       REAL(KIND=dp), DIMENSION(150)                      :: energy, grad, pos
    1808           2 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: evals, fval, fvald
    1809             :       TYPE(cp_cfm_type)                                  :: cmat_A, cmat_B, cmat_M, cmat_R, cmat_t1, &
    1810             :                                                             cmat_t2, cmat_U
    1811           2 :       TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:)       :: c_zij
    1812             :       TYPE(cp_fm_type)                                   :: matrix_A, matrix_G, matrix_G_old, &
    1813             :                                                             matrix_G_search, matrix_H, matrix_R, &
    1814             :                                                             matrix_T
    1815             : 
    1816           2 :       NULLIFY (evals, evals_exp, diag_z, fval, fvald)
    1817             : 
    1818           2 :       CALL timeset(routineN, handle)
    1819           2 :       output_unit = cp_logger_get_default_io_unit()
    1820             : 
    1821           2 :       n = zij(1, 1)%matrix_struct%nrow_global
    1822           2 :       ndim = (SIZE(zij, 2))
    1823             : 
    1824           2 :       IF (output_unit > 0) THEN
    1825           1 :          WRITE (output_unit, '(T4,A )') "Localization by direct minimization of the functional; "
    1826           1 :          WRITE (output_unit, '(T5,2A13,A20,A20,A10 )') " Line search ", " Iteration ", " Functional ", " Tolerance ", " ds Min "
    1827             :       END IF
    1828             : 
    1829          20 :       ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n))
    1830          18 :       ALLOCATE (c_zij(ndim))
    1831             : 
    1832             :       ! create the three complex matrices Z
    1833          14 :       DO idim = 1, ndim
    1834          12 :          CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
    1835             :          c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
    1836         158 :                                         zij(2, idim)%local_data, dp)
    1837             :       END DO
    1838             : 
    1839           2 :       CALL cp_fm_create(matrix_A, zij(1, 1)%matrix_struct)
    1840           2 :       CALL cp_fm_create(matrix_G, zij(1, 1)%matrix_struct)
    1841           2 :       CALL cp_fm_create(matrix_T, zij(1, 1)%matrix_struct)
    1842           2 :       CALL cp_fm_create(matrix_H, zij(1, 1)%matrix_struct)
    1843           2 :       CALL cp_fm_create(matrix_G_search, zij(1, 1)%matrix_struct)
    1844           2 :       CALL cp_fm_create(matrix_G_old, zij(1, 1)%matrix_struct)
    1845           2 :       CALL cp_fm_create(matrix_R, zij(1, 1)%matrix_struct)
    1846           2 :       CALL cp_fm_set_all(matrix_R, 0.0_dp, 1.0_dp)
    1847             : 
    1848           2 :       CALL cp_fm_set_all(matrix_A, 0.0_dp)
    1849             : !    CALL cp_fm_init_random ( matrix_A )
    1850             : 
    1851           2 :       CALL cp_cfm_create(cmat_A, zij(1, 1)%matrix_struct)
    1852           2 :       CALL cp_cfm_create(cmat_U, zij(1, 1)%matrix_struct)
    1853           2 :       CALL cp_cfm_create(cmat_R, zij(1, 1)%matrix_struct)
    1854           2 :       CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
    1855           2 :       CALL cp_cfm_create(cmat_t2, zij(1, 1)%matrix_struct)
    1856           2 :       CALL cp_cfm_create(cmat_B, zij(1, 1)%matrix_struct)
    1857           2 :       CALL cp_cfm_create(cmat_M, zij(1, 1)%matrix_struct)
    1858             : 
    1859             :       CALL cp_cfm_get_info(cmat_B, nrow_local=nrow_local, ncol_local=ncol_local, &
    1860           2 :                            row_indices=row_indices, col_indices=col_indices)
    1861             : 
    1862           2 :       CALL cp_fm_set_all(matrix_G_old, 0.0_dp)
    1863           2 :       CALL cp_fm_set_all(matrix_G_search, 0.0_dp)
    1864           2 :       normg_old = 1.0E30_dp
    1865           2 :       ds_min = 1.0_dp
    1866           2 :       new_direction = .TRUE.
    1867           2 :       Iterations = 0
    1868           2 :       line_searches = 0
    1869           2 :       line_search_count = 0
    1870         208 :       DO
    1871         208 :          iterations = iterations + 1
    1872             :          ! compute U,R,evals given A
    1873        2704 :          cmat_A%local_data = CMPLX(0.0_dp, matrix_A%local_data, dp) ! cmat_A is hermitian, evals are reals
    1874         208 :          CALL cp_cfm_heevd(cmat_A, cmat_R, evals)
    1875        1040 :          evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
    1876         208 :          CALL cp_cfm_to_cfm(cmat_R, cmat_t1)
    1877         208 :          CALL cp_cfm_column_scale(cmat_t1, evals_exp)
    1878         208 :          CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_R, czero, cmat_U)
    1879        2704 :          cmat_U%local_data = REAL(cmat_U%local_data, KIND=dp) ! enforce numerics, U is a real matrix
    1880             : 
    1881         208 :          IF (new_direction .AND. MOD(line_searches, 20) .EQ. 5) THEN ! reset with A .eq. 0
    1882           0 :             DO idim = 1, ndim
    1883           0 :                CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_U, czero, cmat_t1)
    1884           0 :                CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_U, cmat_t1, czero, c_zij(idim))
    1885             :             END DO
    1886             :             ! collect rotation matrix
    1887           0 :             matrix_H%local_data = REAL(cmat_U%local_data, KIND=dp)
    1888           0 :             CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_R, matrix_H, 0.0_dp, matrix_T)
    1889           0 :             CALL cp_fm_to_fm(matrix_T, matrix_R)
    1890             : 
    1891           0 :             CALL cp_cfm_set_all(cmat_U, czero, cone)
    1892           0 :             CALL cp_cfm_set_all(cmat_R, czero, cone)
    1893           0 :             CALL cp_cfm_set_all(cmat_A, czero)
    1894           0 :             CALL cp_fm_set_all(matrix_A, 0.0_dp)
    1895           0 :             evals(:) = 0.0_dp
    1896           0 :             evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
    1897           0 :             CALL cp_fm_set_all(matrix_G_old, 0.0_dp)
    1898           0 :             CALL cp_fm_set_all(matrix_G_search, 0.0_dp)
    1899           0 :             normg_old = 1.0E30_dp
    1900             :          END IF
    1901             : 
    1902             :          ! compute Omega and M
    1903         208 :          CALL cp_cfm_set_all(cmat_M, czero)
    1904         208 :          omega = 0.0_dp
    1905        1456 :          DO idim = 1, ndim
    1906        1248 :             CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_U, czero, cmat_t1) ! t1=ZU
    1907        1248 :             CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_U, cmat_t1, czero, cmat_t2) ! t2=(U^T)ZU
    1908        6240 :             DO i = 1, n
    1909        4992 :                CALL cp_cfm_get_element(cmat_t2, i, i, diag_z(i, idim))
    1910             :                SELECT CASE (2) ! allows for selection of different spread functionals
    1911             :                CASE (1)
    1912             :                   fval(i) = -weights(idim)*LOG(ABS(diag_z(i, idim))**2)
    1913             :                   fvald(i) = -weights(idim)/(ABS(diag_z(i, idim))**2)
    1914             :                CASE (2) ! corresponds to the jacobi setup
    1915        4992 :                   fval(i) = weights(idim) - weights(idim)*ABS(diag_z(i, idim))**2
    1916        4992 :                   fvald(i) = -weights(idim)
    1917             :                END SELECT
    1918        6240 :                omega = omega + fval(i)
    1919             :             END DO
    1920        6448 :             DO icol = 1, ncol_local
    1921       16224 :                DO irow = 1, nrow_local
    1922        9984 :                   tmp = cmat_t1%local_data(irow, icol)*CONJG(diag_z(col_indices(icol), idim))
    1923             :                   cmat_M%local_data(irow, icol) = cmat_M%local_data(irow, icol) &
    1924       14976 :                                                   + 4.0_dp*fvald(col_indices(icol))*REAL(tmp, KIND=dp)
    1925             :                END DO
    1926             :             END DO
    1927             :          END DO
    1928             : 
    1929             :          ! compute Hessian diagonal approximation for the preconditioner
    1930             :          IF (.TRUE.) THEN
    1931         208 :             CALL gradsq_at_0(diag_z, weights, matrix_H, ndim)
    1932             :          ELSE
    1933             :             CALL cp_fm_set_all(matrix_H, 1.0_dp)
    1934             :          END IF
    1935             : 
    1936             :          ! compute B
    1937        1040 :          DO icol = 1, ncol_local
    1938        2704 :             DO irow = 1, nrow_local
    1939        1664 :                ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow))
    1940        1664 :                lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol))
    1941        2496 :                IF (ABS(ll - lk) .LT. 0.5_dp) THEN ! use a series expansion to avoid loss of precision
    1942         634 :                   tmp = 1.0_dp
    1943         634 :                   cmat_B%local_data(irow, icol) = 0.0_dp
    1944       10778 :                   DO i = 1, 16
    1945       10144 :                      cmat_B%local_data(irow, icol) = cmat_B%local_data(irow, icol) + tmp
    1946       10778 :                      tmp = tmp*(ll - lk)/(i + 1)
    1947             :                   END DO
    1948         634 :                   cmat_B%local_data(irow, icol) = cmat_B%local_data(irow, icol)*EXP(lk)
    1949             :                ELSE
    1950        1030 :                   cmat_B%local_data(irow, icol) = (EXP(lk) - EXP(ll))/(lk - ll)
    1951             :                END IF
    1952             :             END DO
    1953             :          END DO
    1954             :          ! compute gradient matrix_G
    1955             : 
    1956         208 :          CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_M, cmat_R, czero, cmat_t1) ! t1=(M^T)(R^T)
    1957         208 :          CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_R, cmat_t1, czero, cmat_t2) ! t2=(R)t1
    1958         208 :          CALL cp_cfm_schur_product(cmat_t2, cmat_B, cmat_t1)
    1959         208 :          CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_R, czero, cmat_t2)
    1960         208 :          CALL parallel_gemm('N', 'N', n, n, n, cone, cmat_R, cmat_t2, czero, cmat_t1)
    1961        2704 :          matrix_G%local_data = REAL(cmat_t1%local_data, KIND=dp)
    1962         208 :          CALL cp_fm_transpose(matrix_G, matrix_T)
    1963         208 :          CALL cp_fm_scale_and_add(-1.0_dp, matrix_G, 1.0_dp, matrix_T)
    1964         208 :          CALL cp_fm_maxabsval(matrix_G, tol)
    1965             : 
    1966             :          ! from here on, minimizing technology
    1967         208 :          IF (new_direction) THEN
    1968             :             ! energy converged up to machine precision ?
    1969          10 :             line_searches = line_searches + 1
    1970             :             ! DO i=1,line_search_count
    1971             :             !   write(15,*) pos(i),energy(i)
    1972             :             ! ENDDO
    1973             :             ! write(15,*) ""
    1974             :             ! CALL m_flush(15)
    1975             :             !write(16,*) evals(:)
    1976             :             !write(17,*) matrix_A%local_data(:,:)
    1977             :             !write(18,*) matrix_G%local_data(:,:)
    1978          10 :             IF (output_unit > 0) THEN
    1979           5 :                WRITE (output_unit, '(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, Iterations, Omega, tol, ds_min
    1980           5 :                CALL m_flush(output_unit)
    1981             :             END IF
    1982          10 :             IF (tol < eps_localization .OR. iterations > max_iter) EXIT
    1983             : 
    1984             :             IF (.TRUE.) THEN ! do conjugate gradient CG
    1985           8 :                CALL cp_fm_trace(matrix_G, matrix_G_old, normg_cross)
    1986           8 :                normg_cross = normg_cross*0.5_dp ! takes into account the fact that A is antisymmetric
    1987             :                ! apply the preconditioner
    1988          40 :                DO icol = 1, ncol_local
    1989         104 :                   DO irow = 1, nrow_local
    1990          96 :                      matrix_G_old%local_data(irow, icol) = matrix_G%local_data(irow, icol)/matrix_H%local_data(irow, icol)
    1991             :                   END DO
    1992             :                END DO
    1993           8 :                CALL cp_fm_trace(matrix_G, matrix_G_old, normg)
    1994           8 :                normg = normg*0.5_dp
    1995           8 :                beta_pr = (normg - normg_cross)/normg_old
    1996           8 :                normg_old = normg
    1997           8 :                beta_pr = MAX(beta_pr, 0.0_dp)
    1998           8 :                CALL cp_fm_scale_and_add(beta_pr, matrix_G_search, -1.0_dp, matrix_G_old)
    1999           8 :                CALL cp_fm_trace(matrix_G_search, matrix_G_old, normg_cross)
    2000           8 :                IF (normg_cross .GE. 0) THEN ! back to SD
    2001           0 :                   IF (matrix_A%matrix_struct%para_env%is_source()) THEN
    2002           0 :                      WRITE (cp_logger_get_default_unit_nr(), *) "!"
    2003             :                   END IF
    2004           0 :                   beta_pr = 0.0_dp
    2005           0 :                   CALL cp_fm_scale_and_add(beta_pr, matrix_G_search, -1.0_dp, matrix_G_old)
    2006             :                END IF
    2007             :             ELSE ! SD
    2008             :                CALL cp_fm_scale_and_add(0.0_dp, matrix_G_search, -1.0_dp, matrix_G)
    2009             :             END IF
    2010             :             ! ds_min=1.0E-4_dp
    2011             :             line_search_count = 0
    2012             :          END IF
    2013         206 :          line_search_count = line_search_count + 1
    2014         206 :          energy(line_search_count) = Omega
    2015             : 
    2016             :          ! line search section
    2017             :          SELECT CASE (3)
    2018             :          CASE (1) ! two point line search
    2019             :             SELECT CASE (line_search_count)
    2020             :             CASE (1)
    2021             :                pos(1) = 0.0_dp
    2022             :                pos(2) = ds_min
    2023             :                CALL cp_fm_trace(matrix_G, matrix_G_search, grad(1))
    2024             :                grad(1) = grad(1)/2.0_dp
    2025             :                new_direction = .FALSE.
    2026             :             CASE (2)
    2027             :                new_direction = .TRUE.
    2028             :                x0 = pos(1) ! 0.0_dp
    2029             :                c = energy(1)
    2030             :                b = grad(1)
    2031             :                x1 = pos(2)
    2032             :                a = (energy(2) - b*x1 - c)/(x1**2)
    2033             :                IF (a .LE. 0.0_dp) a = 1.0E-15_dp
    2034             :                npos = -b/(2.0_dp*a)
    2035             :                val = a*npos**2 + b*npos + c
    2036             :                IF (val .LT. energy(1) .AND. val .LE. energy(2)) THEN
    2037             :                   ! we go to a minimum, but ...
    2038             :                   ! we take a guard against too large steps
    2039             :                   pos(3) = MIN(npos, MAXVAL(pos(1:2))*4.0_dp)
    2040             :                ELSE ! just take an extended step
    2041             :                   pos(3) = MAXVAL(pos(1:2))*2.0_dp
    2042             :                END IF
    2043             :             END SELECT
    2044             :          CASE (2) ! 3 point line search
    2045             :             SELECT CASE (line_search_count)
    2046             :             CASE (1)
    2047             :                new_direction = .FALSE.
    2048             :                pos(1) = 0.0_dp
    2049             :                pos(2) = ds_min*0.8_dp
    2050             :             CASE (2)
    2051             :                new_direction = .FALSE.
    2052             :                IF (energy(2) .GT. energy(1)) THEN
    2053             :                   pos(3) = ds_min*0.7_dp
    2054             :                ELSE
    2055             :                   pos(3) = ds_min*1.4_dp
    2056             :                END IF
    2057             :             CASE (3)
    2058             :                new_direction = .TRUE.
    2059             :                xa = pos(1)
    2060             :                xb = pos(2)
    2061             :                xc = pos(3)
    2062             :                fa = energy(1)
    2063             :                fb = energy(2)
    2064             :                fc = energy(3)
    2065             :                nom = (xb - xa)**2*(fb - fc) - (xb - xc)**2*(fb - fa)
    2066             :                denom = (xb - xa)*(fb - fc) - (xb - xc)*(fb - fa)
    2067             :                IF (ABS(denom) .LE. 1.0E-18_dp*MAX(ABS(fb - fc), ABS(fb - fa))) THEN
    2068             :                   npos = xb
    2069             :                ELSE
    2070             :                   npos = xb - 0.5_dp*nom/denom ! position of the stationary point
    2071             :                END IF
    2072             :                val = (npos - xa)*(npos - xb)*fc/((xc - xa)*(xc - xb)) + &
    2073             :                      (npos - xb)*(npos - xc)*fa/((xa - xb)*(xa - xc)) + &
    2074             :                      (npos - xc)*(npos - xa)*fb/((xb - xc)*(xb - xa))
    2075             :                IF (val .LT. fa .AND. val .LE. fb .AND. val .LE. fc) THEN ! OK, we go to a minimum
    2076             :                   ! we take a guard against too large steps
    2077             :                   pos(4) = MAX(MAXVAL(pos(1:3))*0.01_dp, &
    2078             :                                MIN(npos, MAXVAL(pos(1:3))*4.0_dp))
    2079             :                ELSE ! just take an extended step
    2080             :                   pos(4) = MAXVAL(pos(1:3))*2.0_dp
    2081             :                END IF
    2082             :             END SELECT
    2083             :          CASE (3) ! golden section hunt
    2084         206 :             new_direction = .FALSE.
    2085         206 :             IF (line_search_count .EQ. 1) THEN
    2086           8 :                lsl = 1
    2087           8 :                lsr = 0
    2088           8 :                lsm = 1
    2089           8 :                pos(1) = 0.0_dp
    2090           8 :                pos(2) = ds_min/gold_sec
    2091             :             ELSE
    2092         198 :                IF (line_search_count .EQ. 150) CPABORT("Too many")
    2093         198 :                IF (lsr .EQ. 0) THEN
    2094          14 :                   IF (energy(line_search_count - 1) .LT. energy(line_search_count)) THEN
    2095           8 :                      lsr = line_search_count
    2096           8 :                      pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
    2097             :                   ELSE
    2098           6 :                      lsl = lsm
    2099           6 :                      lsm = line_search_count
    2100           6 :                      pos(line_search_count + 1) = pos(line_search_count)/gold_sec
    2101             :                   END IF
    2102             :                ELSE
    2103         184 :                   IF (pos(line_search_count) .LT. pos(lsm)) THEN
    2104          94 :                      IF (energy(line_search_count) .LT. energy(lsm)) THEN
    2105             :                         lsr = lsm
    2106             :                         lsm = line_search_count
    2107             :                      ELSE
    2108          80 :                         lsl = line_search_count
    2109             :                      END IF
    2110             :                   ELSE
    2111          90 :                      IF (energy(line_search_count) .LT. energy(lsm)) THEN
    2112             :                         lsl = lsm
    2113             :                         lsm = line_search_count
    2114             :                      ELSE
    2115          62 :                         lsr = line_search_count
    2116             :                      END IF
    2117             :                   END IF
    2118         184 :                   IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl)) THEN
    2119          86 :                      pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
    2120             :                   ELSE
    2121          98 :                      pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
    2122             :                   END IF
    2123         184 :                   IF ((pos(lsr) - pos(lsl)) .LT. 1.0E-3_dp*pos(lsr)) THEN
    2124           8 :                      new_direction = .TRUE.
    2125             :                   END IF
    2126             :                END IF ! lsr .eq. 0
    2127             :             END IF ! first step
    2128             :          END SELECT
    2129             :          ! now go to the suggested point
    2130         206 :          ds_min = pos(line_search_count + 1)
    2131         206 :          ds = pos(line_search_count + 1) - pos(line_search_count)
    2132         208 :          CALL cp_fm_scale_and_add(1.0_dp, matrix_A, ds, matrix_G_search)
    2133             :       END DO
    2134             : 
    2135             :       ! collect rotation matrix
    2136          26 :       matrix_H%local_data = REAL(cmat_U%local_data, KIND=dp)
    2137           2 :       CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_R, matrix_H, 0.0_dp, matrix_T)
    2138           2 :       CALL cp_fm_to_fm(matrix_T, matrix_R)
    2139           2 :       CALL rotate_orbitals(matrix_R, vectors)
    2140           2 :       CALL cp_fm_release(matrix_R)
    2141             : 
    2142           2 :       CALL cp_fm_release(matrix_A)
    2143           2 :       CALL cp_fm_release(matrix_G)
    2144           2 :       CALL cp_fm_release(matrix_H)
    2145           2 :       CALL cp_fm_release(matrix_T)
    2146           2 :       CALL cp_fm_release(matrix_G_search)
    2147           2 :       CALL cp_fm_release(matrix_G_old)
    2148           2 :       CALL cp_cfm_release(cmat_A)
    2149           2 :       CALL cp_cfm_release(cmat_U)
    2150           2 :       CALL cp_cfm_release(cmat_R)
    2151           2 :       CALL cp_cfm_release(cmat_t1)
    2152           2 :       CALL cp_cfm_release(cmat_t2)
    2153           2 :       CALL cp_cfm_release(cmat_B)
    2154           2 :       CALL cp_cfm_release(cmat_M)
    2155             : 
    2156           2 :       DEALLOCATE (evals, evals_exp, fval, fvald)
    2157             : 
    2158          14 :       DO idim = 1, SIZE(c_zij)
    2159         156 :          zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
    2160         156 :          zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
    2161          14 :          CALL cp_cfm_release(c_zij(idim))
    2162             :       END DO
    2163           2 :       DEALLOCATE (c_zij)
    2164           2 :       DEALLOCATE (diag_z)
    2165             : 
    2166           2 :       CALL timestop(handle)
    2167             : 
    2168           6 :    END SUBROUTINE
    2169             : 
    2170             : ! **************************************************************************************************
    2171             : !> \brief Parallel algorithm for jacobi rotations
    2172             : !> \param weights ...
    2173             : !> \param zij ...
    2174             : !> \param vectors ...
    2175             : !> \param para_env ...
    2176             : !> \param max_iter ...
    2177             : !> \param eps_localization ...
    2178             : !> \param sweeps ...
    2179             : !> \param out_each ...
    2180             : !> \param target_time ...
    2181             : !> \param start_time ...
    2182             : !> \param restricted ...
    2183             : !> \par History
    2184             : !>      use allgather for improved performance
    2185             : !> \author MI (11.2009)
    2186             : ! **************************************************************************************************
    2187         388 :    SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
    2188             :                               sweeps, out_each, target_time, start_time, restricted)
    2189             : 
    2190             :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    2191             :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :), vectors
    2192             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2193             :       INTEGER, INTENT(IN)                                :: max_iter
    2194             :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
    2195             :       INTEGER                                            :: sweeps
    2196             :       INTEGER, INTENT(IN)                                :: out_each
    2197             :       REAL(dp)                                           :: target_time, start_time
    2198             :       INTEGER                                            :: restricted
    2199             : 
    2200             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'jacobi_rot_para'
    2201             : 
    2202             :       INTEGER                                            :: dim2, handle, i, idim, ii, ilow1, ip, j, &
    2203             :                                                             nblock, nblock_max, ns_me, nstate, &
    2204             :                                                             output_unit
    2205         388 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: ns_bound
    2206         388 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: rotmat, z_ij_loc_im, z_ij_loc_re
    2207             :       REAL(KIND=dp)                                      :: xstate
    2208             :       TYPE(cp_fm_type)                                   :: rmat
    2209         388 :       TYPE(set_c_2d_type), DIMENSION(:), POINTER         :: cz_ij_loc
    2210             : 
    2211         388 :       CALL timeset(routineN, handle)
    2212             : 
    2213         388 :       output_unit = cp_logger_get_default_io_unit()
    2214             : 
    2215         388 :       NULLIFY (cz_ij_loc)
    2216             : 
    2217         388 :       dim2 = SIZE(zij, 2)
    2218             : 
    2219         388 :       CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
    2220         388 :       CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
    2221             : 
    2222         388 :       CALL cp_fm_get_info(rmat, nrow_global=nstate)
    2223             : 
    2224         388 :       IF (restricted > 0) THEN
    2225           0 :          IF (output_unit > 0) THEN
    2226           0 :             WRITE (output_unit, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
    2227             :          END IF
    2228           0 :          nstate = nstate - restricted
    2229             :       END IF
    2230             : 
    2231             :       ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
    2232         388 :       xstate = REAL(nstate, dp)/REAL(para_env%num_pe, dp)
    2233        1552 :       ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
    2234        1164 :       DO ip = 1, para_env%num_pe
    2235         776 :          ns_bound(ip - 1, 1) = MIN(nstate, NINT(xstate*(ip - 1))) + 1
    2236        1164 :          ns_bound(ip - 1, 2) = MIN(nstate, NINT(xstate*ip))
    2237             :       END DO
    2238         388 :       nblock_max = 0
    2239        1164 :       DO ip = 0, para_env%num_pe - 1
    2240         776 :          nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2241        1164 :          nblock_max = MAX(nblock_max, nblock)
    2242             :       END DO
    2243             : 
    2244             :       ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
    2245        1552 :       ALLOCATE (z_ij_loc_re(nstate, nblock_max))
    2246        1164 :       ALLOCATE (z_ij_loc_im(nstate, nblock_max))
    2247        1164 :       ALLOCATE (cz_ij_loc(dim2))
    2248        1576 :       DO idim = 1, dim2
    2249        3952 :          DO ip = 0, para_env%num_pe - 1
    2250        2376 :             nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2251        2376 :             CALL cp_fm_get_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
    2252        2376 :             CALL cp_fm_get_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
    2253        3564 :             IF (para_env%mepos == ip) THEN
    2254        4731 :                ALLOCATE (cz_ij_loc(idim)%c_array(nstate, nblock))
    2255        5025 :                DO i = 1, nblock
    2256       37818 :                   DO j = 1, nstate
    2257       36630 :                      cz_ij_loc(idim)%c_array(j, i) = CMPLX(z_ij_loc_re(j, i), z_ij_loc_im(j, i), dp)
    2258             :                   END DO
    2259             :                END DO
    2260             :             END IF
    2261             :          END DO ! ip
    2262             :       END DO
    2263         388 :       DEALLOCATE (z_ij_loc_re)
    2264         388 :       DEALLOCATE (z_ij_loc_im)
    2265             : 
    2266        1552 :       ALLOCATE (rotmat(nstate, 2*nblock_max))
    2267             : 
    2268             :       CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, ns_bound, &
    2269             :                                 cz_ij_loc, rotmat, output_unit, eps_localization=eps_localization, &
    2270         388 :                                 target_time=target_time, start_time=start_time)
    2271             : 
    2272         388 :       ilow1 = ns_bound(para_env%mepos, 1)
    2273         388 :       ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
    2274        1164 :       ALLOCATE (z_ij_loc_re(nstate, nblock_max))
    2275        1164 :       ALLOCATE (z_ij_loc_im(nstate, nblock_max))
    2276        1576 :       DO idim = 1, dim2
    2277        3952 :          DO ip = 0, para_env%num_pe - 1
    2278       79788 :             z_ij_loc_re = 0.0_dp
    2279       79788 :             z_ij_loc_im = 0.0_dp
    2280        2376 :             nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2281        2376 :             IF (ip == para_env%mepos) THEN
    2282        5025 :                ns_me = nblock
    2283        5025 :                DO i = 1, ns_me
    2284       36630 :                   ii = ilow1 + i - 1
    2285       37818 :                   DO j = 1, nstate
    2286       32793 :                      z_ij_loc_re(j, i) = REAL(cz_ij_loc(idim)%c_array(j, i), dp)
    2287       36630 :                      z_ij_loc_im(j, i) = AIMAG(cz_ij_loc(idim)%c_array(j, i))
    2288             :                   END DO
    2289             :                END DO
    2290             :             END IF
    2291        2376 :             CALL para_env%bcast(z_ij_loc_re, ip)
    2292        2376 :             CALL para_env%bcast(z_ij_loc_im, ip)
    2293        2376 :             CALL cp_fm_set_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
    2294        3564 :             CALL cp_fm_set_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
    2295             :          END DO ! ip
    2296             :       END DO
    2297             : 
    2298        1164 :       DO ip = 0, para_env%num_pe - 1
    2299       25692 :          z_ij_loc_re = 0.0_dp
    2300         776 :          nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2301         776 :          IF (ip == para_env%mepos) THEN
    2302        1630 :             ns_me = nblock
    2303        1630 :             DO i = 1, ns_me
    2304       11772 :                ii = ilow1 + i - 1
    2305       12160 :                DO j = 1, nstate
    2306       11772 :                   z_ij_loc_re(j, i) = rotmat(j, i)
    2307             :                END DO
    2308             :             END DO
    2309             :          END IF
    2310         776 :          CALL para_env%bcast(z_ij_loc_re, ip)
    2311        1164 :          CALL cp_fm_set_submatrix(rmat, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
    2312             :       END DO
    2313             : 
    2314         388 :       DEALLOCATE (z_ij_loc_re)
    2315         388 :       DEALLOCATE (z_ij_loc_im)
    2316        1576 :       DO idim = 1, dim2
    2317        1576 :          DEALLOCATE (cz_ij_loc(idim)%c_array)
    2318             :       END DO
    2319         388 :       DEALLOCATE (cz_ij_loc)
    2320             : 
    2321         388 :       CALL para_env%sync()
    2322         388 :       CALL rotate_orbitals(rmat, vectors)
    2323         388 :       CALL cp_fm_release(rmat)
    2324             : 
    2325         388 :       DEALLOCATE (rotmat)
    2326         388 :       DEALLOCATE (ns_bound)
    2327             : 
    2328         388 :       CALL timestop(handle)
    2329             : 
    2330        1164 :    END SUBROUTINE jacobi_rot_para
    2331             : 
    2332             : ! **************************************************************************************************
    2333             : !> \brief almost identical to 'jacobi_rot_para' but with different inout variables
    2334             : !> \param weights ...
    2335             : !> \param czij ...
    2336             : !> \param para_env ...
    2337             : !> \param max_iter ...
    2338             : !> \param rmat ...
    2339             : !> \param tol_out ...
    2340             : !> \author Soumya Ghosh (08/21)
    2341             : ! **************************************************************************************************
    2342          32 :    SUBROUTINE jacobi_rot_para_1(weights, czij, para_env, max_iter, rmat, tol_out)
    2343             : 
    2344             :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    2345             :       TYPE(cp_cfm_type), INTENT(IN)                      :: czij(:)
    2346             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2347             :       INTEGER, INTENT(IN)                                :: max_iter
    2348             :       TYPE(cp_cfm_type), INTENT(IN)                      :: rmat
    2349             :       REAL(dp), INTENT(OUT), OPTIONAL                    :: tol_out
    2350             : 
    2351             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'jacobi_rot_para_1'
    2352             : 
    2353          32 :       COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :)     :: czij_array
    2354             :       INTEGER                                            :: dim2, handle, i, idim, ii, ilow1, ip, j, &
    2355             :                                                             nblock, nblock_max, ns_me, nstate, &
    2356             :                                                             sweeps
    2357             :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: ns_bound
    2358          32 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: rotmat, z_ij_loc_re
    2359             :       REAL(KIND=dp)                                      :: xstate
    2360          32 :       TYPE(set_c_2d_type), DIMENSION(:), POINTER         :: cz_ij_loc
    2361             : 
    2362          32 :       CALL timeset(routineN, handle)
    2363             : 
    2364          32 :       dim2 = SIZE(czij)
    2365             : 
    2366          32 :       CALL cp_cfm_set_all(rmat, CMPLX(0._dp, 0._dp, dp), CMPLX(1._dp, 0._dp, dp))
    2367             : 
    2368          32 :       CALL cp_cfm_get_info(rmat, nrow_global=nstate)
    2369             : 
    2370             :       ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
    2371          32 :       xstate = REAL(nstate, dp)/REAL(para_env%num_pe, dp)
    2372         128 :       ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
    2373          96 :       DO ip = 1, para_env%num_pe
    2374          64 :          ns_bound(ip - 1, 1) = MIN(nstate, NINT(xstate*(ip - 1))) + 1
    2375          96 :          ns_bound(ip - 1, 2) = MIN(nstate, NINT(xstate*ip))
    2376             :       END DO
    2377          32 :       nblock_max = 0
    2378          96 :       DO ip = 0, para_env%num_pe - 1
    2379          64 :          nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2380          96 :          nblock_max = MAX(nblock_max, nblock)
    2381             :       END DO
    2382             : 
    2383             :       ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
    2384         128 :       ALLOCATE (czij_array(nstate, nblock_max))
    2385          96 :       ALLOCATE (cz_ij_loc(dim2))
    2386         128 :       DO idim = 1, dim2
    2387         320 :          DO ip = 0, para_env%num_pe - 1
    2388         192 :             nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2389             :             ! cfm --> allocatable
    2390         192 :             CALL cp_cfm_get_submatrix(czij(idim), czij_array, 1, ns_bound(ip, 1), nstate, nblock)
    2391         288 :             IF (para_env%mepos == ip) THEN
    2392          96 :                ns_me = nblock
    2393         384 :                ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me))
    2394        2160 :                DO i = 1, ns_me
    2395       90912 :                   DO j = 1, nstate
    2396       90816 :                      cz_ij_loc(idim)%c_array(j, i) = czij_array(j, i)
    2397             :                   END DO
    2398             :                END DO
    2399             :             END IF
    2400             :          END DO ! ip
    2401             :       END DO
    2402          32 :       DEALLOCATE (czij_array)
    2403             : 
    2404         128 :       ALLOCATE (rotmat(nstate, 2*nblock_max))
    2405             : 
    2406             :       CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, 1, dim2, nstate, nblock_max, ns_bound, &
    2407          32 :                                 cz_ij_loc, rotmat, 0, tol_out=tol_out)
    2408             : 
    2409          32 :       ilow1 = ns_bound(para_env%mepos, 1)
    2410          32 :       ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
    2411         128 :       ALLOCATE (z_ij_loc_re(nstate, nblock_max))
    2412             : 
    2413          96 :       DO ip = 0, para_env%num_pe - 1
    2414       62016 :          z_ij_loc_re = 0.0_dp
    2415          64 :          nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2416          64 :          IF (ip == para_env%mepos) THEN
    2417         720 :             ns_me = nblock
    2418         720 :             DO i = 1, ns_me
    2419       30272 :                ii = ilow1 + i - 1
    2420       30304 :                DO j = 1, nstate
    2421       30272 :                   z_ij_loc_re(j, i) = rotmat(j, i)
    2422             :                END DO
    2423             :             END DO
    2424             :          END IF
    2425          64 :          CALL para_env%bcast(z_ij_loc_re, ip)
    2426       62048 :          CALL cp_cfm_set_submatrix(rmat, CMPLX(z_ij_loc_re, 0.0_dp, dp), 1, ns_bound(ip, 1), nstate, nblock)
    2427             :       END DO
    2428             : 
    2429          32 :       DEALLOCATE (z_ij_loc_re)
    2430         128 :       DO idim = 1, dim2
    2431         128 :          DEALLOCATE (cz_ij_loc(idim)%c_array)
    2432             :       END DO
    2433          32 :       DEALLOCATE (cz_ij_loc)
    2434             : 
    2435          32 :       CALL para_env%sync()
    2436             : 
    2437          32 :       DEALLOCATE (rotmat)
    2438          32 :       DEALLOCATE (ns_bound)
    2439             : 
    2440          32 :       CALL timestop(handle)
    2441             : 
    2442          64 :    END SUBROUTINE jacobi_rot_para_1
    2443             : 
    2444             : ! **************************************************************************************************
    2445             : !> \brief Parallel algorithm for jacobi rotations
    2446             : !> \param weights ...
    2447             : !> \param para_env ...
    2448             : !> \param max_iter ...
    2449             : !> \param sweeps ...
    2450             : !> \param out_each ...
    2451             : !> \param dim2 ...
    2452             : !> \param nstate ...
    2453             : !> \param nblock_max ...
    2454             : !> \param ns_bound ...
    2455             : !> \param cz_ij_loc ...
    2456             : !> \param rotmat ...
    2457             : !> \param output_unit ...
    2458             : !> \param tol_out ...
    2459             : !> \param eps_localization ...
    2460             : !> \param target_time ...
    2461             : !> \param start_time ...
    2462             : !> \par History
    2463             : !>      split out to reuse with different input types
    2464             : !> \author HF (05.2022)
    2465             : ! **************************************************************************************************
    2466        1260 :    SUBROUTINE jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, &
    2467         420 :                                    ns_bound, cz_ij_loc, rotmat, output_unit, tol_out, eps_localization, target_time, start_time)
    2468             : 
    2469             :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    2470             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2471             :       INTEGER, INTENT(IN)                                :: max_iter
    2472             :       INTEGER, INTENT(OUT)                               :: sweeps
    2473             :       INTEGER, INTENT(IN)                                :: out_each, dim2, nstate, nblock_max
    2474             :       INTEGER, DIMENSION(0:, :), INTENT(IN)              :: ns_bound
    2475             :       TYPE(set_c_2d_type), DIMENSION(:), POINTER         :: cz_ij_loc
    2476             :       REAL(dp), DIMENSION(:, :), INTENT(OUT)             :: rotmat
    2477             :       INTEGER, INTENT(IN)                                :: output_unit
    2478             :       REAL(dp), INTENT(OUT), OPTIONAL                    :: tol_out
    2479             :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: eps_localization
    2480             :       REAL(dp), OPTIONAL                                 :: target_time, start_time
    2481             : 
    2482             :       COMPLEX(KIND=dp)                                   :: zi, zj
    2483         420 :       COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)  :: c_array_me, c_array_partner
    2484             :       COMPLEX(KIND=dp), POINTER                          :: mii(:), mij(:), mjj(:)
    2485             :       INTEGER :: i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, ip, ip_has_i, &
    2486             :          ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, iu1, iu2, iup1, &
    2487             :          iup2, j, jj, jstate, k, kk, lsweep, n1, n2, npair, nperm, ns_me, ns_partner, &
    2488             :          ns_recv_from, ns_recv_partner
    2489         420 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: rcount, rdispl
    2490         420 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: list_pair
    2491             :       LOGICAL                                            :: should_stop
    2492         420 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: gmat, rmat_loc, rmat_recv, rmat_send
    2493         420 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: rmat_recv_all
    2494             :       REAL(KIND=dp)                                      :: ct, func, gmax, grad, ri, rj, st, t1, &
    2495             :                                                             t2, theta, tolerance, zc, zr
    2496         420 :       TYPE(set_c_1d_type), DIMENSION(:), POINTER         :: zdiag_all, zdiag_me
    2497         420 :       TYPE(set_c_2d_type), DIMENSION(:), POINTER         :: xyz_mix, xyz_mix_ns
    2498             : 
    2499         420 :       NULLIFY (zdiag_all, zdiag_me)
    2500         420 :       NULLIFY (xyz_mix, xyz_mix_ns)
    2501             :       NULLIFY (mii, mij, mjj)
    2502             : 
    2503        2100 :       ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
    2504             : 
    2505        1260 :       ALLOCATE (rcount(para_env%num_pe), STAT=istat)
    2506         840 :       ALLOCATE (rdispl(para_env%num_pe), STAT=istat)
    2507             : 
    2508         420 :       tolerance = 1.0e10_dp
    2509         420 :       sweeps = 0
    2510             : 
    2511             :       ! number of processor pairs and number of permutations
    2512         420 :       npair = (para_env%num_pe + 1)/2
    2513         420 :       nperm = para_env%num_pe - MOD(para_env%num_pe + 1, 2)
    2514        1260 :       ALLOCATE (list_pair(2, npair))
    2515             : 
    2516             :       ! initialize rotation matrix
    2517       87288 :       rotmat = 0.0_dp
    2518        2350 :       DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
    2519        1930 :          ii = i - ns_bound(para_env%mepos, 1) + 1
    2520        2350 :          rotmat(i, ii) = 1.0_dp
    2521             :       END DO
    2522             : 
    2523        1260 :       ALLOCATE (xyz_mix(dim2))
    2524         840 :       ALLOCATE (xyz_mix_ns(dim2))
    2525        1260 :       ALLOCATE (zdiag_me(dim2))
    2526         840 :       ALLOCATE (zdiag_all(dim2))
    2527             : 
    2528         420 :       ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
    2529         420 :       IF (ns_me /= 0) THEN
    2530        2065 :          ALLOCATE (c_array_me(nstate, ns_me, dim2))
    2531        1676 :          DO idim = 1, dim2
    2532        5465 :             ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me))
    2533             :          END DO
    2534        1652 :          ALLOCATE (gmat(nstate, ns_me))
    2535             :       END IF
    2536             : 
    2537        1704 :       DO idim = 1, dim2
    2538        3852 :          ALLOCATE (zdiag_me(idim)%c_array(nblock_max))
    2539        7518 :          zdiag_me(idim)%c_array = CMPLX(0.0_dp, 0.0_dp, dp)
    2540        3852 :          ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max))
    2541       14172 :          zdiag_all(idim)%c_array = CMPLX(0.0_dp, 0.0_dp, dp)
    2542             :       END DO
    2543        1680 :       ALLOCATE (rmat_recv(nblock_max*2, nblock_max))
    2544        1260 :       ALLOCATE (rmat_send(nblock_max*2, nblock_max))
    2545             : 
    2546             :       ! buffer for message passing
    2547        2100 :       ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1))
    2548             : 
    2549         420 :       IF (output_unit > 0) THEN
    2550         194 :          WRITE (output_unit, '(T4,A )') " Localization by iterative distributed Jacobi rotation"
    2551         194 :          WRITE (output_unit, '(T20,A12,T32, A22,T60, A12,A8 )') "Iteration", "Functional", "Tolerance", " Time "
    2552             :       END IF
    2553             : 
    2554       87208 :       DO lsweep = 1, max_iter + 1
    2555       87208 :          sweeps = lsweep
    2556       87208 :          IF (sweeps == max_iter + 1) THEN
    2557         156 :             IF (output_unit > 0) THEN
    2558          62 :                WRITE (output_unit, *) ' LOCALIZATION! loop did not converge within the maximum number of iterations.'
    2559          62 :                WRITE (output_unit, *) '               Present  Max. gradient = ', tolerance
    2560             :             END IF
    2561             :             EXIT
    2562             :          END IF
    2563       87052 :          t1 = m_walltime()
    2564             : 
    2565      174104 :          DO iperm = 1, nperm
    2566             : 
    2567             :             ! fix partners for this permutation, and get the number of states
    2568       87052 :             CALL eberlein(iperm, para_env, list_pair)
    2569       87052 :             ip_partner = -1
    2570       87052 :             ns_partner = 0
    2571       87052 :             DO ipair = 1, npair
    2572       87052 :                IF (list_pair(1, ipair) == para_env%mepos) THEN
    2573       43526 :                   ip_partner = list_pair(2, ipair)
    2574       43526 :                   EXIT
    2575       43526 :                ELSE IF (list_pair(2, ipair) == para_env%mepos) THEN
    2576       43526 :                   ip_partner = list_pair(1, ipair)
    2577       43526 :                   EXIT
    2578             :                END IF
    2579             :             END DO
    2580       87052 :             IF (ip_partner >= 0) THEN
    2581       87052 :                ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1
    2582             :             ELSE
    2583             :                ns_partner = 0
    2584             :             END IF
    2585             : 
    2586             :             ! if there is a non-zero block connecting two partners, jacobi-sweep it.
    2587       87052 :             IF (ns_partner*ns_me /= 0) THEN
    2588             : 
    2589      348152 :                ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner))
    2590     5089598 :                rmat_loc = 0.0_dp
    2591      698582 :                DO i = 1, ns_me + ns_partner
    2592      698582 :                   rmat_loc(i, i) = 1.0_dp
    2593             :                END DO
    2594             : 
    2595      435190 :                ALLOCATE (c_array_partner(nstate, ns_partner, dim2))
    2596             : 
    2597      349754 :                DO idim = 1, dim2
    2598     1050864 :                   ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner))
    2599     1276511 :                   DO i = 1, ns_me
    2600     7890792 :                      c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i)
    2601             :                   END DO
    2602             :                END DO
    2603             : 
    2604             :                CALL para_env%sendrecv(msgin=c_array_me, dest=ip_partner, &
    2605       87038 :                                       msgout=c_array_partner, source=ip_partner)
    2606             : 
    2607       87038 :                n1 = ns_me
    2608       87038 :                n2 = ns_partner
    2609       87038 :                ilow1 = ns_bound(para_env%mepos, 1)
    2610       87038 :                iup1 = ns_bound(para_env%mepos, 1) + n1 - 1
    2611       87038 :                ilow2 = ns_bound(ip_partner, 1)
    2612       87038 :                iup2 = ns_bound(ip_partner, 1) + n2 - 1
    2613       87038 :                IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1)) THEN
    2614       43519 :                   il1 = 1
    2615             :                   iu1 = n1
    2616       43519 :                   iu1 = n1
    2617       43519 :                   il2 = 1 + n1
    2618       43519 :                   iu2 = n1 + n2
    2619             :                ELSE
    2620       43519 :                   il1 = 1 + n2
    2621             :                   iu1 = n1 + n2
    2622       43519 :                   iu1 = n1 + n2
    2623       43519 :                   il2 = 1
    2624       43519 :                   iu2 = n2
    2625             :                END IF
    2626             : 
    2627      349754 :                DO idim = 1, dim2
    2628     1189473 :                   DO i = 1, n1
    2629     4337826 :                      xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim)
    2630     4479723 :                      xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim)
    2631             :                   END DO
    2632     1276511 :                   DO i = 1, n2
    2633     4337826 :                      xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim)
    2634     4479723 :                      xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim)
    2635             :                   END DO
    2636             :                END DO
    2637             : 
    2638      698582 :                DO istate = 1, n1 + n2
    2639     2588318 :                   DO jstate = istate + 1, n1 + n2
    2640     7664298 :                      DO idim = 1, dim2
    2641     5774562 :                         mii(idim) = xyz_mix(idim)%c_array(istate, istate)
    2642     5774562 :                         mij(idim) = xyz_mix(idim)%c_array(istate, jstate)
    2643     7664298 :                         mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate)
    2644             :                      END DO
    2645     1889736 :                      CALL get_angle(mii, mjj, mij, weights, theta)
    2646     1889736 :                      st = SIN(theta)
    2647     1889736 :                      ct = COS(theta)
    2648     7664298 :                      DO idim = 1, dim2
    2649    51288858 :                         DO i = 1, n1 + n2
    2650    45514296 :                            zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate)
    2651    45514296 :                            zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate)
    2652    45514296 :                            xyz_mix(idim)%c_array(i, istate) = zi
    2653    51288858 :                            xyz_mix(idim)%c_array(i, jstate) = zj
    2654             :                         END DO
    2655    53178594 :                         DO i = 1, n1 + n2
    2656    45514296 :                            zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i)
    2657    45514296 :                            zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i)
    2658    45514296 :                            xyz_mix(idim)%c_array(istate, i) = zi
    2659    51288858 :                            xyz_mix(idim)%c_array(jstate, i) = zj
    2660             :                         END DO
    2661             :                      END DO
    2662             : 
    2663    17237808 :                      DO i = 1, n1 + n2
    2664    14736528 :                         ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate)
    2665    14736528 :                         rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate)
    2666    14736528 :                         rmat_loc(i, istate) = ri
    2667    16626264 :                         rmat_loc(i, jstate) = rj
    2668             :                      END DO
    2669             :                   END DO
    2670             :                END DO
    2671             : 
    2672       87038 :                k = nblock_max + 1
    2673             :                CALL para_env%sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, &
    2674     5089598 :                                       rotmat(1:nstate, k:k + n2 - 1), ip_partner)
    2675             : 
    2676       87038 :                IF (ilow1 < ilow2) THEN
    2677             :                   ! no longer compiles in official sdgb:
    2678             :                   !CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1, k), nstate, rmat_loc(1 + n1, 1), n1 + n2, 0.0_dp, gmat, nstate)
    2679             :                   ! probably inefficient:
    2680             :                   CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, rmat_loc(1 + n1:, 1:n1), &
    2681     1467175 :                              n2, 0.0_dp, gmat(:, :), nstate)
    2682             :                   CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(1:, 1:), &
    2683       43519 :                              n1 + n2, 1.0_dp, gmat(:, :), nstate)
    2684             :                ELSE
    2685             :                   CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, &
    2686       43519 :                              rmat_loc(1:, n2 + 1:), n1 + n2, 0.0_dp, gmat(:, :), nstate)
    2687             :                   ! no longer compiles in official sdgb:
    2688             :                   !CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1, 1), nstate, rmat_loc(n2 + 1, n2 + 1), n1 + n2, 1.0_dp, gmat, nstate)
    2689             :                   ! probably inefficient:
    2690             :                   CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(n2 + 1:, n2 + 1:), &
    2691     1147207 :                              n1, 1.0_dp, gmat(:, :), nstate)
    2692             :                END IF
    2693             : 
    2694       87038 :                CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1)
    2695             : 
    2696      349754 :                DO idim = 1, dim2
    2697     1189473 :                   DO i = 1, n1
    2698     7890792 :                      xyz_mix_ns(idim)%c_array(1:nstate, i) = CMPLX(0.0_dp, 0.0_dp, dp)
    2699             :                   END DO
    2700             : 
    2701     1189473 :                   DO istate = 1, n1
    2702     7890792 :                      DO jstate = 1, nstate
    2703    33316224 :                         DO i = 1, n2
    2704             :                            xyz_mix_ns(idim)%c_array(jstate, istate) = &
    2705             :                               xyz_mix_ns(idim)%c_array(jstate, istate) + &
    2706    32389467 :                               c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1)
    2707             :                         END DO
    2708             :                      END DO
    2709             :                   END DO
    2710     1276511 :                   DO istate = 1, n1
    2711     7890792 :                      DO jstate = 1, nstate
    2712    34155543 :                         DO i = 1, n1
    2713             :                            xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + &
    2714    33228786 :                                                                  c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1)
    2715             :                         END DO
    2716             :                      END DO
    2717             :                   END DO
    2718             :                END DO ! idim
    2719             : 
    2720       87038 :                DEALLOCATE (c_array_partner)
    2721             : 
    2722             :             ELSE ! save my data
    2723          56 :                DO idim = 1, dim2
    2724          77 :                   DO i = 1, ns_me
    2725         105 :                      xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i)
    2726             :                   END DO
    2727             :                END DO
    2728             :             END IF
    2729             : 
    2730      349810 :             DO idim = 1, dim2
    2731     1276588 :                DO i = 1, ns_me
    2732     7890876 :                   cz_ij_loc(idim)%c_array(1:nstate, i) = CMPLX(0.0_dp, 0.0_dp, dp)
    2733             :                END DO
    2734             :             END DO
    2735             : 
    2736       87052 :             IF (ns_partner*ns_me /= 0) THEN
    2737             :                ! transpose rotation matrix rmat_loc
    2738      698582 :                DO i = 1, ns_me + ns_partner
    2739     2588318 :                   DO j = i + 1, ns_me + ns_partner
    2740     1889736 :                      ri = rmat_loc(i, j)
    2741     1889736 :                      rmat_loc(i, j) = rmat_loc(j, i)
    2742     2501280 :                      rmat_loc(j, i) = ri
    2743             :                   END DO
    2744             :                END DO
    2745             : 
    2746             :                ! prepare for distribution
    2747      392810 :                DO i = 1, n1
    2748     1510694 :                   rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1)
    2749             :                END DO
    2750       87038 :                ik = nblock_max
    2751      392810 :                DO i = 1, n2
    2752     1470434 :                   rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1)
    2753             :                END DO
    2754             :             ELSE
    2755          56 :                rmat_send = 0.0_dp
    2756             :             END IF
    2757             : 
    2758             :             ! collect data from all tasks (this takes some significant time)
    2759       87052 :             CALL para_env%allgather(rmat_send, rmat_recv_all)
    2760             : 
    2761             :             ! update blocks everywhere
    2762      261156 :             DO ip = 0, para_env%num_pe - 1
    2763             : 
    2764      174104 :                ip_recv_from = MOD(para_env%mepos - IP + para_env%num_pe, para_env%num_pe)
    2765     6456620 :                rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from)
    2766             : 
    2767      174104 :                ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1
    2768             : 
    2769      261156 :                IF (ns_me /= 0) THEN
    2770      174090 :                   IF (ns_recv_from /= 0) THEN
    2771             :                      !look for the partner of ip_recv_from
    2772      174083 :                      ip_recv_partner = -1
    2773      174083 :                      ns_recv_partner = 0
    2774      174083 :                      DO ipair = 1, npair
    2775      174083 :                         IF (list_pair(1, ipair) == ip_recv_from) THEN
    2776       87045 :                            ip_recv_partner = list_pair(2, ipair)
    2777       87045 :                            EXIT
    2778       87038 :                         ELSE IF (list_pair(2, ipair) == ip_recv_from) THEN
    2779             :                            ip_recv_partner = list_pair(1, ipair)
    2780             :                            EXIT
    2781             :                         END IF
    2782             :                      END DO
    2783             : 
    2784      174083 :                      IF (ip_recv_partner >= 0) THEN
    2785      174083 :                         ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1
    2786             :                      END IF
    2787      174083 :                      IF (ns_recv_partner > 0) THEN
    2788      174076 :                         il1 = ns_bound(para_env%mepos, 1)
    2789      174076 :                         il_recv = ns_bound(ip_recv_from, 1)
    2790      174076 :                         il_recv_partner = ns_bound(ip_recv_partner, 1)
    2791      174076 :                         ik = nblock_max
    2792             : 
    2793      699508 :                         DO idim = 1, dim2
    2794     2378946 :                            DO i = 1, ns_recv_from
    2795     1853514 :                               ii = il_recv + i - 1
    2796     9080265 :                               DO j = 1, ns_me
    2797    33228786 :                                  jj = j
    2798    35082300 :                                  DO k = 1, ns_recv_from
    2799    26527467 :                                     kk = il_recv + k - 1
    2800             :                                     cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
    2801    33228786 :                                                                       rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j)
    2802             :                                  END DO
    2803             :                               END DO
    2804             :                            END DO
    2805     2553022 :                            DO i = 1, ns_recv_from
    2806     1853514 :                               ii = il_recv + i - 1
    2807     9080265 :                               DO j = 1, ns_me
    2808    32389467 :                                  jj = j
    2809    34242981 :                                  DO k = 1, ns_recv_partner
    2810    25688148 :                                     kk = il_recv_partner + k - 1
    2811             :                                     cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
    2812    32389467 :                                                                       rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j)
    2813             :                                  END DO
    2814             :                               END DO
    2815             :                            END DO
    2816             :                         END DO ! idim
    2817             :                      ELSE
    2818           7 :                         il1 = ns_bound(para_env%mepos, 1)
    2819           7 :                         il_recv = ns_bound(ip_recv_from, 1)
    2820          28 :                         DO idim = 1, dim2
    2821          49 :                            DO j = 1, ns_me
    2822          42 :                               jj = j
    2823          63 :                               DO i = 1, ns_recv_from
    2824          21 :                                  ii = il_recv + i - 1
    2825          42 :                                  cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j)
    2826             :                               END DO
    2827             :                            END DO
    2828             :                         END DO ! idim
    2829             :                      END IF
    2830             :                   END IF
    2831             :                END IF ! ns_me
    2832             :             END DO ! ip
    2833             : 
    2834      174104 :             IF (ns_partner*ns_me /= 0) THEN
    2835       87038 :                DEALLOCATE (rmat_loc)
    2836      349754 :                DO idim = 1, dim2
    2837      349754 :                   DEALLOCATE (xyz_mix(idim)%c_array)
    2838             :                END DO
    2839             :             END IF
    2840             : 
    2841             :          END DO ! iperm
    2842             : 
    2843             :          ! calculate the max gradient
    2844      349810 :          DO idim = 1, dim2
    2845     1189536 :             DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
    2846      926778 :                ii = i - ns_bound(para_env%mepos, 1) + 1
    2847      926778 :                zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
    2848     1189536 :                zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
    2849             :             END DO
    2850      788274 :             rcount(:) = SIZE(zdiag_me(idim)%c_array)
    2851      262758 :             rdispl(1) = 0
    2852      525516 :             DO ip = 2, para_env%num_pe
    2853      525516 :                rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
    2854             :             END DO
    2855             :             ! collect all the diagonal elements in a replicated 1d array
    2856     3492664 :             CALL para_env%allgatherv(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl)
    2857             :          END DO
    2858             : 
    2859       87052 :          gmax = 0.0_dp
    2860      392831 :          DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
    2861      305779 :             k = j - ns_bound(para_env%mepos, 1) + 1
    2862     1337699 :             DO i = 1, j - 1
    2863             :                ! find the location of the diagonal element (i,i)
    2864     1088034 :                DO ip = 0, para_env%num_pe - 1
    2865     1088034 :                   IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2)) THEN
    2866             :                      ip_has_i = ip
    2867             :                      EXIT
    2868             :                   END IF
    2869             :                END DO
    2870      944868 :                ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1
    2871             :                ! mepos has the diagonal element (j,j), as well as the off diagonal (i,j)
    2872      944868 :                jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1
    2873      944868 :                grad = 0.0_dp
    2874     3832149 :                DO idim = 1, dim2
    2875     2887281 :                   zi = zdiag_all(idim)%c_array(ii)
    2876     2887281 :                   zj = zdiag_all(idim)%c_array(jj)
    2877     3832149 :                   grad = grad + weights(idim)*REAL(4.0_dp*CONJG(cz_ij_loc(idim)%c_array(i, k))*(zj - zi), dp)
    2878             :                END DO
    2879     1250647 :                gmax = MAX(gmax, ABS(grad))
    2880             :             END DO
    2881             :          END DO
    2882             : 
    2883       87052 :          CALL para_env%max(gmax)
    2884       87052 :          tolerance = gmax
    2885       87052 :          IF (PRESENT(tol_out)) tol_out = tolerance
    2886             : 
    2887       87052 :          func = 0.0_dp
    2888      392831 :          DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
    2889      305779 :             k = i - ns_bound(para_env%mepos, 1) + 1
    2890     1319609 :             DO idim = 1, dim2
    2891      926778 :                zr = REAL(cz_ij_loc(idim)%c_array(i, k), dp)
    2892      926778 :                zc = AIMAG(cz_ij_loc(idim)%c_array(i, k))
    2893     1232557 :                func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/twopi/twopi
    2894             :             END DO
    2895             :          END DO
    2896       87052 :          CALL para_env%sum(func)
    2897       87052 :          t2 = m_walltime()
    2898             : 
    2899       87052 :          IF (output_unit > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
    2900         440 :             WRITE (output_unit, '(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1
    2901         440 :             CALL m_flush(output_unit)
    2902             :          END IF
    2903       87052 :          IF (PRESENT(eps_localization)) THEN
    2904       87020 :             IF (tolerance < eps_localization) EXIT
    2905             :          END IF
    2906       87052 :          IF (PRESENT(target_time) .AND. PRESENT(start_time)) THEN
    2907       86756 :             CALL external_control(should_stop, "LOC", target_time=target_time, start_time=start_time)
    2908       86756 :             IF (should_stop) EXIT
    2909             :          END IF
    2910             : 
    2911             :       END DO ! lsweep
    2912             : 
    2913             :       ! buffer for message passing
    2914         420 :       DEALLOCATE (rmat_recv_all)
    2915             : 
    2916         420 :       DEALLOCATE (rmat_recv)
    2917         420 :       DEALLOCATE (rmat_send)
    2918         420 :       IF (ns_me > 0) THEN
    2919         413 :          DEALLOCATE (c_array_me)
    2920             :       END IF
    2921        1704 :       DO idim = 1, dim2
    2922        1284 :          DEALLOCATE (zdiag_me(idim)%c_array)
    2923        1704 :          DEALLOCATE (zdiag_all(idim)%c_array)
    2924             :       END DO
    2925         420 :       DEALLOCATE (zdiag_me)
    2926         420 :       DEALLOCATE (zdiag_all)
    2927         420 :       DEALLOCATE (xyz_mix)
    2928        1704 :       DO idim = 1, dim2
    2929        1704 :          IF (ns_me /= 0) THEN
    2930        1263 :             DEALLOCATE (xyz_mix_ns(idim)%c_array)
    2931             :          END IF
    2932             :       END DO
    2933         420 :       DEALLOCATE (xyz_mix_ns)
    2934         420 :       IF (ns_me /= 0) THEN
    2935         413 :          DEALLOCATE (gmat)
    2936             :       END IF
    2937         420 :       DEALLOCATE (mii)
    2938         420 :       DEALLOCATE (mij)
    2939         420 :       DEALLOCATE (mjj)
    2940         420 :       DEALLOCATE (list_pair)
    2941             : 
    2942         840 :    END SUBROUTINE jacobi_rot_para_core
    2943             : 
    2944             : ! **************************************************************************************************
    2945             : !> \brief ...
    2946             : !> \param iperm ...
    2947             : !> \param para_env ...
    2948             : !> \param list_pair ...
    2949             : ! **************************************************************************************************
    2950       87052 :    SUBROUTINE eberlein(iperm, para_env, list_pair)
    2951             :       INTEGER, INTENT(IN)                                :: iperm
    2952             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2953             :       INTEGER, DIMENSION(:, :)                           :: list_pair
    2954             : 
    2955             :       INTEGER                                            :: i, ii, jj, npair
    2956             : 
    2957       87052 :       npair = (para_env%num_pe + 1)/2
    2958       87052 :       IF (iperm == 1) THEN
    2959             : !..set up initial ordering
    2960      261156 :          DO I = 0, para_env%num_pe - 1
    2961      174104 :             II = ((i + 1) + 1)/2
    2962      174104 :             JJ = MOD((i + 1) + 1, 2) + 1
    2963      261156 :             list_pair(JJ, II) = i
    2964             :          END DO
    2965       87052 :          IF (MOD(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1
    2966           0 :       ELSEIF (MOD(iperm, 2) == 0) THEN
    2967             : !..a type shift
    2968           0 :          jj = list_pair(1, npair)
    2969           0 :          DO I = npair, 3, -1
    2970           0 :             list_pair(1, I) = list_pair(1, I - 1)
    2971             :          END DO
    2972           0 :          list_pair(1, 2) = list_pair(2, 1)
    2973           0 :          list_pair(2, 1) = jj
    2974             :       ELSE
    2975             : !..b type shift
    2976           0 :          jj = list_pair(2, 1)
    2977           0 :          DO I = 1, npair - 1
    2978           0 :             list_pair(2, I) = list_pair(2, I + 1)
    2979             :          END DO
    2980           0 :          list_pair(2, npair) = jj
    2981             :       END IF
    2982             : 
    2983       87052 :    END SUBROUTINE eberlein
    2984             : 
    2985             : ! **************************************************************************************************
    2986             : !> \brief ...
    2987             : !> \param vectors ...
    2988             : !> \param op_sm_set ...
    2989             : !> \param zij_fm_set ...
    2990             : ! **************************************************************************************************
    2991         488 :    SUBROUTINE zij_matrix(vectors, op_sm_set, zij_fm_set)
    2992             : 
    2993             :       TYPE(cp_fm_type), INTENT(IN)                       :: vectors
    2994             :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: op_sm_set
    2995             :       TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN)      :: zij_fm_set
    2996             : 
    2997             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'zij_matrix'
    2998             : 
    2999             :       INTEGER                                            :: handle, i, j, nao, nmoloc
    3000             :       TYPE(cp_fm_type)                                   :: opvec
    3001             : 
    3002         488 :       CALL timeset(routineN, handle)
    3003             : 
    3004             :       ! get rows and cols of the input
    3005         488 :       CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc)
    3006             :       ! replicate the input kind of matrix
    3007         488 :       CALL cp_fm_create(opvec, vectors%matrix_struct)
    3008             : 
    3009             :       ! Compute zij here
    3010        1988 :       DO i = 1, SIZE(zij_fm_set, 2)
    3011        4988 :          DO j = 1, SIZE(zij_fm_set, 1)
    3012        3000 :             CALL cp_fm_set_all(zij_fm_set(j, i), 0.0_dp)
    3013        3000 :             CALL cp_dbcsr_sm_fm_multiply(op_sm_set(j, i)%matrix, vectors, opvec, ncol=nmoloc)
    3014             :             CALL parallel_gemm("T", "N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, &
    3015        4500 :                                zij_fm_set(j, i))
    3016             :          END DO
    3017             :       END DO
    3018             : 
    3019         488 :       CALL cp_fm_release(opvec)
    3020         488 :       CALL timestop(handle)
    3021             : 
    3022         488 :    END SUBROUTINE zij_matrix
    3023             : 
    3024             : ! **************************************************************************************************
    3025             : !> \brief ...
    3026             : !> \param vectors ...
    3027             : ! **************************************************************************************************
    3028          36 :    SUBROUTINE scdm_qrfact(vectors)
    3029             : 
    3030             :       TYPE(cp_fm_type), INTENT(IN)                       :: vectors
    3031             : 
    3032             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'scdm_qrfact'
    3033             : 
    3034             :       INTEGER                                            :: handle, ncolT, nrowT
    3035          36 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: tau
    3036             :       TYPE(cp_fm_struct_type), POINTER                   :: cstruct
    3037             :       TYPE(cp_fm_type)                                   :: CTp, Qf, tmp
    3038             : 
    3039          36 :       CALL timeset(routineN, handle)
    3040             : 
    3041             :       ! Create Transpose of Coefficient Matrix vectors
    3042          36 :       nrowT = vectors%matrix_struct%ncol_global
    3043          36 :       ncolT = vectors%matrix_struct%nrow_global
    3044             : 
    3045             :       CALL cp_fm_struct_create(cstruct, vectors%matrix_struct%para_env, &
    3046             :                                vectors%matrix_struct%context, nrowT, ncolT, vectors%matrix_struct%ncol_block, &
    3047          36 :                                vectors%matrix_struct%nrow_block, first_p_pos=vectors%matrix_struct%first_p_pos)
    3048          36 :       CALL cp_fm_create(CTp, cstruct)
    3049          36 :       CALL cp_fm_struct_release(cstruct)
    3050             : 
    3051         108 :       ALLOCATE (tau(nrowT))
    3052             : 
    3053          36 :       CALL cp_fm_transpose(vectors, CTp)
    3054             : 
    3055             :       ! Get QR decomposition of CTs
    3056          36 :       CALL cp_fm_pdgeqpf(CTp, tau, nrowT, ncolT, 1, 1)
    3057             : 
    3058             :       ! Construction of Q from the scalapack output
    3059             :       CALL cp_fm_struct_create(cstruct, para_env=CTp%matrix_struct%para_env, &
    3060             :                                context=CTp%matrix_struct%context, nrow_global=CTp%matrix_struct%nrow_global, &
    3061          36 :                                ncol_global=CTp%matrix_struct%nrow_global)
    3062          36 :       CALL cp_fm_create(Qf, cstruct)
    3063          36 :       CALL cp_fm_struct_release(cstruct)
    3064          36 :       CALL cp_fm_to_fm_submat(CTp, Qf, nrowT, nrowT, 1, 1, 1, 1)
    3065             : 
    3066             :       ! Get Q
    3067          36 :       CALL cp_fm_pdorgqr(Qf, tau, nrowT, 1, 1)
    3068             : 
    3069             :       ! Transform original coefficient matrix vectors
    3070          36 :       CALL cp_fm_create(tmp, vectors%matrix_struct)
    3071          36 :       CALL cp_fm_set_all(tmp, 0.0_dp, 1.0_dp)
    3072          36 :       CALL cp_fm_to_fm(vectors, tmp)
    3073          36 :       CALL parallel_gemm('N', 'N', ncolT, nrowT, nrowT, 1.0_dp, tmp, Qf, 0.0_dp, vectors)
    3074             : 
    3075             :       ! Cleanup
    3076          36 :       CALL cp_fm_release(CTp)
    3077          36 :       CALL cp_fm_release(tmp)
    3078          36 :       CALL cp_fm_release(Qf)
    3079          36 :       DEALLOCATE (tau)
    3080             : 
    3081          36 :       CALL timestop(handle)
    3082             : 
    3083         144 :    END SUBROUTINE scdm_qrfact
    3084             : 
    3085           0 : END MODULE qs_localization_methods

Generated by: LCOV version 1.15