LCOV - code coverage report
Current view: top level - src - qs_localization_methods.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:06f838d) Lines: 86.4 % 1657 1432
Test Date: 2026-06-05 07:04:50 Functions: 80.0 % 30 24

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2026 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Localization methods such as 2x2 Jacobi rotations
      10              : !>                                   Steepest Decents
      11              : !>                                   Conjugate Gradient
      12              : !> \par History
      13              : !>      Initial parallellization of jacobi (JVDV 07.2003)
      14              : !>      direct minimization using exponential parametrization (JVDV 09.2003)
      15              : !>      crazy rotations go fast (JVDV 10.2003)
      16              : !> \author CJM (04.2003)
      17              : ! **************************************************************************************************
      18              : MODULE qs_localization_methods
      19              :    USE bibliography,                    ONLY: Schreder2024_2,&
      20              :                                               cite_reference
      21              :    USE cell_types,                      ONLY: cell_type
      22              :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      23              :    USE cp_cfm_basic_linalg,             ONLY: cp_cfm_column_scale,&
      24              :                                               cp_cfm_gemm,&
      25              :                                               cp_cfm_rot_cols,&
      26              :                                               cp_cfm_rot_rows,&
      27              :                                               cp_cfm_scale,&
      28              :                                               cp_cfm_scale_and_add,&
      29              :                                               cp_cfm_schur_product,&
      30              :                                               cp_cfm_trace
      31              :    USE cp_cfm_diag,                     ONLY: cp_cfm_heevd
      32              :    USE cp_cfm_types,                    ONLY: &
      33              :         cp_cfm_create, cp_cfm_get_element, cp_cfm_get_info, cp_cfm_get_submatrix, cp_cfm_release, &
      34              :         cp_cfm_set_all, cp_cfm_set_element, cp_cfm_set_submatrix, cp_cfm_to_cfm, cp_cfm_to_fm, &
      35              :         cp_cfm_type, cp_fm_to_cfm
      36              :    USE cp_dbcsr_api,                    ONLY: dbcsr_p_type
      37              :    USE cp_dbcsr_operations,             ONLY: cp_dbcsr_sm_fm_multiply
      38              :    USE cp_external_control,             ONLY: external_control
      39              :    USE cp_fm_basic_linalg,              ONLY: cp_fm_frobenius_norm,&
      40              :                                               cp_fm_pdgeqpf,&
      41              :                                               cp_fm_pdorgqr,&
      42              :                                               cp_fm_scale,&
      43              :                                               cp_fm_scale_and_add,&
      44              :                                               cp_fm_trace,&
      45              :                                               cp_fm_transpose,&
      46              :                                               cp_fm_triangular_multiply
      47              :    USE cp_fm_cholesky,                  ONLY: cp_fm_cholesky_decompose
      48              :    USE cp_fm_diag,                      ONLY: choose_eigv_solver,&
      49              :                                               cp_fm_syevd
      50              :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      51              :                                               cp_fm_struct_get,&
      52              :                                               cp_fm_struct_release,&
      53              :                                               cp_fm_struct_type
      54              :    USE cp_fm_types,                     ONLY: &
      55              :         cp_fm_create, cp_fm_get_element, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_init_random, &
      56              :         cp_fm_maxabsrownorm, cp_fm_maxabsval, cp_fm_release, cp_fm_set_all, cp_fm_set_submatrix, &
      57              :         cp_fm_to_fm, cp_fm_to_fm_submat, cp_fm_type
      58              :    USE cp_log_handling,                 ONLY: cp_logger_get_default_io_unit,&
      59              :                                               cp_logger_get_default_unit_nr
      60              :    USE kahan_sum,                       ONLY: accurate_sum
      61              :    USE kinds,                           ONLY: dp
      62              :    USE machine,                         ONLY: m_flush,&
      63              :                                               m_walltime
      64              :    USE mathconstants,                   ONLY: gaussi,&
      65              :                                               pi,&
      66              :                                               twopi,&
      67              :                                               z_one,&
      68              :                                               z_zero
      69              :    USE matrix_exp,                      ONLY: exp_pade_real,&
      70              :                                               get_nsquare_norder
      71              :    USE message_passing,                 ONLY: mp_para_env_type
      72              :    USE parallel_gemm_api,               ONLY: parallel_gemm
      73              : #include "./base/base_uses.f90"
      74              : 
      75              :    IMPLICIT NONE
      76              :    PUBLIC :: initialize_weights, crazy_rotations, &
      77              :              direct_mini, rotate_orbitals, approx_l1_norm_sd, jacobi_rotations, scdm_qrfact, zij_matrix, &
      78              :              jacobi_cg_edf_ls, cardoso_souloumiac, cardoso_souloumiac_pipek
      79              : 
      80              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_localization_methods'
      81              : 
      82              :    PRIVATE
      83              : 
      84              :    TYPE set_c_1d_type
      85              :       COMPLEX(KIND=dp), POINTER, DIMENSION(:) :: c_array => NULL()
      86              :    END TYPE set_c_1d_type
      87              : 
      88              :    TYPE set_c_2d_type
      89              :       COMPLEX(KIND=dp), POINTER, DIMENSION(:, :) :: c_array => NULL()
      90              :    END TYPE set_c_2d_type
      91              : 
      92              : CONTAINS
      93              : ! **************************************************************************************************
      94              : !> \brief ...
      95              : !> \param C ...
      96              : !> \param iterations ...
      97              : !> \param eps ...
      98              : !> \param converged ...
      99              : !> \param sweeps ...
     100              : ! **************************************************************************************************
     101          210 :    SUBROUTINE approx_l1_norm_sd(C, iterations, eps, converged, sweeps)
     102              :       TYPE(cp_fm_type), INTENT(IN)                       :: C
     103              :       INTEGER, INTENT(IN)                                :: iterations
     104              :       REAL(KIND=dp), INTENT(IN)                          :: eps
     105              :       LOGICAL, INTENT(INOUT)                             :: converged
     106              :       INTEGER, INTENT(INOUT)                             :: sweeps
     107              : 
     108              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'approx_l1_norm_sd'
     109              :       INTEGER, PARAMETER                                 :: taylor_order = 100
     110              :       REAL(KIND=dp), PARAMETER                           :: alpha = 0.1_dp, f2_eps = 0.01_dp
     111              : 
     112              :       INTEGER                                            :: handle, i, istep, k, n, ncol_local, &
     113              :                                                             nrow_local, output_unit, p
     114              :       REAL(KIND=dp)                                      :: expfactor, f2, f2old, gnorm, tnorm
     115              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     116              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_k_k
     117              :       TYPE(cp_fm_type)                                   :: CTmp, G, Gp1, Gp2, U
     118              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     119              : 
     120           30 :       CALL timeset(routineN, handle)
     121              : 
     122           30 :       NULLIFY (context, para_env, fm_struct_k_k)
     123              : 
     124           30 :       output_unit = cp_logger_get_default_io_unit()
     125              : 
     126              :       CALL cp_fm_struct_get(C%matrix_struct, nrow_global=n, ncol_global=k, &
     127              :                             nrow_local=nrow_local, ncol_local=ncol_local, &
     128           30 :                             para_env=para_env, context=context)
     129              :       CALL cp_fm_struct_create(fm_struct_k_k, para_env=para_env, context=context, &
     130           30 :                                nrow_global=k, ncol_global=k)
     131           30 :       CALL cp_fm_create(CTmp, C%matrix_struct)
     132           30 :       CALL cp_fm_create(U, fm_struct_k_k)
     133           30 :       CALL cp_fm_create(G, fm_struct_k_k)
     134           30 :       CALL cp_fm_create(Gp1, fm_struct_k_k)
     135           30 :       CALL cp_fm_create(Gp2, fm_struct_k_k)
     136              :       !
     137              :       ! printing
     138           30 :       IF (output_unit > 0) THEN
     139           15 :          WRITE (output_unit, '(1X)')
     140           15 :          WRITE (output_unit, '(2X,A)') '-----------------------------------------------------------------------------'
     141           15 :          WRITE (output_unit, '(A,I5)') '      Nbr iterations =', iterations
     142           15 :          WRITE (output_unit, '(A,E10.2)') '     eps convergence =', eps
     143           15 :          WRITE (output_unit, '(A,I5)') '    Max Taylor order =', taylor_order
     144           15 :          WRITE (output_unit, '(A,E10.2)') '              f2 eps =', f2_eps
     145           15 :          WRITE (output_unit, '(A,E10.2)') '               alpha =', alpha
     146           15 :          WRITE (output_unit, '(A)') '     iteration    approx_l1_norm    g_norm   rel_err'
     147              :       END IF
     148              :       !
     149           30 :       f2old = 0.0_dp
     150           30 :       converged = .FALSE.
     151              :       !
     152              :       ! Start the steepest descent
     153         1480 :       DO istep = 1, iterations
     154              :          !
     155              :          !-------------------------------------------------------------------
     156              :          ! compute f_2
     157              :          ! f_2(x)=(x^2+eps)^1/2
     158         1470 :          f2 = 0.0_dp
     159        22748 :          DO p = 1, ncol_local ! p
     160      1100956 :             DO i = 1, nrow_local ! i
     161      1099486 :                f2 = f2 + SQRT(C%local_data(i, p)**2 + f2_eps)
     162              :             END DO
     163              :          END DO
     164         1470 :          CALL C%matrix_struct%para_env%sum(f2)
     165              :          !write(*,*) 'qs_localize: f_2=',f2
     166              :          !-------------------------------------------------------------------
     167              :          ! compute the derivative of f_2
     168              :          ! f_2(x)=(x^2+eps)^1/2
     169        22748 :          DO p = 1, ncol_local ! p
     170      1100956 :             DO i = 1, nrow_local ! i
     171      1099486 :                CTmp%local_data(i, p) = C%local_data(i, p)/SQRT(C%local_data(i, p)**2 + f2_eps)
     172              :             END DO
     173              :          END DO
     174         1470 :          CALL parallel_gemm('T', 'N', k, k, n, 1.0_dp, CTmp, C, 0.0_dp, G)
     175              :          ! antisymmetrize
     176         1470 :          CALL cp_fm_transpose(G, U)
     177         1470 :          CALL cp_fm_scale_and_add(-0.5_dp, G, 0.5_dp, U)
     178              :          !
     179              :          !-------------------------------------------------------------------
     180              :          !
     181         1470 :          gnorm = cp_fm_frobenius_norm(G)
     182              :          !write(*,*) 'qs_localize: norm(G)=',gnorm
     183              :          !
     184              :          ! rescale for steepest descent
     185         1470 :          CALL cp_fm_scale(-alpha, G)
     186              :          !
     187              :          ! compute unitary transform
     188              :          ! zeroth order
     189         1470 :          CALL cp_fm_set_all(U, 0.0_dp, 1.0_dp)
     190              :          ! first order
     191         1470 :          expfactor = 1.0_dp
     192         1470 :          CALL cp_fm_scale_and_add(1.0_dp, U, expfactor, G)
     193         1470 :          tnorm = cp_fm_frobenius_norm(G)
     194              :          !write(*,*) 'Taylor expansion i=',1,' norm(X^i)/i!=',tnorm
     195         1470 :          IF (tnorm > 1.0E-10_dp) THEN
     196              :             ! other orders
     197         1470 :             CALL cp_fm_to_fm(G, Gp1)
     198         4866 :             DO i = 2, taylor_order
     199              :                ! new power of G
     200         4866 :                CALL parallel_gemm('N', 'N', k, k, k, 1.0_dp, G, Gp1, 0.0_dp, Gp2)
     201         4866 :                CALL cp_fm_to_fm(Gp2, Gp1)
     202              :                ! add to the taylor expansion so far
     203         4866 :                expfactor = expfactor/REAL(i, KIND=dp)
     204         4866 :                CALL cp_fm_scale_and_add(1.0_dp, U, expfactor, Gp1)
     205         4866 :                tnorm = cp_fm_frobenius_norm(Gp1)
     206              :                !write(*,*) 'Taylor expansion i=',i,' norm(X^i)/i!=',tnorm*expfactor
     207         4866 :                IF (tnorm*expfactor < 1.0E-10_dp) EXIT
     208              :             END DO
     209              :          END IF
     210              :          !
     211              :          ! incrementaly rotate the MOs
     212         1470 :          CALL parallel_gemm('N', 'N', n, k, k, 1.0_dp, C, U, 0.0_dp, CTmp)
     213         1470 :          CALL cp_fm_to_fm(CTmp, C)
     214              :          !
     215              :          ! printing
     216         1470 :          IF (output_unit > 0) THEN
     217          735 :             WRITE (output_unit, '(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, ABS((f2 - f2old)/f2)
     218              :          END IF
     219              :          !
     220              :          ! Are we done?
     221         1470 :          sweeps = istep
     222              :          !IF(gnorm<=grad_thresh.AND.ABS((f2-f2old)/f2)<=f2_thresh.AND.istep>1) THEN
     223         1470 :          IF (ABS((f2 - f2old)/f2) <= eps .AND. istep > 1) THEN
     224           20 :             converged = .TRUE.
     225           20 :             EXIT
     226              :          END IF
     227         1460 :          f2old = f2
     228              :       END DO
     229              :       !
     230              :       ! here we should do one refine step to enforce C'*S*C=1 for any case
     231              :       !
     232              :       ! Print the final result
     233           30 :       IF (output_unit > 0) WRITE (output_unit, '(A,E16.10)') ' sparseness function f2 = ', f2
     234              :       !
     235              :       ! sparsity
     236              :       !DO i=1,size(thresh,1)
     237              :       !   gnorm = 0.0_dp
     238              :       !   DO o=1,ncol_local
     239              :       !      DO p=1,nrow_local
     240              :       !         IF(ABS(C%local_data(p,o))>thresh(i)) THEN
     241              :       !            gnorm = gnorm + 1.0_dp
     242              :       !         ENDIF
     243              :       !      ENDDO
     244              :       !   ENDDO
     245              :       !   CALL C%matrix_struct%para_env%sum(gnorm)
     246              :       !   IF(output_unit>0) THEN
     247              :       !      WRITE(output_unit,*) 'qs_localize: ratio2=',gnorm / ( REAL(k,KIND=dp)*REAL(n,KIND=dp) ),thresh(i)
     248              :       !   ENDIF
     249              :       !ENDDO
     250              :       !
     251              :       ! deallocate
     252           30 :       CALL cp_fm_struct_release(fm_struct_k_k)
     253           30 :       CALL cp_fm_release(CTmp)
     254           30 :       CALL cp_fm_release(U)
     255           30 :       CALL cp_fm_release(G)
     256           30 :       CALL cp_fm_release(Gp1)
     257           30 :       CALL cp_fm_release(Gp2)
     258              : 
     259           30 :       CALL timestop(handle)
     260              : 
     261           30 :    END SUBROUTINE approx_l1_norm_sd
     262              : ! **************************************************************************************************
     263              : !> \brief ...
     264              : !> \param cell ...
     265              : !> \param weights ...
     266              : ! **************************************************************************************************
     267          504 :    SUBROUTINE initialize_weights(cell, weights)
     268              : 
     269              :       TYPE(cell_type), POINTER                           :: cell
     270              :       REAL(KIND=dp), DIMENSION(:)                        :: weights
     271              : 
     272              :       REAL(KIND=dp), DIMENSION(3, 3)                     :: metric
     273              : 
     274          504 :       CPASSERT(ASSOCIATED(cell))
     275              : 
     276          504 :       metric = 0.0_dp
     277          504 :       CALL dgemm('T', 'N', 3, 3, 3, 1._dp, cell%hmat(:, :), 3, cell%hmat(:, :), 3, 0.0_dp, metric(:, :), 3)
     278              : 
     279          504 :       weights(1) = METRIC(1, 1) - METRIC(1, 2) - METRIC(1, 3)
     280          504 :       weights(2) = METRIC(2, 2) - METRIC(1, 2) - METRIC(2, 3)
     281          504 :       weights(3) = METRIC(3, 3) - METRIC(1, 3) - METRIC(2, 3)
     282          504 :       weights(4) = METRIC(1, 2)
     283          504 :       weights(5) = METRIC(1, 3)
     284          504 :       weights(6) = METRIC(2, 3)
     285              : 
     286          504 :    END SUBROUTINE initialize_weights
     287              : 
     288              : ! **************************************************************************************************
     289              : !> \brief wrapper for the jacobi routines, should be removed if jacobi_rot_para
     290              : !>        can deal with serial para_envs.
     291              : !> \param weights ...
     292              : !> \param zij ...
     293              : !> \param vectors ...
     294              : !> \param para_env ...
     295              : !> \param max_iter ...
     296              : !> \param eps_localization ...
     297              : !> \param sweeps ...
     298              : !> \param out_each ...
     299              : !> \param target_time ...
     300              : !> \param start_time ...
     301              : !> \param restricted ...
     302              : !> \par History
     303              : !> \author Joost VandeVondele (02.2010)
     304              : ! **************************************************************************************************
     305          390 :    SUBROUTINE jacobi_rotations(weights, zij, vectors, para_env, max_iter, &
     306              :                                eps_localization, sweeps, out_each, target_time, start_time, restricted)
     307              : 
     308              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
     309              :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :), vectors
     310              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     311              :       INTEGER, INTENT(IN)                                :: max_iter
     312              :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
     313              :       INTEGER                                            :: sweeps
     314              :       INTEGER, INTENT(IN)                                :: out_each
     315              :       REAL(dp)                                           :: target_time, start_time
     316              :       INTEGER                                            :: restricted
     317              : 
     318          390 :       IF (para_env%num_pe == 1) THEN
     319              :          CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, &
     320            0 :                                       sweeps, out_each, restricted=restricted)
     321              :       ELSE
     322              :          CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
     323          390 :                               sweeps, out_each, target_time, start_time, restricted=restricted)
     324              :       END IF
     325              : 
     326          390 :    END SUBROUTINE jacobi_rotations
     327              : 
     328              : ! **************************************************************************************************
     329              : !> \brief this routine, private to the module is a serial backup, till we have jacobi_rot_para to work in serial
     330              : !>        while the routine below works in parallel, it is too slow to be useful
     331              : !> \param weights ...
     332              : !> \param zij ...
     333              : !> \param vectors ...
     334              : !> \param max_iter ...
     335              : !> \param eps_localization ...
     336              : !> \param sweeps ...
     337              : !> \param out_each ...
     338              : !> \param restricted ...
     339              : ! **************************************************************************************************
     340            0 :    SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, &
     341              :                                       out_each, restricted)
     342              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
     343              :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :), vectors
     344              :       INTEGER, INTENT(IN)                                :: max_iter
     345              :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
     346              :       INTEGER                                            :: sweeps
     347              :       INTEGER, INTENT(IN)                                :: out_each
     348              :       INTEGER                                            :: restricted
     349              : 
     350              :       CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rotations_serial'
     351              : 
     352              :       COMPLEX(KIND=dp), POINTER                          :: mii(:), mij(:), mjj(:)
     353              :       INTEGER                                            :: dim2, handle, idim, istate, jstate, &
     354              :                                                             nstate, unit_nr
     355              :       REAL(KIND=dp)                                      :: ct, st, t1, t2, theta, tolerance
     356              :       TYPE(cp_cfm_type)                                  :: c_rmat
     357              :       TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:)       :: c_zij
     358              :       TYPE(cp_fm_type)                                   :: rmat
     359              : 
     360            0 :       CALL timeset(routineN, handle)
     361              : 
     362            0 :       dim2 = SIZE(zij, 2)
     363            0 :       ALLOCATE (c_zij(dim2))
     364              :       NULLIFY (mii, mij, mjj)
     365            0 :       ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
     366              : 
     367            0 :       CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
     368            0 :       CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
     369              : 
     370            0 :       CALL cp_cfm_create(c_rmat, zij(1, 1)%matrix_struct)
     371            0 :       CALL cp_cfm_set_all(c_rmat, (0._dp, 0._dp), (1._dp, 0._dp))
     372            0 :       DO idim = 1, dim2
     373            0 :          CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
     374              :          c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
     375            0 :                                         zij(2, idim)%local_data, dp)
     376              :       END DO
     377              : 
     378            0 :       CALL cp_fm_get_info(rmat, nrow_global=nstate)
     379            0 :       tolerance = 1.0e10_dp
     380              : 
     381            0 :       sweeps = 0
     382            0 :       unit_nr = -1
     383            0 :       IF (rmat%matrix_struct%para_env%is_source()) THEN
     384            0 :          unit_nr = cp_logger_get_default_unit_nr()
     385            0 :          WRITE (unit_nr, '(T4,A )') " Localization by iterative Jacobi rotation"
     386              :       END IF
     387              : 
     388            0 :       IF (restricted > 0) THEN
     389            0 :          unit_nr = cp_logger_get_default_unit_nr()
     390            0 :          WRITE (unit_nr, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
     391            0 :          nstate = nstate - restricted
     392              :       END IF
     393              : 
     394              :       ! do jacobi sweeps until converged
     395            0 :       DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
     396            0 :          sweeps = sweeps + 1
     397            0 :          t1 = m_walltime()
     398              : 
     399            0 :          DO istate = 1, nstate
     400            0 :             DO jstate = istate + 1, nstate
     401            0 :                DO idim = 1, dim2
     402            0 :                   CALL cp_cfm_get_element(c_zij(idim), istate, istate, mii(idim))
     403            0 :                   CALL cp_cfm_get_element(c_zij(idim), istate, jstate, mij(idim))
     404            0 :                   CALL cp_cfm_get_element(c_zij(idim), jstate, jstate, mjj(idim))
     405              :                END DO
     406            0 :                CALL get_angle(mii, mjj, mij, weights, theta)
     407            0 :                st = SIN(theta)
     408            0 :                ct = COS(theta)
     409            0 :                CALL rotate_zij(istate, jstate, st, ct, c_zij)
     410              : 
     411            0 :                CALL rotate_rmat(istate, jstate, st, ct, c_rmat)
     412              :             END DO
     413              :          END DO
     414              : 
     415            0 :          CALL check_tolerance(c_zij, weights, tolerance)
     416              : 
     417            0 :          t2 = m_walltime()
     418            0 :          IF (unit_nr > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
     419              :             WRITE (unit_nr, '(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') &
     420            0 :                "Iteration:", sweeps, "Tolerance:", tolerance, "Time:", t2 - t1
     421            0 :             CALL m_flush(unit_nr)
     422              :          END IF
     423              : 
     424              :       END DO
     425              : 
     426            0 :       DO idim = 1, dim2
     427            0 :          zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
     428            0 :          zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
     429            0 :          CALL cp_cfm_release(c_zij(idim))
     430              :       END DO
     431            0 :       DEALLOCATE (c_zij)
     432            0 :       DEALLOCATE (mii, mij, mjj)
     433            0 :       rmat%local_data = REAL(c_rmat%local_data, dp)
     434              : 
     435            0 :       CALL rotate_orbitals(rmat, vectors)
     436              : 
     437            0 :       CALL cp_cfm_release(c_rmat)
     438            0 :       CALL cp_fm_release(rmat)
     439              : 
     440            0 :       CALL timestop(handle)
     441              : 
     442            0 :    END SUBROUTINE jacobi_rotations_serial
     443              : ! **************************************************************************************************
     444              : !> \brief very similar to jacobi_rotations_serial with some extra output options
     445              : !> \param weights ...
     446              : !> \param c_zij ...
     447              : !> \param max_iter ...
     448              : !> \param c_rmat ...
     449              : !> \param eps_localization ...
     450              : !> \param tol_out ...
     451              : !> \param jsweeps ...
     452              : !> \param out_each ...
     453              : !> \param c_zij_out ...
     454              : !> \param grad_final ...
     455              : ! **************************************************************************************************
     456            0 :    SUBROUTINE jacobi_rotations_serial_1(weights, c_zij, max_iter, c_rmat, eps_localization, &
     457            0 :                                         tol_out, jsweeps, out_each, c_zij_out, grad_final)
     458              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
     459              :       TYPE(cp_cfm_type), INTENT(IN)                      :: c_zij(:)
     460              :       INTEGER, INTENT(IN)                                :: max_iter
     461              :       TYPE(cp_cfm_type), INTENT(IN)                      :: c_rmat
     462              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: eps_localization
     463              :       REAL(KIND=dp), INTENT(OUT), OPTIONAL               :: tol_out
     464              :       INTEGER, INTENT(OUT), OPTIONAL                     :: jsweeps
     465              :       INTEGER, INTENT(IN), OPTIONAL                      :: out_each
     466              :       TYPE(cp_cfm_type), INTENT(IN), OPTIONAL            :: c_zij_out(:)
     467              :       TYPE(cp_fm_type), INTENT(OUT), OPTIONAL, POINTER   :: grad_final
     468              : 
     469              :       CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rotations_serial_1'
     470              : 
     471              :       COMPLEX(KIND=dp)                                   :: mzii
     472              :       COMPLEX(KIND=dp), POINTER                          :: mii(:), mij(:), mjj(:)
     473              :       INTEGER                                            :: dim2, handle, idim, istate, jstate, &
     474              :                                                             nstate, sweeps, unit_nr
     475              :       REAL(KIND=dp)                                      :: alpha, avg_spread_ii, ct, spread_ii, st, &
     476              :                                                             sum_spread_ii, t1, t2, theta, tolerance
     477              :       TYPE(cp_cfm_type)                                  :: c_rmat_local
     478              :       TYPE(cp_cfm_type), ALLOCATABLE                     :: c_zij_local(:)
     479              : 
     480            0 :       CALL timeset(routineN, handle)
     481              : 
     482            0 :       dim2 = SIZE(c_zij)
     483              :       NULLIFY (mii, mij, mjj)
     484            0 :       ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
     485              : 
     486            0 :       ALLOCATE (c_zij_local(dim2))
     487            0 :       CALL cp_cfm_create(c_rmat_local, c_rmat%matrix_struct)
     488            0 :       CALL cp_cfm_set_all(c_rmat_local, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
     489            0 :       DO idim = 1, dim2
     490            0 :          CALL cp_cfm_create(c_zij_local(idim), c_zij(idim)%matrix_struct)
     491            0 :          c_zij_local(idim)%local_data = c_zij(idim)%local_data
     492              :       END DO
     493              : 
     494            0 :       CALL cp_cfm_get_info(c_rmat_local, nrow_global=nstate)
     495            0 :       tolerance = 1.0e10_dp
     496              : 
     497            0 :       IF (PRESENT(grad_final)) CALL cp_fm_set_all(grad_final, 0.0_dp)
     498              : 
     499            0 :       sweeps = 0
     500            0 :       IF (PRESENT(out_each)) THEN
     501            0 :          unit_nr = -1
     502            0 :          IF (c_rmat_local%matrix_struct%para_env%is_source()) THEN
     503            0 :             unit_nr = cp_logger_get_default_unit_nr()
     504              :          END IF
     505            0 :          alpha = 0.0_dp
     506            0 :          DO idim = 1, dim2
     507            0 :             alpha = alpha + weights(idim)
     508              :          END DO
     509              :       END IF
     510              : 
     511              :       ! do jacobi sweeps until converged
     512            0 :       DO WHILE (sweeps < max_iter)
     513            0 :          sweeps = sweeps + 1
     514            0 :          IF (PRESENT(eps_localization)) THEN
     515            0 :             IF (tolerance < eps_localization) EXIT
     516              :          END IF
     517            0 :          IF (PRESENT(out_each)) t1 = m_walltime()
     518              : 
     519            0 :          DO istate = 1, nstate
     520            0 :             DO jstate = istate + 1, nstate
     521            0 :                DO idim = 1, dim2
     522            0 :                   CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mii(idim))
     523            0 :                   CALL cp_cfm_get_element(c_zij_local(idim), istate, jstate, mij(idim))
     524            0 :                   CALL cp_cfm_get_element(c_zij_local(idim), jstate, jstate, mjj(idim))
     525              :                END DO
     526            0 :                CALL get_angle(mii, mjj, mij, weights, theta)
     527            0 :                st = SIN(theta)
     528            0 :                ct = COS(theta)
     529            0 :                CALL rotate_zij(istate, jstate, st, ct, c_zij_local)
     530              : 
     531            0 :                CALL rotate_rmat(istate, jstate, st, ct, c_rmat_local)
     532              :             END DO
     533              :          END DO
     534              : 
     535            0 :          IF (PRESENT(grad_final)) THEN
     536            0 :             CALL check_tolerance(c_zij_local, weights, tolerance, grad=grad_final)
     537              :          ELSE
     538            0 :             CALL check_tolerance(c_zij_local, weights, tolerance)
     539              :          END IF
     540            0 :          IF (PRESENT(tol_out)) tol_out = tolerance
     541              : 
     542            0 :          IF (PRESENT(out_each)) THEN
     543            0 :             t2 = m_walltime()
     544            0 :             IF (unit_nr > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
     545            0 :                sum_spread_ii = 0.0_dp
     546            0 :                DO istate = 1, nstate
     547              :                   spread_ii = 0.0_dp
     548            0 :                   DO idim = 1, dim2
     549            0 :                      CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mzii)
     550              :                      spread_ii = spread_ii + weights(idim)* &
     551            0 :                                  ABS(mzii)**2/twopi/twopi
     552              :                   END DO
     553            0 :                   sum_spread_ii = sum_spread_ii + spread_ii
     554              :                END DO
     555            0 :                sum_spread_ii = alpha*nstate/twopi/twopi - sum_spread_ii
     556            0 :                avg_spread_ii = sum_spread_ii/nstate
     557              :                WRITE (unit_nr, '(T4,A,T26,A,T48,A,T64,A)') &
     558            0 :                   "Iteration", "Avg. Spread_ii", "Tolerance", "Time"
     559              :                WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4,T60,F8.3)') &
     560            0 :                   sweeps, avg_spread_ii, tolerance, t2 - t1
     561            0 :                CALL m_flush(unit_nr)
     562              :             END IF
     563            0 :             IF (PRESENT(jsweeps)) jsweeps = sweeps
     564              :          END IF
     565              : 
     566              :       END DO
     567              : 
     568            0 :       IF (PRESENT(c_zij_out)) THEN
     569            0 :          DO idim = 1, dim2
     570            0 :             CALL cp_cfm_to_cfm(c_zij_local(idim), c_zij_out(idim))
     571              :          END DO
     572              :       END IF
     573            0 :       CALL cp_cfm_to_cfm(c_rmat_local, c_rmat)
     574              : 
     575            0 :       DEALLOCATE (mii, mij, mjj)
     576            0 :       DO idim = 1, dim2
     577            0 :          CALL cp_cfm_release(c_zij_local(idim))
     578              :       END DO
     579            0 :       DEALLOCATE (c_zij_local)
     580            0 :       CALL cp_cfm_release(c_rmat_local)
     581              : 
     582            0 :       CALL timestop(handle)
     583              : 
     584            0 :    END SUBROUTINE jacobi_rotations_serial_1
     585              : ! **************************************************************************************************
     586              : !> \brief combine jacobi rotations (serial) and conjugate gradient with golden section line search
     587              : !>        for partially occupied wannier functions
     588              : !> \param para_env ...
     589              : !> \param weights ...
     590              : !> \param zij ...
     591              : !> \param vectors ...
     592              : !> \param max_iter ...
     593              : !> \param eps_localization ...
     594              : !> \param iter ...
     595              : !> \param out_each ...
     596              : !> \param nextra ...
     597              : !> \param do_cg ...
     598              : !> \param nmo ...
     599              : !> \param vectors_2 ...
     600              : !> \param mos_guess ...
     601              : ! **************************************************************************************************
     602            2 :    SUBROUTINE jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, &
     603              :                                iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
     604              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     605              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
     606              :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :), vectors
     607              :       INTEGER, INTENT(IN)                                :: max_iter
     608              :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
     609              :       INTEGER                                            :: iter
     610              :       INTEGER, INTENT(IN)                                :: out_each, nextra
     611              :       LOGICAL, INTENT(IN)                                :: do_cg
     612              :       INTEGER, INTENT(IN), OPTIONAL                      :: nmo
     613              :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: vectors_2, mos_guess
     614              : 
     615              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'jacobi_cg_edf_ls'
     616              :       COMPLEX(KIND=dp), PARAMETER                        :: cone = (1.0_dp, 0.0_dp), &
     617              :                                                             czero = (0.0_dp, 0.0_dp)
     618              :       REAL(KIND=dp), PARAMETER                           :: gold_sec = 0.3819_dp
     619              : 
     620              :       COMPLEX(KIND=dp)                                   :: cnorm2_Gct, cnorm2_Gct_cross, mzii
     621            2 :       COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :)     :: tmp_cmat
     622            2 :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: arr_zii
     623            2 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: matrix_zii
     624              :       INTEGER :: dim2, handle, icinit, idim, istate, line_search_count, line_searches, lsl, lsm, &
     625              :          lsr, miniter, nao, ndummy, nocc, norextra, northo, nstate, unit_nr
     626              :       INTEGER, DIMENSION(1)                              :: iloc
     627              :       LOGICAL                                            :: do_cinit_mo, do_cinit_random, &
     628              :                                                             do_U_guess_mo, new_direction
     629              :       REAL(KIND=dp) :: alpha, avg_spread_ii, beta, beta_pr, ds, ds_min, mintol, norm, norm2_Gct, &
     630              :          norm2_Gct_cross, norm2_old, spread_ii, spread_sum, sum_spread_ii, t1, tol, tolc, weight
     631              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: sum_spread
     632            2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: tmp_mat, tmp_mat_1
     633              :       REAL(KIND=dp), DIMENSION(50)                       :: energy, pos
     634            2 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: tmp_arr
     635              :       TYPE(cp_blacs_env_type), POINTER                   :: context
     636              :       TYPE(cp_cfm_type)                                  :: c_tilde, ctrans_lambda, Gct_old, &
     637              :                                                             grad_ctilde, skc, tmp_cfm, tmp_cfm_1, &
     638              :                                                             tmp_cfm_2, U, UL, V, VL, zdiag
     639            2 :       TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:)       :: c_zij, zij_0
     640              :       TYPE(cp_fm_struct_type), POINTER                   :: tmp_fm_struct
     641              :       TYPE(cp_fm_type)                                   :: id_nextra, matrix_U, matrix_V, &
     642              :                                                             matrix_V_all, rmat, tmp_fm, vectors_all
     643              : 
     644            2 :       CALL timeset(routineN, handle)
     645              : 
     646            2 :       dim2 = SIZE(zij, 2)
     647            2 :       NULLIFY (context)
     648            2 :       NULLIFY (matrix_zii, arr_zii)
     649            2 :       NULLIFY (tmp_fm_struct)
     650            2 :       NULLIFY (tmp_arr)
     651              : 
     652           12 :       ALLOCATE (c_zij(dim2))
     653              : 
     654            2 :       CALL cp_fm_get_info(zij(1, 1), nrow_global=nstate)
     655              : 
     656            6 :       ALLOCATE (sum_spread(nstate))
     657            8 :       ALLOCATE (matrix_zii(nstate, dim2))
     658          266 :       matrix_zii = czero
     659            2 :       sum_spread = 0.0_dp
     660              : 
     661            2 :       alpha = 0.0_dp
     662            8 :       DO idim = 1, dim2
     663            6 :          alpha = alpha + weights(idim)
     664            6 :          CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
     665              :          c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
     666         5813 :                                         zij(2, idim)%local_data, dp)
     667              :       END DO
     668              : 
     669           10 :       ALLOCATE (zij_0(dim2))
     670              : 
     671            2 :       CALL cp_cfm_create(U, zij(1, 1)%matrix_struct)
     672            2 :       CALL cp_fm_create(matrix_U, zij(1, 1)%matrix_struct)
     673              : 
     674            2 :       CALL cp_cfm_set_all(U, czero, cone)
     675            2 :       CALL cp_fm_set_all(matrix_U, 0.0_dp, 1.0_dp)
     676              : 
     677            2 :       CALL cp_fm_get_info(vectors, nrow_global=nao)
     678            2 :       IF (nextra > 0) THEN
     679            2 :          IF (PRESENT(mos_guess)) THEN
     680            2 :             do_cinit_random = .FALSE.
     681            2 :             do_cinit_mo = .TRUE.
     682            2 :             CALL cp_fm_get_info(mos_guess, ncol_global=ndummy)
     683              :          ELSE
     684            0 :             do_cinit_random = .TRUE.
     685            0 :             do_cinit_mo = .FALSE.
     686            0 :             ndummy = nstate
     687              :          END IF
     688              : 
     689              :          IF (do_cinit_random) THEN
     690              :             icinit = 1
     691              :             do_U_guess_mo = .FALSE.
     692              :          ELSEIF (do_cinit_mo) THEN
     693            2 :             icinit = 2
     694            2 :             do_U_guess_mo = .TRUE.
     695              :          END IF
     696              : 
     697            2 :          nocc = nstate - nextra
     698            2 :          northo = nmo - nocc
     699            2 :          norextra = nmo - nstate
     700            2 :          CALL cp_fm_struct_get(zij(1, 1)%matrix_struct, context=context)
     701              : 
     702            8 :          ALLOCATE (tmp_cmat(nstate, nstate))
     703              :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nmo, &
     704            2 :                                   para_env=para_env, context=context)
     705            8 :          DO idim = 1, dim2
     706            6 :             CALL cp_cfm_create(zij_0(idim), tmp_fm_struct)
     707            6 :             CALL cp_cfm_set_all(zij_0(idim), czero, cone)
     708            6 :             CALL cp_cfm_get_submatrix(c_zij(idim), tmp_cmat)
     709            8 :             CALL cp_cfm_set_submatrix(zij_0(idim), tmp_cmat)
     710              :          END DO
     711            2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     712            2 :          DEALLOCATE (tmp_cmat)
     713              : 
     714              :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nstate, &
     715            2 :                                   para_env=para_env, context=context)
     716            2 :          CALL cp_cfm_create(V, tmp_fm_struct)
     717            2 :          CALL cp_fm_create(matrix_V, tmp_fm_struct)
     718            2 :          CALL cp_cfm_create(zdiag, tmp_fm_struct)
     719            2 :          CALL cp_fm_create(rmat, tmp_fm_struct)
     720            2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     721            2 :          CALL cp_cfm_set_all(V, czero, cone)
     722            2 :          CALL cp_fm_set_all(matrix_V, 0.0_dp, 1.0_dp)
     723              : 
     724              :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=ndummy, &
     725            2 :                                   para_env=para_env, context=context)
     726            2 :          CALL cp_fm_create(matrix_V_all, tmp_fm_struct)
     727            2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     728            2 :          CALL cp_fm_set_all(matrix_V_all, 0._dp, 1._dp)
     729              : 
     730            6 :          ALLOCATE (arr_zii(nstate))
     731              : 
     732              :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
     733            2 :                                   para_env=para_env, context=context)
     734            2 :          CALL cp_cfm_create(c_tilde, tmp_fm_struct)
     735            2 :          CALL cp_cfm_create(grad_ctilde, tmp_fm_struct)
     736            2 :          CALL cp_cfm_create(Gct_old, tmp_fm_struct)
     737            2 :          CALL cp_cfm_create(skc, tmp_fm_struct)
     738            2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     739            2 :          CALL cp_cfm_set_all(c_tilde, czero)
     740            2 :          CALL cp_cfm_set_all(Gct_old, czero)
     741            2 :          CALL cp_cfm_set_all(skc, czero)
     742              : 
     743              :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nstate, &
     744            2 :                                   para_env=para_env, context=context)
     745            2 :          CALL cp_cfm_create(VL, tmp_fm_struct)
     746            2 :          CALL cp_cfm_set_all(VL, czero)
     747            2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     748              : 
     749              :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nextra, &
     750            2 :                                   para_env=para_env, context=context)
     751            2 :          CALL cp_fm_create(id_nextra, tmp_fm_struct)
     752            2 :          CALL cp_cfm_create(ctrans_lambda, tmp_fm_struct)
     753            2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     754            2 :          CALL cp_cfm_set_all(ctrans_lambda, czero)
     755            2 :          CALL cp_fm_set_all(id_nextra, 0.0_dp, 1.0_dp)
     756              : 
     757              :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nstate, &
     758            2 :                                   para_env=para_env, context=context)
     759            2 :          CALL cp_cfm_create(UL, tmp_fm_struct)
     760            2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     761            2 :          CALL cp_cfm_set_all(UL, czero)
     762              : 
     763              :          CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nao, ncol_global=nmo, &
     764            2 :                                   para_env=para_env, context=context)
     765            2 :          CALL cp_fm_create(vectors_all, tmp_fm_struct)
     766            2 :          CALL cp_fm_struct_release(tmp_fm_struct)
     767            8 :          ALLOCATE (tmp_mat(nao, nstate))
     768            2 :          CALL cp_fm_get_submatrix(vectors, tmp_mat)
     769            2 :          CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, 1, nao, nstate)
     770            2 :          DEALLOCATE (tmp_mat)
     771            8 :          ALLOCATE (tmp_mat(nao, norextra))
     772            2 :          CALL cp_fm_get_submatrix(vectors_2, tmp_mat)
     773            2 :          CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, nstate + 1, nao, norextra)
     774            2 :          DEALLOCATE (tmp_mat)
     775              : 
     776              :          ! initialize c_tilde
     777           14 :          SELECT CASE (icinit)
     778              :          CASE (1) ! random coefficients
     779              :             !WRITE (*, *) "RANDOM INITIAL GUESS FOR C"
     780            0 :             CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
     781            0 :             CALL cp_fm_init_random(tmp_fm, nextra)
     782            0 :             CALL ortho_vectors(tmp_fm)
     783            0 :             c_tilde%local_data = tmp_fm%local_data
     784            0 :             CALL cp_fm_release(tmp_fm)
     785            0 :             ALLOCATE (tmp_cmat(northo, nextra))
     786            0 :             CALL cp_cfm_get_submatrix(c_tilde, tmp_cmat)
     787            0 :             CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, nocc + 1, northo, nextra)
     788            0 :             DEALLOCATE (tmp_cmat)
     789              :          CASE (2) ! MO based coeffs
     790            2 :             CALL parallel_gemm("T", "N", nmo, ndummy, nao, 1.0_dp, vectors_all, mos_guess, 0.0_dp, matrix_V_all)
     791            6 :             ALLOCATE (tmp_arr(nmo))
     792            8 :             ALLOCATE (tmp_mat(nmo, ndummy))
     793            8 :             ALLOCATE (tmp_mat_1(nmo, nstate))
     794              :             ! normalize matrix_V_all
     795            2 :             CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat)
     796           88 :             DO istate = 1, ndummy
     797         4558 :                tmp_arr(:) = tmp_mat(:, istate)
     798         4558 :                norm = NORM2(tmp_arr)
     799         4558 :                tmp_arr(:) = tmp_arr(:)/norm
     800         4560 :                tmp_mat(:, istate) = tmp_arr(:)
     801              :             END DO
     802            2 :             CALL cp_fm_set_submatrix(matrix_V_all, tmp_mat)
     803            2 :             CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat_1, 1, 1, nmo, nstate)
     804            2 :             CALL cp_fm_set_submatrix(matrix_V, tmp_mat_1)
     805            2 :             DEALLOCATE (tmp_arr, tmp_mat, tmp_mat_1)
     806            2 :             CALL cp_fm_to_cfm(msourcer=matrix_V, mtarget=V)
     807            8 :             ALLOCATE (tmp_mat(northo, ndummy))
     808            8 :             ALLOCATE (tmp_mat_1(northo, nextra))
     809            2 :             CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat, nocc + 1, 1, northo, ndummy)
     810            6 :             ALLOCATE (tmp_arr(ndummy))
     811           88 :             tmp_arr = 0.0_dp
     812           88 :             DO istate = 1, ndummy
     813         1034 :                tmp_arr(istate) = NORM2(tmp_mat(:, istate))
     814              :             END DO
     815              :             ! find edfs
     816            6 :             DO istate = 1, nextra
     817          180 :                iloc = MAXLOC(tmp_arr)
     818           48 :                tmp_mat_1(:, istate) = tmp_mat(:, iloc(1))
     819            6 :                tmp_arr(iloc(1)) = 0.0_dp
     820              :             END DO
     821              : 
     822            2 :             DEALLOCATE (tmp_arr, tmp_mat)
     823              : 
     824              :             CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
     825            2 :                                      para_env=para_env, context=context)
     826            2 :             CALL cp_fm_create(tmp_fm, tmp_fm_struct)
     827            2 :             CALL cp_fm_struct_release(tmp_fm_struct)
     828            2 :             CALL cp_fm_set_submatrix(tmp_fm, tmp_mat_1)
     829            2 :             DEALLOCATE (tmp_mat_1)
     830            2 :             CALL ortho_vectors(tmp_fm)
     831            2 :             CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
     832            2 :             CALL cp_fm_release(tmp_fm)
     833              :             ! initialize U
     834            2 :             IF (do_U_guess_mo) THEN
     835            8 :                ALLOCATE (tmp_cmat(nocc, nstate))
     836            2 :                CALL cp_cfm_get_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
     837            2 :                CALL cp_cfm_set_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
     838            2 :                DEALLOCATE (tmp_cmat)
     839            8 :                ALLOCATE (tmp_cmat(northo, nstate))
     840            2 :                CALL cp_cfm_get_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
     841            2 :                CALL cp_cfm_set_submatrix(VL, tmp_cmat, 1, 1, northo, nstate)
     842            2 :                DEALLOCATE (tmp_cmat)
     843            2 :                CALL parallel_gemm("C", "N", nextra, nstate, northo, cone, c_tilde, VL, czero, UL)
     844            8 :                ALLOCATE (tmp_cmat(nextra, nstate))
     845            2 :                CALL cp_cfm_get_submatrix(UL, tmp_cmat, 1, 1, nextra, nstate)
     846            2 :                CALL cp_cfm_set_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
     847            2 :                DEALLOCATE (tmp_cmat)
     848            2 :                CALL cp_fm_create(tmp_fm, U%matrix_struct)
     849         1937 :                tmp_fm%local_data = REAL(U%local_data, KIND=dp)
     850            2 :                CALL ortho_vectors(tmp_fm)
     851            2 :                CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=U)
     852            2 :                CALL cp_fm_release(tmp_fm)
     853            4 :                CALL cp_cfm_to_fm(U, matrix_U)
     854              :             END IF
     855              :             ! reevaluate V
     856            8 :             ALLOCATE (tmp_cmat(nocc, nstate))
     857            2 :             CALL cp_cfm_get_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
     858            2 :             CALL cp_cfm_set_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
     859            2 :             DEALLOCATE (tmp_cmat)
     860            8 :             ALLOCATE (tmp_cmat(nextra, nstate))
     861            2 :             CALL cp_cfm_get_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
     862            2 :             CALL cp_cfm_set_submatrix(UL, tmp_cmat, 1, 1, nextra, nstate)
     863            2 :             DEALLOCATE (tmp_cmat)
     864            2 :             CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, UL, czero, VL)
     865            8 :             ALLOCATE (tmp_cmat(northo, nstate))
     866            2 :             CALL cp_cfm_get_submatrix(VL, tmp_cmat)
     867            2 :             CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
     868            6 :             DEALLOCATE (tmp_cmat)
     869              :          END SELECT
     870              :       ELSE
     871            0 :          DO idim = 1, dim2
     872            0 :             CALL cp_cfm_create(zij_0(idim), zij(1, 1)%matrix_struct)
     873            0 :             CALL cp_cfm_to_cfm(c_zij(idim), zij_0(idim))
     874              :          END DO
     875            0 :          CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
     876            0 :          CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
     877              :       END IF
     878              : 
     879            2 :       unit_nr = -1
     880            2 :       IF (rmat%matrix_struct%para_env%is_source()) THEN
     881            1 :          unit_nr = cp_logger_get_default_unit_nr()
     882            1 :          WRITE (unit_nr, '(T4,A )') " Localization by combined Jacobi rotations and Non-Linear Conjugate Gradient"
     883              :       END IF
     884              : 
     885            2 :       norm2_old = 1.0E30_dp
     886            2 :       ds_min = 1.0_dp
     887            2 :       new_direction = .TRUE.
     888            2 :       iter = 0
     889            2 :       line_searches = 0
     890            2 :       line_search_count = 0
     891            2 :       tol = 1.0E+20_dp
     892            2 :       mintol = 1.0E+10_dp
     893            2 :       miniter = 0
     894              : 
     895              :       !IF (nextra > 0) WRITE(*,*) 'random_guess, MO_guess, U_guess, conjugate_gradient: ', &
     896              :       !                            do_cinit_random, do_cinit_mo, do_U_guess_mo, do_cg
     897              : 
     898              :       ! do conjugate gradient until converged
     899           34 :       DO WHILE (iter < max_iter)
     900           34 :          iter = iter + 1
     901              :          !WRITE(*,*) 'iter = ', iter
     902           34 :          t1 = m_walltime()
     903              : 
     904           34 :          IF (iter > 1) THEN
     905              :             ! comput U
     906           32 :             CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
     907           32 :             CALL cp_cfm_create(tmp_cfm_2, zij(1, 1)%matrix_struct)
     908           32 :             IF (para_env%num_pe == 1) THEN
     909            0 :                CALL jacobi_rotations_serial_1(weights, c_zij, 1, tmp_cfm_2, tol_out=tol)
     910              :             ELSE
     911           32 :                CALL jacobi_rot_para_1(weights, c_zij, para_env, 1, tmp_cfm_2, tol_out=tol)
     912              :             END IF
     913           32 :             CALL parallel_gemm('N', 'N', nstate, nstate, nstate, cone, U, tmp_cfm_2, czero, tmp_cfm)
     914           32 :             CALL cp_cfm_to_cfm(tmp_cfm, U)
     915           32 :             CALL cp_cfm_release(tmp_cfm)
     916           32 :             CALL cp_cfm_release(tmp_cfm_2)
     917              :          END IF
     918              : 
     919           34 :          IF (nextra > 0) THEN
     920          136 :             ALLOCATE (tmp_cmat(nextra, nstate))
     921           34 :             CALL cp_cfm_get_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
     922           34 :             CALL cp_cfm_set_submatrix(UL, tmp_cmat)
     923           34 :             DEALLOCATE (tmp_cmat)
     924           34 :             IF (iter > 1) THEN
     925              :                ! orthonormalize c_tilde
     926           32 :                CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
     927          448 :                tmp_fm%local_data = REAL(c_tilde%local_data, KIND=dp)
     928           32 :                CALL ortho_vectors(tmp_fm)
     929           32 :                CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
     930           32 :                CALL cp_fm_release(tmp_fm)
     931              : 
     932          128 :                ALLOCATE (tmp_cmat(nocc, nstate))
     933           32 :                CALL cp_cfm_get_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
     934           32 :                CALL cp_cfm_set_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
     935           32 :                DEALLOCATE (tmp_cmat)
     936           32 :                CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, UL, czero, VL)
     937          128 :                ALLOCATE (tmp_cmat(northo, nstate))
     938           32 :                CALL cp_cfm_get_submatrix(VL, tmp_cmat)
     939           32 :                CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
     940           64 :                DEALLOCATE (tmp_cmat)
     941              :             END IF
     942              : 
     943              :             ! reset if new_direction
     944           34 :             IF (new_direction .AND. MOD(line_searches, 20) == 5) THEN
     945            0 :                CALL cp_cfm_set_all(skc, czero)
     946            0 :                CALL cp_cfm_set_all(Gct_old, czero)
     947            0 :                norm2_old = 1.0E30_dp
     948              :             END IF
     949              : 
     950           34 :             CALL cp_cfm_create(tmp_cfm, V%matrix_struct)
     951           34 :             CALL cp_cfm_to_cfm(V, tmp_cfm)
     952           34 :             CALL cp_cfm_create(tmp_cfm_1, V%matrix_struct)
     953           68 :             ndummy = nmo
     954              :          ELSE
     955            0 :             CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
     956            0 :             CALL cp_cfm_to_cfm(U, tmp_cfm)
     957            0 :             CALL cp_cfm_create(tmp_cfm_1, zij(1, 1)%matrix_struct)
     958            0 :             ndummy = nstate
     959              :          END IF
     960              :          ! update z_ij
     961          136 :          DO idim = 1, dim2
     962              :             ! 'tmp_cfm_1 = zij_0*tmp_cfm'
     963              :             CALL parallel_gemm("N", "N", ndummy, nstate, ndummy, cone, zij_0(idim), &
     964          102 :                                tmp_cfm, czero, tmp_cfm_1)
     965              :             ! 'c_zij = tmp_cfm_dagg*tmp_cfm_1'
     966              :             CALL parallel_gemm("C", "N", nstate, nstate, ndummy, cone, tmp_cfm, tmp_cfm_1, &
     967          136 :                                czero, c_zij(idim))
     968              :          END DO
     969           34 :          CALL cp_cfm_release(tmp_cfm)
     970           34 :          CALL cp_cfm_release(tmp_cfm_1)
     971              :          ! compute spread
     972         1496 :          DO istate = 1, nstate
     973         1462 :             spread_ii = 0.0_dp
     974         5848 :             DO idim = 1, dim2
     975         4386 :                CALL cp_cfm_get_element(c_zij(idim), istate, istate, mzii)
     976              :                spread_ii = spread_ii + weights(idim)* &
     977         4386 :                            ABS(mzii)**2/twopi/twopi
     978         5848 :                matrix_zii(istate, idim) = mzii
     979              :             END DO
     980              :             !WRITE(*,*) 'spread_ii', spread_ii
     981         1496 :             sum_spread(istate) = spread_ii
     982              :          END DO
     983           34 :          CALL c_zij(1)%matrix_struct%para_env%sum(spread_ii)
     984           34 :          spread_sum = accurate_sum(sum_spread)
     985              : 
     986           34 :          IF (nextra > 0) THEN
     987              :             ! update c_tilde
     988           34 :             CALL cp_cfm_set_all(zdiag, czero)
     989           34 :             CALL cp_cfm_set_all(grad_ctilde, czero)
     990           34 :             CALL cp_cfm_create(tmp_cfm, V%matrix_struct)
     991           34 :             CALL cp_cfm_set_all(tmp_cfm, czero)
     992           34 :             CALL cp_cfm_create(tmp_cfm_1, V%matrix_struct)
     993           34 :             CALL cp_cfm_set_all(tmp_cfm_1, czero)
     994          136 :             ALLOCATE (tmp_cmat(northo, nstate))
     995          136 :             DO idim = 1, dim2
     996          102 :                weight = weights(idim)
     997         8874 :                arr_zii = matrix_zii(:, idim)
     998              :                ! tmp_cfm = zij_0*V
     999              :                CALL parallel_gemm("N", "N", nmo, nstate, nmo, cone, &
    1000          102 :                                   zij_0(idim), V, czero, tmp_cfm)
    1001              :                ! tmp_cfm = tmp_cfm*diag_zij_dagg
    1002         4488 :                CALL cp_cfm_column_scale(tmp_cfm, CONJG(arr_zii))
    1003              :                ! tmp_cfm_1 = tmp_cfm*U_dagg
    1004              :                CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
    1005          102 :                                   U, czero, tmp_cfm_1)
    1006          102 :                CALL cp_cfm_scale(weight, tmp_cfm_1)
    1007              :                ! zdiag = zdiag + tmp_cfm_1'
    1008          102 :                CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
    1009              : 
    1010              :                ! tmp_cfm = zij_0_dagg*V
    1011              :                CALL parallel_gemm("C", "N", nmo, nstate, nmo, cone, &
    1012          102 :                                   zij_0(idim), V, czero, tmp_cfm)
    1013              : 
    1014              :                ! tmp_cfm = tmp_cfm*diag_zij
    1015          102 :                CALL cp_cfm_column_scale(tmp_cfm, arr_zii)
    1016              :                ! tmp_cfm_1 = tmp_cfm*U_dagg
    1017              :                CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
    1018          102 :                                   U, czero, tmp_cfm_1)
    1019          102 :                CALL cp_cfm_scale(weight, tmp_cfm_1)
    1020              :                ! zdiag = zdiag + tmp_cfm_1'
    1021          136 :                CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
    1022              :             END DO ! idim
    1023           34 :             CALL cp_cfm_release(tmp_cfm)
    1024           34 :             CALL cp_cfm_release(tmp_cfm_1)
    1025           34 :             DEALLOCATE (tmp_cmat)
    1026          136 :             ALLOCATE (tmp_cmat(northo, nextra))
    1027              :             CALL cp_cfm_get_submatrix(zdiag, tmp_cmat, nocc + 1, nocc + 1, &
    1028           34 :                                       northo, nextra, .FALSE.)
    1029              :             ! 'grad_ctilde'
    1030           34 :             CALL cp_cfm_set_submatrix(grad_ctilde, tmp_cmat)
    1031           34 :             DEALLOCATE (tmp_cmat)
    1032              :             ! ctrans_lambda = c_tilde_dagg*grad_ctilde
    1033           34 :             CALL parallel_gemm("C", "N", nextra, nextra, northo, cone, c_tilde, grad_ctilde, czero, ctrans_lambda)
    1034              :             !WRITE(*,*) "norm(ctrans_lambda) = ", cp_cfm_norm(ctrans_lambda, "F")
    1035              :             ! 'grad_ctilde = - c_tilde*ctrans_lambda + grad_ctilde'
    1036          102 :             CALL parallel_gemm("N", "N", northo, nextra, nextra, -cone, c_tilde, ctrans_lambda, cone, grad_ctilde)
    1037              :          END IF ! nextra > 0
    1038              : 
    1039              :          ! tolerance
    1040           34 :          IF (nextra > 0) THEN
    1041              :             tolc = 0.0_dp
    1042           34 :             CALL cp_fm_create(tmp_fm, grad_ctilde%matrix_struct)
    1043           34 :             CALL cp_cfm_to_fm(grad_ctilde, tmp_fm)
    1044           34 :             CALL cp_fm_maxabsval(tmp_fm, tolc)
    1045           34 :             CALL cp_fm_release(tmp_fm)
    1046              :             !WRITE(*,*) 'tolc = ', tolc
    1047           34 :             tol = tol + tolc
    1048              :          END IF
    1049              :          !WRITE(*,*) 'tol = ', tol
    1050              : 
    1051           36 :          IF (nextra > 0) THEN
    1052              :             !WRITE(*,*) 'new_direction: ', new_direction
    1053           34 :             IF (new_direction) THEN
    1054            6 :                line_searches = line_searches + 1
    1055            6 :                IF (mintol > tol) THEN
    1056            4 :                   mintol = tol
    1057            4 :                   miniter = iter
    1058              :                END IF
    1059              : 
    1060            6 :                IF (unit_nr > 0 .AND. MODULO(iter, out_each) == 0) THEN
    1061            0 :                   sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
    1062            0 :                   avg_spread_ii = sum_spread_ii/nstate
    1063              :                   WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
    1064            0 :                      "Iteration", "Avg. Spread_ii", "Tolerance"
    1065              :                   WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
    1066            0 :                      iter, avg_spread_ii, tol
    1067            0 :                   CALL m_flush(unit_nr)
    1068              :                END IF
    1069            6 :                IF (tol < eps_localization) EXIT
    1070              : 
    1071            4 :                IF (do_cg) THEN
    1072              :                   cnorm2_Gct = czero
    1073              :                   cnorm2_Gct_cross = czero
    1074            4 :                   CALL cp_cfm_trace(grad_ctilde, Gct_old, cnorm2_Gct_cross)
    1075            4 :                   norm2_Gct_cross = REAL(cnorm2_Gct_cross, KIND=dp)
    1076           56 :                   Gct_old%local_data = grad_ctilde%local_data
    1077            4 :                   CALL cp_cfm_trace(grad_ctilde, Gct_old, cnorm2_Gct)
    1078            4 :                   norm2_Gct = REAL(cnorm2_Gct, KIND=dp)
    1079              :                   ! compute beta_pr
    1080            4 :                   beta_pr = (norm2_Gct - norm2_Gct_cross)/norm2_old
    1081            4 :                   norm2_old = norm2_Gct
    1082            4 :                   beta = MAX(0.0_dp, beta_pr)
    1083              :                   !WRITE(*,*) 'beta = ', beta
    1084              :                   ! compute skc / ska = beta * skc / ska + grad_ctilde / G
    1085            4 :                   CALL cp_cfm_scale(beta, skc)
    1086            4 :                   CALL cp_cfm_scale_and_add(cone, skc, cone, Gct_old)
    1087            4 :                   CALL cp_cfm_trace(skc, Gct_old, cnorm2_Gct_cross)
    1088            4 :                   norm2_Gct_cross = REAL(cnorm2_Gct_cross, KIND=dp)
    1089            4 :                   IF (norm2_Gct_cross <= 0.0_dp) THEN ! back to steepest ascent
    1090            0 :                      CALL cp_cfm_scale_and_add(czero, skc, cone, Gct_old)
    1091              :                   END IF
    1092              :                ELSE
    1093            0 :                   CALL cp_cfm_scale_and_add(czero, skc, cone, grad_ctilde)
    1094              :                END IF
    1095              :                line_search_count = 0
    1096              :             END IF
    1097              : 
    1098           32 :             line_search_count = line_search_count + 1
    1099              :             !WRITE(*,*) 'line_search_count = ', line_search_count
    1100           32 :             energy(line_search_count) = spread_sum
    1101              : 
    1102              :             ! gold line search
    1103           32 :             new_direction = .FALSE.
    1104           32 :             IF (line_search_count == 1) THEN
    1105            4 :                lsl = 1
    1106            4 :                lsr = 0
    1107            4 :                lsm = 1
    1108            4 :                pos(1) = 0.0_dp
    1109            4 :                pos(2) = ds_min/gold_sec
    1110            4 :                ds = pos(2)
    1111              :             ELSE
    1112           28 :                IF (line_search_count == 50) THEN
    1113            0 :                   IF (ABS(energy(line_search_count) - energy(line_search_count - 1)) < 1.0E-4_dp) THEN
    1114            0 :                      CPWARN("Line search failed to converge properly")
    1115            0 :                      ds_min = 0.1_dp
    1116            0 :                      new_direction = .TRUE.
    1117              :                      ds = pos(line_search_count)
    1118            0 :                      line_search_count = 0
    1119              :                   ELSE
    1120            0 :                      CPABORT("No. of line searches exceeds 50")
    1121              :                   END IF
    1122              :                ELSE
    1123           28 :                   IF (lsr == 0) THEN
    1124           28 :                      IF (energy(line_search_count - 1) > energy(line_search_count)) THEN
    1125            0 :                         lsr = line_search_count
    1126            0 :                         pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
    1127              :                      ELSE
    1128           28 :                         lsl = lsm
    1129           28 :                         lsm = line_search_count
    1130           28 :                         pos(line_search_count + 1) = pos(line_search_count)/gold_sec
    1131              :                      END IF
    1132              :                   ELSE
    1133            0 :                      IF (pos(line_search_count) < pos(lsm)) THEN
    1134            0 :                         IF (energy(line_search_count) > energy(lsm)) THEN
    1135              :                            lsr = lsm
    1136              :                            lsm = line_search_count
    1137              :                         ELSE
    1138            0 :                            lsl = line_search_count
    1139              :                         END IF
    1140              :                      ELSE
    1141            0 :                         IF (energy(line_search_count) > energy(lsm)) THEN
    1142              :                            lsl = lsm
    1143              :                            lsm = line_search_count
    1144              :                         ELSE
    1145            0 :                            lsr = line_search_count
    1146              :                         END IF
    1147              :                      END IF
    1148            0 :                      IF (pos(lsr) - pos(lsm) > pos(lsm) - pos(lsl)) THEN
    1149            0 :                         pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
    1150              :                      ELSE
    1151            0 :                         pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
    1152              :                      END IF
    1153            0 :                      IF ((pos(lsr) - pos(lsl)) < 1.0E-3_dp*pos(lsr)) THEN
    1154            0 :                         new_direction = .TRUE.
    1155              :                      END IF
    1156              :                   END IF ! lsr .eq. 0
    1157              :                END IF ! line_search_count .eq. 50
    1158              :                ! now go to the suggested point
    1159           28 :                ds = pos(line_search_count + 1) - pos(line_search_count)
    1160              :                !WRITE(*,*) 'lsl, lsr, lsm, ds = ', lsl, lsr, lsm, ds
    1161           28 :                IF ((ABS(ds) < 1.0E-10_dp) .AND. (lsl == 1)) THEN
    1162            0 :                   new_direction = .TRUE.
    1163            0 :                   ds_min = 0.5_dp/alpha
    1164           28 :                ELSEIF (ABS(ds) > 10.0_dp) THEN
    1165            4 :                   new_direction = .TRUE.
    1166            4 :                   ds_min = 0.5_dp/alpha
    1167              :                ELSE
    1168              :                   ds_min = pos(line_search_count + 1)
    1169              :                END IF
    1170              :             END IF ! first step
    1171              :             ! 'c_tilde = c_tilde + d*skc'
    1172           32 :             CALL cp_cfm_scale(ds, skc)
    1173           32 :             CALL cp_cfm_scale_and_add(cone, c_tilde, cone, skc)
    1174              :          ELSE
    1175            0 :             IF (mintol > tol) THEN
    1176            0 :                mintol = tol
    1177            0 :                miniter = iter
    1178              :             END IF
    1179            0 :             IF (unit_nr > 0 .AND. MODULO(iter, out_each) == 0) THEN
    1180            0 :                sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
    1181            0 :                avg_spread_ii = sum_spread_ii/nstate
    1182              :                WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
    1183            0 :                   "Iteration", "Avg. Spread_ii", "Tolerance"
    1184              :                WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
    1185            0 :                   iter, avg_spread_ii, tol
    1186            0 :                CALL m_flush(unit_nr)
    1187              :             END IF
    1188            0 :             IF (tol < eps_localization) EXIT
    1189              :          END IF ! nextra > 0
    1190              : 
    1191              :       END DO ! iteration
    1192              : 
    1193            2 :       IF ((unit_nr > 0) .AND. (iter == max_iter)) THEN
    1194            0 :          WRITE (unit_nr, '(T4,A,T4,A)') "Min. Itr.", "Min. Tol."
    1195            0 :          WRITE (unit_nr, '(T4,I7,T4,E12.4)') miniter, mintol
    1196            0 :          CALL m_flush(unit_nr)
    1197              :       END IF
    1198              : 
    1199            2 :       CALL cp_cfm_to_fm(U, matrix_U)
    1200              : 
    1201            2 :       IF (nextra > 0) THEN
    1202         2324 :          rmat%local_data = REAL(V%local_data, KIND=dp)
    1203            2 :          CALL rotate_orbitals_edf(rmat, vectors_all, vectors)
    1204              : 
    1205            2 :          CALL cp_cfm_release(c_tilde)
    1206            2 :          CALL cp_cfm_release(grad_ctilde)
    1207            2 :          CALL cp_cfm_release(Gct_old)
    1208            2 :          CALL cp_cfm_release(skc)
    1209            2 :          CALL cp_cfm_release(UL)
    1210            2 :          CALL cp_cfm_release(zdiag)
    1211            2 :          CALL cp_cfm_release(ctrans_lambda)
    1212            2 :          CALL cp_fm_release(id_nextra)
    1213            2 :          CALL cp_fm_release(vectors_all)
    1214            2 :          CALL cp_cfm_release(V)
    1215            2 :          CALL cp_fm_release(matrix_V)
    1216            2 :          CALL cp_fm_release(matrix_V_all)
    1217            2 :          CALL cp_cfm_release(VL)
    1218            2 :          DEALLOCATE (arr_zii)
    1219              :       ELSE
    1220            0 :          rmat%local_data = matrix_U%local_data
    1221            0 :          CALL rotate_orbitals(rmat, vectors)
    1222              :       END IF
    1223            8 :       DO idim = 1, dim2
    1224            8 :          CALL cp_cfm_release(zij_0(idim))
    1225              :       END DO
    1226            2 :       DEALLOCATE (zij_0)
    1227              : 
    1228            8 :       DO idim = 1, dim2
    1229         5811 :          zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
    1230         5811 :          zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
    1231            8 :          CALL cp_cfm_release(c_zij(idim))
    1232              :       END DO
    1233            2 :       DEALLOCATE (c_zij)
    1234            2 :       CALL cp_fm_release(rmat)
    1235            2 :       CALL cp_cfm_release(U)
    1236            2 :       CALL cp_fm_release(matrix_U)
    1237            2 :       DEALLOCATE (matrix_zii, sum_spread)
    1238              : 
    1239            2 :       CALL timestop(handle)
    1240              : 
    1241           10 :    END SUBROUTINE jacobi_cg_edf_ls
    1242              : 
    1243              : ! **************************************************************************************************
    1244              : !> \brief ...
    1245              : !> \param vmatrix ...
    1246              : ! **************************************************************************************************
    1247          108 :    SUBROUTINE ortho_vectors(vmatrix)
    1248              : 
    1249              :       TYPE(cp_fm_type), INTENT(IN)                       :: vmatrix
    1250              : 
    1251              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'ortho_vectors'
    1252              : 
    1253              :       INTEGER                                            :: handle, n, ncol
    1254              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
    1255              :       TYPE(cp_fm_type)                                   :: overlap_vv
    1256              : 
    1257           36 :       CALL timeset(routineN, handle)
    1258              : 
    1259           36 :       NULLIFY (fm_struct_tmp)
    1260              : 
    1261           36 :       CALL cp_fm_get_info(matrix=vmatrix, nrow_global=n, ncol_global=ncol)
    1262              : 
    1263              :       CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ncol, ncol_global=ncol, &
    1264              :                                para_env=vmatrix%matrix_struct%para_env, &
    1265           36 :                                context=vmatrix%matrix_struct%context)
    1266           36 :       CALL cp_fm_create(overlap_vv, fm_struct_tmp, "overlap_vv")
    1267           36 :       CALL cp_fm_struct_release(fm_struct_tmp)
    1268              : 
    1269           36 :       CALL parallel_gemm('T', 'N', ncol, ncol, n, 1.0_dp, vmatrix, vmatrix, 0.0_dp, overlap_vv)
    1270           36 :       CALL cp_fm_cholesky_decompose(overlap_vv)
    1271           36 :       CALL cp_fm_triangular_multiply(overlap_vv, vmatrix, n_cols=ncol, side='R', invert_tr=.TRUE.)
    1272              : 
    1273           36 :       CALL cp_fm_release(overlap_vv)
    1274              : 
    1275           36 :       CALL timestop(handle)
    1276              : 
    1277           36 :    END SUBROUTINE ortho_vectors
    1278              : 
    1279              : ! **************************************************************************************************
    1280              : !> \brief ...
    1281              : !> \param istate ...
    1282              : !> \param jstate ...
    1283              : !> \param st ...
    1284              : !> \param ct ...
    1285              : !> \param zij ...
    1286              : ! **************************************************************************************************
    1287            0 :    SUBROUTINE rotate_zij(istate, jstate, st, ct, zij)
    1288              :       INTEGER, INTENT(IN)                                :: istate, jstate
    1289              :       REAL(KIND=dp), INTENT(IN)                          :: st, ct
    1290              :       TYPE(cp_cfm_type)                                  :: zij(:)
    1291              : 
    1292              :       INTEGER                                            :: id
    1293              : 
    1294              : ! Locals
    1295              : 
    1296            0 :       DO id = 1, SIZE(zij, 1)
    1297            0 :          CALL cp_cfm_rot_cols(zij(id), istate, jstate, ct, st)
    1298            0 :          CALL cp_cfm_rot_rows(zij(id), istate, jstate, ct, st)
    1299              :       END DO
    1300              : 
    1301            0 :    END SUBROUTINE rotate_zij
    1302              : ! **************************************************************************************************
    1303              : !> \brief ...
    1304              : !> \param istate ...
    1305              : !> \param jstate ...
    1306              : !> \param st ...
    1307              : !> \param ct ...
    1308              : !> \param rmat ...
    1309              : ! **************************************************************************************************
    1310            0 :    SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat)
    1311              :       INTEGER, INTENT(IN)                                :: istate, jstate
    1312              :       REAL(KIND=dp), INTENT(IN)                          :: st, ct
    1313              :       TYPE(cp_cfm_type), INTENT(IN)                      :: rmat
    1314              : 
    1315            0 :       CALL cp_cfm_rot_cols(rmat, istate, jstate, ct, st)
    1316              : 
    1317            0 :    END SUBROUTINE rotate_rmat
    1318              : ! **************************************************************************************************
    1319              : !> \brief ...
    1320              : !> \param mii ...
    1321              : !> \param mjj ...
    1322              : !> \param mij ...
    1323              : !> \param weights ...
    1324              : !> \param theta ...
    1325              : !> \param grad_ij ...
    1326              : !> \param step ...
    1327              : ! **************************************************************************************************
    1328      2326264 :    SUBROUTINE get_angle(mii, mjj, mij, weights, theta, grad_ij, step)
    1329              :       COMPLEX(KIND=dp), POINTER                          :: mii(:), mjj(:), mij(:)
    1330              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1331              :       REAL(KIND=dp), INTENT(OUT)                         :: theta
    1332              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: grad_ij, step
    1333              : 
    1334              :       COMPLEX(KIND=dp)                                   :: z11, z12, z22
    1335              :       INTEGER                                            :: dim_m, idim
    1336              :       REAL(KIND=dp)                                      :: a12, b12, d2, ratio
    1337              : 
    1338      2326264 :       a12 = 0.0_dp
    1339      2326264 :       b12 = 0.0_dp
    1340      2326264 :       dim_m = SIZE(mii)
    1341      9411550 :       DO idim = 1, dim_m
    1342      7085286 :          z11 = mii(idim)
    1343      7085286 :          z22 = mjj(idim)
    1344      7085286 :          z12 = mij(idim)
    1345      7085286 :          a12 = a12 + weights(idim)*REAL(CONJG(z12)*(z11 - z22), KIND=dp)
    1346              :          b12 = b12 + weights(idim)*REAL((z12*CONJG(z12) - &
    1347      9411550 :                                          0.25_dp*(z11 - z22)*(CONJG(z11) - CONJG(z22))), KIND=dp)
    1348              :       END DO
    1349      2326264 :       IF (ABS(b12) > 1.e-10_dp) THEN
    1350      2326264 :          ratio = -a12/b12
    1351      2326264 :          theta = 0.25_dp*ATAN(ratio)
    1352            0 :       ELSEIF (ABS(b12) < 1.e-10_dp) THEN
    1353            0 :          b12 = 0.0_dp
    1354            0 :          theta = 0.0_dp
    1355              :       ELSE
    1356            0 :          theta = 0.25_dp*pi
    1357              :       END IF
    1358      2326264 :       IF (PRESENT(grad_ij)) theta = theta + step*grad_ij
    1359              : ! Check second derivative info
    1360      2326264 :       d2 = a12*SIN(4._dp*theta) - b12*COS(4._dp*theta)
    1361      2326264 :       IF (d2 <= 0._dp) THEN ! go to the maximum, not the minimum
    1362         3262 :          IF (theta > 0.0_dp) THEN ! make theta as small as possible
    1363         1619 :             theta = theta - 0.25_dp*pi
    1364              :          ELSE
    1365         1643 :             theta = theta + 0.25_dp*pi
    1366              :          END IF
    1367              :       END IF
    1368      2326264 :    END SUBROUTINE get_angle
    1369              : ! **************************************************************************************************
    1370              : !> \brief ...
    1371              : !> \param zij ...
    1372              : !> \param weights ...
    1373              : !> \param tolerance ...
    1374              : !> \param grad ...
    1375              : ! **************************************************************************************************
    1376          104 :    SUBROUTINE check_tolerance(zij, weights, tolerance, grad)
    1377              :       TYPE(cp_cfm_type)                                  :: zij(:)
    1378              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1379              :       REAL(KIND=dp), INTENT(OUT)                         :: tolerance
    1380              :       TYPE(cp_fm_type), INTENT(OUT), OPTIONAL            :: grad
    1381              : 
    1382              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'check_tolerance'
    1383              : 
    1384              :       INTEGER                                            :: handle
    1385              :       TYPE(cp_fm_type)                                   :: force
    1386              : 
    1387          104 :       CALL timeset(routineN, handle)
    1388              : 
    1389              : ! compute gradient at t=0
    1390              : 
    1391          104 :       CALL cp_fm_create(force, zij(1)%matrix_struct)
    1392          104 :       CALL cp_fm_set_all(force, 0._dp)
    1393          104 :       CALL grad_at_0(zij, weights, force)
    1394          104 :       CALL cp_fm_maxabsval(force, tolerance)
    1395          104 :       IF (PRESENT(grad)) CALL cp_fm_to_fm(force, grad)
    1396          104 :       CALL cp_fm_release(force)
    1397              : 
    1398          104 :       CALL timestop(handle)
    1399              : 
    1400          104 :    END SUBROUTINE check_tolerance
    1401              : 
    1402              : ! **************************************************************************************************
    1403              : !> \brief ...
    1404              : !> \param rmat ...
    1405              : !> \param vectors ...
    1406              : ! **************************************************************************************************
    1407         1128 :    SUBROUTINE rotate_orbitals(rmat, vectors)
    1408              :       TYPE(cp_fm_type), INTENT(IN)                       :: rmat, vectors
    1409              : 
    1410              :       INTEGER                                            :: k, n
    1411              :       TYPE(cp_fm_type)                                   :: wf
    1412              : 
    1413          564 :       CALL cp_fm_create(wf, vectors%matrix_struct)
    1414          564 :       CALL cp_fm_get_info(vectors, nrow_global=n, ncol_global=k)
    1415          564 :       CALL parallel_gemm("N", "N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf)
    1416          564 :       CALL cp_fm_to_fm(wf, vectors)
    1417          564 :       CALL cp_fm_release(wf)
    1418          564 :    END SUBROUTINE rotate_orbitals
    1419              : 
    1420              : ! **************************************************************************************************
    1421              : !> \brief ...
    1422              : !> \param rmat ...
    1423              : !> \param vectors ...
    1424              : ! **************************************************************************************************
    1425           24 :    SUBROUTINE rotate_orbitals_cfm(rmat, vectors)
    1426              :       TYPE(cp_cfm_type), INTENT(IN)                      :: rmat, vectors
    1427              : 
    1428              :       INTEGER                                            :: k, n
    1429              :       TYPE(cp_cfm_type)                                  :: wf
    1430              : 
    1431           12 :       CALL cp_cfm_create(wf, vectors%matrix_struct)
    1432           12 :       CALL cp_cfm_get_info(vectors, nrow_global=n, ncol_global=k)
    1433           12 :       CALL parallel_gemm("N", "N", n, k, k, z_one, vectors, rmat, z_zero, wf)
    1434           12 :       CALL cp_cfm_to_cfm(wf, vectors)
    1435           12 :       CALL cp_cfm_release(wf)
    1436           12 :    END SUBROUTINE rotate_orbitals_cfm
    1437              : 
    1438              : ! **************************************************************************************************
    1439              : !> \brief ...
    1440              : !> \param rmat ...
    1441              : !> \param vec_all ...
    1442              : !> \param vectors ...
    1443              : ! **************************************************************************************************
    1444            6 :    SUBROUTINE rotate_orbitals_edf(rmat, vec_all, vectors)
    1445              :       TYPE(cp_fm_type), INTENT(IN)                       :: rmat, vec_all, vectors
    1446              : 
    1447              :       INTEGER                                            :: k, l, n
    1448              :       TYPE(cp_fm_type)                                   :: wf
    1449              : 
    1450            2 :       CALL cp_fm_create(wf, vectors%matrix_struct)
    1451            2 :       CALL cp_fm_get_info(vec_all, nrow_global=n, ncol_global=k)
    1452            2 :       CALL cp_fm_get_info(rmat, ncol_global=l)
    1453              : 
    1454            2 :       CALL parallel_gemm("N", "N", n, l, k, 1.0_dp, vec_all, rmat, 0.0_dp, wf)
    1455            2 :       CALL cp_fm_to_fm(wf, vectors)
    1456            2 :       CALL cp_fm_release(wf)
    1457            2 :    END SUBROUTINE rotate_orbitals_edf
    1458              : ! **************************************************************************************************
    1459              : !> \brief ...
    1460              : !> \param diag ...
    1461              : !> \param weights ...
    1462              : !> \param matrix ...
    1463              : !> \param ndim ...
    1464              : ! **************************************************************************************************
    1465         1670 :    SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim)
    1466              :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag
    1467              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1468              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1469              :       INTEGER, INTENT(IN)                                :: ndim
    1470              : 
    1471              :       COMPLEX(KIND=dp)                                   :: zii, zjj
    1472              :       INTEGER                                            :: idim, istate, jstate, ncol_local, &
    1473              :                                                             nrow_global, nrow_local
    1474         1670 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1475              :       REAL(KIND=dp)                                      :: gradsq_ij
    1476              : 
    1477              :       CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
    1478              :                           ncol_local=ncol_local, nrow_global=nrow_global, &
    1479         1670 :                           row_indices=row_indices, col_indices=col_indices)
    1480              : 
    1481         5010 :       DO istate = 1, nrow_local
    1482        18370 :          DO jstate = 1, ncol_local
    1483              : ! get real and imaginary parts
    1484        13360 :             gradsq_ij = 0.0_dp
    1485        93520 :             DO idim = 1, ndim
    1486        80160 :                zii = diag(row_indices(istate), idim)
    1487        80160 :                zjj = diag(col_indices(jstate), idim)
    1488              :                gradsq_ij = gradsq_ij + weights(idim)* &
    1489        93520 :                            4.0_dp*REAL((CONJG(zii)*zii + CONJG(zjj)*zjj), KIND=dp)
    1490              :             END DO
    1491        16700 :             matrix%local_data(istate, jstate) = gradsq_ij
    1492              :          END DO
    1493              :       END DO
    1494         1670 :    END SUBROUTINE gradsq_at_0
    1495              : ! **************************************************************************************************
    1496              : !> \brief ...
    1497              : !> \param matrix_p ...
    1498              : !> \param weights ...
    1499              : !> \param matrix ...
    1500              : ! **************************************************************************************************
    1501          104 :    SUBROUTINE grad_at_0(matrix_p, weights, matrix)
    1502              :       TYPE(cp_cfm_type)                                  :: matrix_p(:)
    1503              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1504              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix
    1505              : 
    1506              :       COMPLEX(KIND=dp)                                   :: zii, zij, zjj
    1507          104 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag
    1508              :       INTEGER                                            :: dim_m, idim, istate, jstate, ncol_local, &
    1509              :                                                             nrow_global, nrow_local
    1510          104 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1511              :       REAL(KIND=dp)                                      :: grad_ij
    1512              : 
    1513          104 :       NULLIFY (diag)
    1514              :       CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
    1515              :                           ncol_local=ncol_local, nrow_global=nrow_global, &
    1516          104 :                           row_indices=row_indices, col_indices=col_indices)
    1517          104 :       dim_m = SIZE(matrix_p, 1)
    1518          416 :       ALLOCATE (diag(nrow_global, dim_m))
    1519              : 
    1520          728 :       DO idim = 1, dim_m
    1521        23192 :          DO istate = 1, nrow_global
    1522        23088 :             CALL cp_cfm_get_element(matrix_p(idim), istate, istate, diag(istate, idim))
    1523              :          END DO
    1524              :       END DO
    1525              : 
    1526         1976 :       DO istate = 1, nrow_local
    1527        69368 :          DO jstate = 1, ncol_local
    1528              : ! get real and imaginary parts
    1529              :             grad_ij = 0.0_dp
    1530       471744 :             DO idim = 1, dim_m
    1531       404352 :                zii = diag(row_indices(istate), idim)
    1532       404352 :                zjj = diag(col_indices(jstate), idim)
    1533       404352 :                zij = matrix_p(idim)%local_data(istate, jstate)
    1534              :                grad_ij = grad_ij + weights(idim)* &
    1535       471744 :                          REAL(4.0_dp*CONJG(zij)*(zjj - zii), dp)
    1536              :             END DO
    1537        69264 :             matrix%local_data(istate, jstate) = grad_ij
    1538              :          END DO
    1539              :       END DO
    1540          104 :       DEALLOCATE (diag)
    1541          104 :    END SUBROUTINE grad_at_0
    1542              : 
    1543              : ! return energy and maximum gradient in the current point
    1544              : ! **************************************************************************************************
    1545              : !> \brief ...
    1546              : !> \param weights ...
    1547              : !> \param zij ...
    1548              : !> \param tolerance ...
    1549              : !> \param value ...
    1550              : ! **************************************************************************************************
    1551         4944 :    SUBROUTINE check_tolerance_new(weights, zij, tolerance, value)
    1552              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1553              :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :)
    1554              :       REAL(KIND=dp)                                      :: tolerance, value
    1555              : 
    1556              :       COMPLEX(KIND=dp)                                   :: kii, kij, kjj
    1557         4944 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag
    1558              :       INTEGER                                            :: idim, istate, jstate, ncol_local, &
    1559              :                                                             nrow_global, nrow_local
    1560         4944 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1561              :       REAL(KIND=dp)                                      :: grad_ij, ra, rb
    1562              : 
    1563         4944 :       NULLIFY (diag)
    1564              :       CALL cp_fm_get_info(zij(1, 1), nrow_local=nrow_local, &
    1565              :                           ncol_local=ncol_local, nrow_global=nrow_global, &
    1566         4944 :                           row_indices=row_indices, col_indices=col_indices)
    1567        19776 :       ALLOCATE (diag(nrow_global, SIZE(zij, 2)))
    1568         4944 :       value = 0.0_dp
    1569        19890 :       DO idim = 1, SIZE(zij, 2)
    1570       201120 :          DO istate = 1, nrow_global
    1571       181230 :             CALL cp_fm_get_element(zij(1, idim), istate, istate, ra)
    1572       181230 :             CALL cp_fm_get_element(zij(2, idim), istate, istate, rb)
    1573       181230 :             diag(istate, idim) = CMPLX(ra, rb, dp)
    1574       196176 :             value = value + weights(idim) - weights(idim)*ABS(diag(istate, idim))**2
    1575              :          END DO
    1576              :       END DO
    1577         4944 :       tolerance = 0.0_dp
    1578        35054 :       DO istate = 1, nrow_local
    1579       499774 :          DO jstate = 1, ncol_local
    1580      1860305 :             grad_ij = 0.0_dp
    1581      1860305 :             DO idim = 1, SIZE(zij, 2)
    1582      1395585 :                kii = diag(row_indices(istate), idim)
    1583      1395585 :                kjj = diag(col_indices(jstate), idim)
    1584      1395585 :                ra = zij(1, idim)%local_data(istate, jstate)
    1585      1395585 :                rb = zij(2, idim)%local_data(istate, jstate)
    1586      1395585 :                kij = CMPLX(ra, rb, dp)
    1587              :                grad_ij = grad_ij + weights(idim)* &
    1588      1860305 :                          REAL(4.0_dp*CONJG(kij)*(kjj - kii), dp)
    1589              :             END DO
    1590       494830 :             tolerance = MAX(ABS(grad_ij), tolerance)
    1591              :          END DO
    1592              :       END DO
    1593         4944 :       CALL zij(1, 1)%matrix_struct%para_env%max(tolerance)
    1594              : 
    1595         4944 :       DEALLOCATE (diag)
    1596              : 
    1597         4944 :    END SUBROUTINE check_tolerance_new
    1598              : 
    1599              : ! **************************************************************************************************
    1600              : !> \brief yet another crazy try, computes the angles needed to rotate the orbitals first
    1601              : !>        and rotates them all at the same time (hoping for the best of course)
    1602              : !> \param weights ...
    1603              : !> \param zij ...
    1604              : !> \param vectors ...
    1605              : !> \param max_iter ...
    1606              : !> \param max_crazy_angle ...
    1607              : !> \param crazy_scale ...
    1608              : !> \param crazy_use_diag ...
    1609              : !> \param eps_localization ...
    1610              : !> \param iterations ...
    1611              : !> \param converged ...
    1612              : ! **************************************************************************************************
    1613          136 :    SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, &
    1614              :                               eps_localization, iterations, converged)
    1615              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1616              :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :), vectors
    1617              :       INTEGER, INTENT(IN)                                :: max_iter
    1618              :       REAL(KIND=dp), INTENT(IN)                          :: max_crazy_angle
    1619              :       REAL(KIND=dp)                                      :: crazy_scale
    1620              :       LOGICAL                                            :: crazy_use_diag
    1621              :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
    1622              :       INTEGER                                            :: iterations
    1623              :       LOGICAL, INTENT(out), OPTIONAL                     :: converged
    1624              : 
    1625              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'crazy_rotations'
    1626              :       COMPLEX(KIND=dp), PARAMETER                        :: cone = (1.0_dp, 0.0_dp), &
    1627              :                                                             czero = (0.0_dp, 0.0_dp)
    1628              : 
    1629              :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: evals_exp
    1630          136 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag_z
    1631              :       COMPLEX(KIND=dp), POINTER                          :: mii(:), mij(:), mjj(:)
    1632              :       INTEGER                                            :: dim2, handle, i, icol, idim, irow, &
    1633              :                                                             method, ncol_global, ncol_local, &
    1634              :                                                             norder, nrow_global, nrow_local, &
    1635              :                                                             nsquare, unit_nr
    1636          136 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1637              :       LOGICAL                                            :: do_emd
    1638              :       REAL(KIND=dp)                                      :: eps_exp, limit_crazy_angle, maxeval, &
    1639              :                                                             norm, ra, rb, theta, tolerance, value
    1640              :       REAL(KIND=dp), DIMENSION(:), POINTER               :: evals
    1641              :       TYPE(cp_cfm_type)                                  :: cmat_A, cmat_R, cmat_t1
    1642              :       TYPE(cp_fm_type)                                   :: mat_R, mat_t, mat_theta, mat_U
    1643              : 
    1644          136 :       CALL timeset(routineN, handle)
    1645          136 :       NULLIFY (row_indices, col_indices)
    1646              :       CALL cp_fm_get_info(zij(1, 1), nrow_global=nrow_global, &
    1647              :                           ncol_global=ncol_global, &
    1648              :                           row_indices=row_indices, col_indices=col_indices, &
    1649          136 :                           nrow_local=nrow_local, ncol_local=ncol_local)
    1650              : 
    1651          136 :       limit_crazy_angle = max_crazy_angle
    1652              : 
    1653          136 :       NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj)
    1654          136 :       dim2 = SIZE(zij, 2)
    1655          544 :       ALLOCATE (diag_z(nrow_global, dim2))
    1656          408 :       ALLOCATE (evals(nrow_global))
    1657          408 :       ALLOCATE (evals_exp(nrow_global))
    1658              : 
    1659          136 :       CALL cp_cfm_create(cmat_A, zij(1, 1)%matrix_struct)
    1660          136 :       CALL cp_cfm_create(cmat_R, zij(1, 1)%matrix_struct)
    1661          136 :       CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
    1662              : 
    1663          136 :       CALL cp_fm_create(mat_U, zij(1, 1)%matrix_struct)
    1664          136 :       CALL cp_fm_create(mat_t, zij(1, 1)%matrix_struct)
    1665          136 :       CALL cp_fm_create(mat_R, zij(1, 1)%matrix_struct)
    1666              : 
    1667          136 :       CALL cp_fm_create(mat_theta, zij(1, 1)%matrix_struct)
    1668              : 
    1669          136 :       CALL cp_fm_set_all(mat_R, 0.0_dp, 1.0_dp)
    1670          136 :       CALL cp_fm_set_all(mat_t, 0.0_dp)
    1671          680 :       ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
    1672          550 :       DO idim = 1, dim2
    1673          414 :          CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(1, idim))
    1674          550 :          CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(2, idim))
    1675              :       END DO
    1676          136 :       CALL cp_fm_syevd(mat_t, mat_U, evals)
    1677          550 :       DO idim = 1, dim2
    1678              :          ! rotate z's
    1679          414 :          CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_U, 0.0_dp, mat_t)
    1680          414 :          CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(1, idim))
    1681          414 :          CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_U, 0.0_dp, mat_t)
    1682          550 :          CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(2, idim))
    1683              :       END DO
    1684              :       ! collect rotation matrix
    1685          136 :       CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_R, mat_U, 0.0_dp, mat_t)
    1686          136 :       CALL cp_fm_to_fm(mat_t, mat_R)
    1687              : 
    1688          136 :       unit_nr = -1
    1689          136 :       IF (cmat_A%matrix_struct%para_env%is_source()) THEN
    1690           68 :          unit_nr = cp_logger_get_default_unit_nr()
    1691              :          WRITE (unit_nr, '(T2,A7,A6,1X,A20,A12,A12,A12)') &
    1692           68 :             "CRAZY| ", "Iter", "value    ", "gradient", "Max. eval", "limit"
    1693              :       END IF
    1694              : 
    1695          136 :       iterations = 0
    1696          136 :       tolerance = 1.0_dp
    1697              : 
    1698              :       DO
    1699         4944 :          iterations = iterations + 1
    1700        19890 :          DO idim = 1, dim2
    1701       201120 :             DO i = 1, nrow_global
    1702       181230 :                CALL cp_fm_get_element(zij(1, idim), i, i, ra)
    1703       181230 :                CALL cp_fm_get_element(zij(2, idim), i, i, rb)
    1704       196176 :                diag_z(i, idim) = CMPLX(ra, rb, dp)
    1705              :             END DO
    1706              :          END DO
    1707        35054 :          DO irow = 1, nrow_local
    1708       499774 :             DO icol = 1, ncol_local
    1709      1860305 :                DO idim = 1, dim2
    1710      1395585 :                   ra = zij(1, idim)%local_data(irow, icol)
    1711      1395585 :                   rb = zij(2, idim)%local_data(irow, icol)
    1712      1395585 :                   mij(idim) = CMPLX(ra, rb, dp)
    1713      1395585 :                   mii(idim) = diag_z(row_indices(irow), idim)
    1714      1860305 :                   mjj(idim) = diag_z(col_indices(icol), idim)
    1715              :                END DO
    1716       494830 :                IF (row_indices(irow) /= col_indices(icol)) THEN
    1717       434610 :                   CALL get_angle(mii, mjj, mij, weights, theta)
    1718       434610 :                   theta = crazy_scale*theta
    1719       434610 :                   IF (theta > limit_crazy_angle) theta = limit_crazy_angle
    1720       434610 :                   IF (theta < -limit_crazy_angle) theta = -limit_crazy_angle
    1721       434610 :                   IF (crazy_use_diag) THEN
    1722            0 :                      cmat_A%local_data(irow, icol) = -CMPLX(0.0_dp, theta, dp)
    1723              :                   ELSE
    1724       434610 :                      mat_theta%local_data(irow, icol) = -theta
    1725              :                   END IF
    1726              :                ELSE
    1727        30110 :                   IF (crazy_use_diag) THEN
    1728            0 :                      cmat_A%local_data(irow, icol) = czero
    1729              :                   ELSE
    1730        30110 :                      mat_theta%local_data(irow, icol) = 0.0_dp
    1731              :                   END IF
    1732              :                END IF
    1733              :             END DO
    1734              :          END DO
    1735              : 
    1736              :          ! construct rotation matrix U based on A using diagonalization
    1737              :          ! alternatively, exp based on repeated squaring could be faster
    1738         4944 :          IF (crazy_use_diag) THEN
    1739            0 :             CALL cp_cfm_heevd(cmat_A, cmat_R, evals)
    1740            0 :             maxeval = MAXVAL(ABS(evals))
    1741            0 :             evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
    1742            0 :             CALL cp_cfm_to_cfm(cmat_R, cmat_t1)
    1743            0 :             CALL cp_cfm_column_scale(cmat_t1, evals_exp)
    1744              :             CALL parallel_gemm('N', 'C', nrow_global, nrow_global, nrow_global, cone, &
    1745            0 :                                cmat_t1, cmat_R, czero, cmat_A)
    1746            0 :             mat_U%local_data = REAL(cmat_A%local_data, KIND=dp) ! U is a real matrix
    1747              :          ELSE
    1748         4944 :             do_emd = .FALSE.
    1749         4944 :             method = 2
    1750         4944 :             eps_exp = 1.0_dp*EPSILON(eps_exp)
    1751         4944 :             CALL cp_fm_maxabsrownorm(mat_theta, norm)
    1752         4944 :             maxeval = norm ! an upper bound
    1753         4944 :             CALL get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
    1754         4944 :             CALL exp_pade_real(mat_U, mat_theta, nsquare, norder)
    1755              :          END IF
    1756              : 
    1757        19890 :          DO idim = 1, dim2
    1758              :             ! rotate z's
    1759        14946 :             CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_U, 0.0_dp, mat_t)
    1760        14946 :             CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(1, idim))
    1761        14946 :             CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_U, 0.0_dp, mat_t)
    1762        19890 :             CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(2, idim))
    1763              :          END DO
    1764              :          ! collect rotation matrix
    1765         4944 :          CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_R, mat_U, 0.0_dp, mat_t)
    1766         4944 :          CALL cp_fm_to_fm(mat_t, mat_R)
    1767              : 
    1768         4944 :          CALL check_tolerance_new(weights, zij, tolerance, value)
    1769              : 
    1770         4944 :          IF (unit_nr > 0) THEN
    1771              :             WRITE (unit_nr, '(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') &
    1772         2472 :                "CRAZY| ", iterations, value, tolerance, maxeval, limit_crazy_angle
    1773         2472 :             CALL m_flush(unit_nr)
    1774              :          END IF
    1775         4944 :          IF (tolerance < eps_localization .OR. iterations >= max_iter) EXIT
    1776              :       END DO
    1777              : 
    1778          136 :       IF (PRESENT(converged)) converged = (tolerance < eps_localization)
    1779              : 
    1780          136 :       CALL cp_cfm_release(cmat_A)
    1781          136 :       CALL cp_cfm_release(cmat_R)
    1782          136 :       CALL cp_cfm_release(cmat_T1)
    1783              : 
    1784          136 :       CALL cp_fm_release(mat_U)
    1785          136 :       CALL cp_fm_release(mat_T)
    1786          136 :       CALL cp_fm_release(mat_theta)
    1787              : 
    1788          136 :       CALL rotate_orbitals(mat_R, vectors)
    1789              : 
    1790          136 :       CALL cp_fm_release(mat_R)
    1791          136 :       DEALLOCATE (evals_exp, evals, diag_z)
    1792          136 :       DEALLOCATE (mii, mij, mjj)
    1793              : 
    1794          136 :       CALL timestop(handle)
    1795              : 
    1796          272 :    END SUBROUTINE crazy_rotations
    1797              : 
    1798              : ! **************************************************************************************************
    1799              : !> \brief use the exponential parametrization as described in to perform a direct mini
    1800              : !>        Gerd Berghold et al. PRB 61 (15), pag. 10040 (2000)
    1801              : !> none of the input is modified for the time being, just finds the rotations
    1802              : !> that minimizes, and throws it away afterwards :-)
    1803              : !> apart from being expensive and not cleaned, this works fine
    1804              : !> useful to try different spread functionals
    1805              : !> \param weights ...
    1806              : !> \param zij ...
    1807              : !> \param vectors ...
    1808              : !> \param max_iter ...
    1809              : !> \param eps_localization ...
    1810              : !> \param iterations ...
    1811              : ! **************************************************************************************************
    1812            2 :    SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
    1813              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    1814              :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :), vectors
    1815              :       INTEGER, INTENT(IN)                                :: max_iter
    1816              :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
    1817              :       INTEGER                                            :: iterations
    1818              : 
    1819              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'direct_mini'
    1820              :       COMPLEX(KIND=dp), PARAMETER                        :: cone = (1.0_dp, 0.0_dp), &
    1821              :                                                             czero = (0.0_dp, 0.0_dp)
    1822              :       REAL(KIND=dp), PARAMETER                           :: gold_sec = 0.3819_dp
    1823              : 
    1824              :       COMPLEX(KIND=dp)                                   :: lk, ll, tmp
    1825            2 :       COMPLEX(KIND=dp), DIMENSION(:), POINTER            :: evals_exp
    1826            2 :       COMPLEX(KIND=dp), DIMENSION(:, :), POINTER         :: diag_z
    1827              :       INTEGER                                            :: handle, i, icol, idim, irow, &
    1828              :                                                             line_search_count, line_searches, lsl, &
    1829              :                                                             lsm, lsr, n, ncol_local, ndim, &
    1830              :                                                             nrow_local, output_unit
    1831            2 :       INTEGER, DIMENSION(:), POINTER                     :: col_indices, row_indices
    1832              :       LOGICAL                                            :: new_direction
    1833              :       REAL(KIND=dp)                                      :: a, b, beta_pr, c, denom, ds, ds_min, fa, &
    1834              :                                                             fb, fc, nom, normg, normg_cross, &
    1835              :                                                             normg_old, npos, omega, tol, val, x0, &
    1836              :                                                             x1, xa, xb, xc
    1837              :       REAL(KIND=dp), DIMENSION(150)                      :: energy, grad, pos
    1838            2 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: evals, fval, fvald
    1839              :       TYPE(cp_cfm_type)                                  :: cmat_A, cmat_B, cmat_M, cmat_R, cmat_t1, &
    1840              :                                                             cmat_t2, cmat_U
    1841            2 :       TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:)       :: c_zij
    1842              :       TYPE(cp_fm_type)                                   :: matrix_A, matrix_G, matrix_G_old, &
    1843              :                                                             matrix_G_search, matrix_H, matrix_R, &
    1844              :                                                             matrix_T
    1845              : 
    1846            2 :       NULLIFY (evals, evals_exp, diag_z, fval, fvald)
    1847              : 
    1848            2 :       CALL timeset(routineN, handle)
    1849            2 :       output_unit = cp_logger_get_default_io_unit()
    1850              : 
    1851            2 :       n = zij(1, 1)%matrix_struct%nrow_global
    1852            2 :       ndim = (SIZE(zij, 2))
    1853              : 
    1854            2 :       IF (output_unit > 0) THEN
    1855            1 :          WRITE (output_unit, '(T4,A )') "Localization by direct minimization of the functional; "
    1856            1 :          WRITE (output_unit, '(T5,2A13,A20,A20,A10 )') " Line search ", " Iteration ", " Functional ", " Tolerance ", " ds Min "
    1857              :       END IF
    1858              : 
    1859           20 :       ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n))
    1860           18 :       ALLOCATE (c_zij(ndim))
    1861              : 
    1862              :       ! create the three complex matrices Z
    1863           14 :       DO idim = 1, ndim
    1864           12 :          CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
    1865              :          c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
    1866          158 :                                         zij(2, idim)%local_data, dp)
    1867              :       END DO
    1868              : 
    1869            2 :       CALL cp_fm_create(matrix_A, zij(1, 1)%matrix_struct)
    1870            2 :       CALL cp_fm_create(matrix_G, zij(1, 1)%matrix_struct)
    1871            2 :       CALL cp_fm_create(matrix_T, zij(1, 1)%matrix_struct)
    1872            2 :       CALL cp_fm_create(matrix_H, zij(1, 1)%matrix_struct)
    1873            2 :       CALL cp_fm_create(matrix_G_search, zij(1, 1)%matrix_struct)
    1874            2 :       CALL cp_fm_create(matrix_G_old, zij(1, 1)%matrix_struct)
    1875            2 :       CALL cp_fm_create(matrix_R, zij(1, 1)%matrix_struct)
    1876            2 :       CALL cp_fm_set_all(matrix_R, 0.0_dp, 1.0_dp)
    1877              : 
    1878            2 :       CALL cp_fm_set_all(matrix_A, 0.0_dp)
    1879              : !    CALL cp_fm_init_random ( matrix_A )
    1880              : 
    1881            2 :       CALL cp_cfm_create(cmat_A, zij(1, 1)%matrix_struct)
    1882            2 :       CALL cp_cfm_create(cmat_U, zij(1, 1)%matrix_struct)
    1883            2 :       CALL cp_cfm_create(cmat_R, zij(1, 1)%matrix_struct)
    1884            2 :       CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
    1885            2 :       CALL cp_cfm_create(cmat_t2, zij(1, 1)%matrix_struct)
    1886            2 :       CALL cp_cfm_create(cmat_B, zij(1, 1)%matrix_struct)
    1887            2 :       CALL cp_cfm_create(cmat_M, zij(1, 1)%matrix_struct)
    1888              : 
    1889              :       CALL cp_cfm_get_info(cmat_B, nrow_local=nrow_local, ncol_local=ncol_local, &
    1890            2 :                            row_indices=row_indices, col_indices=col_indices)
    1891              : 
    1892            2 :       CALL cp_fm_set_all(matrix_G_old, 0.0_dp)
    1893            2 :       CALL cp_fm_set_all(matrix_G_search, 0.0_dp)
    1894            2 :       normg_old = 1.0E30_dp
    1895            2 :       ds_min = 1.0_dp
    1896            2 :       new_direction = .TRUE.
    1897            2 :       Iterations = 0
    1898            2 :       line_searches = 0
    1899            2 :       line_search_count = 0
    1900         1670 :       DO
    1901         1670 :          iterations = iterations + 1
    1902              :          ! compute U,R,evals given A
    1903        21710 :          cmat_A%local_data = CMPLX(0.0_dp, matrix_A%local_data, dp) ! cmat_A is hermitian, evals are reals
    1904         1670 :          CALL cp_cfm_heevd(cmat_A, cmat_R, evals)
    1905        15030 :          evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
    1906         1670 :          CALL cp_cfm_to_cfm(cmat_R, cmat_t1)
    1907         1670 :          CALL cp_cfm_column_scale(cmat_t1, evals_exp)
    1908         1670 :          CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_R, czero, cmat_U)
    1909        21710 :          cmat_U%local_data = REAL(cmat_U%local_data, KIND=dp) ! enforce numerics, U is a real matrix
    1910              : 
    1911         1670 :          IF (new_direction .AND. MOD(line_searches, 20) == 5) THEN ! reset with A .eq. 0
    1912           28 :             DO idim = 1, ndim
    1913           24 :                CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_U, czero, cmat_t1)
    1914           28 :                CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_U, cmat_t1, czero, c_zij(idim))
    1915              :             END DO
    1916              :             ! collect rotation matrix
    1917           52 :             matrix_H%local_data = REAL(cmat_U%local_data, KIND=dp)
    1918            4 :             CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_R, matrix_H, 0.0_dp, matrix_T)
    1919            4 :             CALL cp_fm_to_fm(matrix_T, matrix_R)
    1920              : 
    1921            4 :             CALL cp_cfm_set_all(cmat_U, czero, cone)
    1922            4 :             CALL cp_cfm_set_all(cmat_R, czero, cone)
    1923            4 :             CALL cp_cfm_set_all(cmat_A, czero)
    1924            4 :             CALL cp_fm_set_all(matrix_A, 0.0_dp)
    1925           20 :             evals(:) = 0.0_dp
    1926           36 :             evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
    1927            4 :             CALL cp_fm_set_all(matrix_G_old, 0.0_dp)
    1928            4 :             CALL cp_fm_set_all(matrix_G_search, 0.0_dp)
    1929            4 :             normg_old = 1.0E30_dp
    1930              :          END IF
    1931              : 
    1932              :          ! compute Omega and M
    1933         1670 :          CALL cp_cfm_set_all(cmat_M, czero)
    1934         1670 :          omega = 0.0_dp
    1935        11690 :          DO idim = 1, ndim
    1936        10020 :             CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_U, czero, cmat_t1) ! t1=ZU
    1937        10020 :             CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_U, cmat_t1, czero, cmat_t2) ! t2=(U^T)ZU
    1938        50100 :             DO i = 1, n
    1939        40080 :                CALL cp_cfm_get_element(cmat_t2, i, i, diag_z(i, idim))
    1940              :                SELECT CASE (2) ! allows for selection of different spread functionals
    1941              :                CASE (1)
    1942              :                   fval(i) = -weights(idim)*LOG(ABS(diag_z(i, idim))**2)
    1943              :                   fvald(i) = -weights(idim)/(ABS(diag_z(i, idim))**2)
    1944              :                CASE (2) ! corresponds to the jacobi setup
    1945        40080 :                   fval(i) = weights(idim) - weights(idim)*ABS(diag_z(i, idim))**2
    1946        40080 :                   fvald(i) = -weights(idim)
    1947              :                END SELECT
    1948        50100 :                omega = omega + fval(i)
    1949              :             END DO
    1950        51770 :             DO icol = 1, ncol_local
    1951       130260 :                DO irow = 1, nrow_local
    1952        80160 :                   tmp = cmat_t1%local_data(irow, icol)*CONJG(diag_z(col_indices(icol), idim))
    1953              :                   cmat_M%local_data(irow, icol) = cmat_M%local_data(irow, icol) &
    1954       120240 :                                                   + 4.0_dp*fvald(col_indices(icol))*REAL(tmp, KIND=dp)
    1955              :                END DO
    1956              :             END DO
    1957              :          END DO
    1958              : 
    1959              :          ! compute Hessian diagonal approximation for the preconditioner
    1960              :          IF (.TRUE.) THEN
    1961         1670 :             CALL gradsq_at_0(diag_z, weights, matrix_H, ndim)
    1962              :          ELSE
    1963              :             CALL cp_fm_set_all(matrix_H, 1.0_dp)
    1964              :          END IF
    1965              : 
    1966              :          ! compute B
    1967         8350 :          DO icol = 1, ncol_local
    1968        21710 :             DO irow = 1, nrow_local
    1969        13360 :                ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow))
    1970        13360 :                lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol))
    1971        20040 :                IF (ABS(ll - lk) < 0.5_dp) THEN ! use a series expansion to avoid loss of precision
    1972         5378 :                   tmp = 1.0_dp
    1973         5378 :                   cmat_B%local_data(irow, icol) = 0.0_dp
    1974        91426 :                   DO i = 1, 16
    1975        86048 :                      cmat_B%local_data(irow, icol) = cmat_B%local_data(irow, icol) + tmp
    1976        91426 :                      tmp = tmp*(ll - lk)/(i + 1)
    1977              :                   END DO
    1978         5378 :                   cmat_B%local_data(irow, icol) = cmat_B%local_data(irow, icol)*EXP(lk)
    1979              :                ELSE
    1980         7982 :                   cmat_B%local_data(irow, icol) = (EXP(lk) - EXP(ll))/(lk - ll)
    1981              :                END IF
    1982              :             END DO
    1983              :          END DO
    1984              :          ! compute gradient matrix_G
    1985              : 
    1986         1670 :          CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_M, cmat_R, czero, cmat_t1) ! t1=(M^T)(R^T)
    1987         1670 :          CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_R, cmat_t1, czero, cmat_t2) ! t2=(R)t1
    1988         1670 :          CALL cp_cfm_schur_product(cmat_t2, cmat_B, cmat_t1)
    1989         1670 :          CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_R, czero, cmat_t2)
    1990         1670 :          CALL parallel_gemm('N', 'N', n, n, n, cone, cmat_R, cmat_t2, czero, cmat_t1)
    1991        21710 :          matrix_G%local_data = REAL(cmat_t1%local_data, KIND=dp)
    1992         1670 :          CALL cp_fm_transpose(matrix_G, matrix_T)
    1993         1670 :          CALL cp_fm_scale_and_add(-1.0_dp, matrix_G, 1.0_dp, matrix_T)
    1994         1670 :          CALL cp_fm_maxabsval(matrix_G, tol)
    1995              : 
    1996              :          ! from here on, minimizing technology
    1997         1670 :          IF (new_direction) THEN
    1998              :             ! energy converged up to machine precision ?
    1999           66 :             line_searches = line_searches + 1
    2000              :             ! DO i=1,line_search_count
    2001              :             !   write(15,*) pos(i),energy(i)
    2002              :             ! ENDDO
    2003              :             ! write(15,*) ""
    2004              :             ! CALL m_flush(15)
    2005              :             !write(16,*) evals(:)
    2006              :             !write(17,*) matrix_A%local_data(:,:)
    2007              :             !write(18,*) matrix_G%local_data(:,:)
    2008           66 :             IF (output_unit > 0) THEN
    2009           33 :                WRITE (output_unit, '(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, Iterations, Omega, tol, ds_min
    2010           33 :                CALL m_flush(output_unit)
    2011              :             END IF
    2012           66 :             IF (tol < eps_localization .OR. iterations > max_iter) EXIT
    2013              : 
    2014              :             IF (.TRUE.) THEN ! do conjugate gradient CG
    2015           64 :                CALL cp_fm_trace(matrix_G, matrix_G_old, normg_cross)
    2016           64 :                normg_cross = normg_cross*0.5_dp ! takes into account the fact that A is antisymmetric
    2017              :                ! apply the preconditioner
    2018          320 :                DO icol = 1, ncol_local
    2019          832 :                   DO irow = 1, nrow_local
    2020          768 :                      matrix_G_old%local_data(irow, icol) = matrix_G%local_data(irow, icol)/matrix_H%local_data(irow, icol)
    2021              :                   END DO
    2022              :                END DO
    2023           64 :                CALL cp_fm_trace(matrix_G, matrix_G_old, normg)
    2024           64 :                normg = normg*0.5_dp
    2025           64 :                beta_pr = (normg - normg_cross)/normg_old
    2026           64 :                normg_old = normg
    2027           64 :                beta_pr = MAX(beta_pr, 0.0_dp)
    2028           64 :                CALL cp_fm_scale_and_add(beta_pr, matrix_G_search, -1.0_dp, matrix_G_old)
    2029           64 :                CALL cp_fm_trace(matrix_G_search, matrix_G_old, normg_cross)
    2030           64 :                IF (normg_cross >= 0) THEN ! back to SD
    2031            6 :                   IF (matrix_A%matrix_struct%para_env%is_source()) THEN
    2032            3 :                      WRITE (cp_logger_get_default_unit_nr(), *) "!"
    2033              :                   END IF
    2034            6 :                   beta_pr = 0.0_dp
    2035            6 :                   CALL cp_fm_scale_and_add(beta_pr, matrix_G_search, -1.0_dp, matrix_G_old)
    2036              :                END IF
    2037              :             ELSE ! SD
    2038              :                CALL cp_fm_scale_and_add(0.0_dp, matrix_G_search, -1.0_dp, matrix_G)
    2039              :             END IF
    2040              :             ! ds_min=1.0E-4_dp
    2041              :             line_search_count = 0
    2042              :          END IF
    2043         1668 :          line_search_count = line_search_count + 1
    2044         1668 :          energy(line_search_count) = Omega
    2045              : 
    2046              :          ! line search section
    2047              :          SELECT CASE (3)
    2048              :          CASE (1) ! two point line search
    2049              :             SELECT CASE (line_search_count)
    2050              :             CASE (1)
    2051              :                pos(1) = 0.0_dp
    2052              :                pos(2) = ds_min
    2053              :                CALL cp_fm_trace(matrix_G, matrix_G_search, grad(1))
    2054              :                grad(1) = grad(1)/2.0_dp
    2055              :                new_direction = .FALSE.
    2056              :             CASE (2)
    2057              :                new_direction = .TRUE.
    2058              :                x0 = pos(1) ! 0.0_dp
    2059              :                c = energy(1)
    2060              :                b = grad(1)
    2061              :                x1 = pos(2)
    2062              :                a = (energy(2) - b*x1 - c)/(x1**2)
    2063              :                IF (a <= 0.0_dp) a = 1.0E-15_dp
    2064              :                npos = -b/(2.0_dp*a)
    2065              :                val = a*npos**2 + b*npos + c
    2066              :                IF (val < energy(1) .AND. val <= energy(2)) THEN
    2067              :                   ! we go to a minimum, but ...
    2068              :                   ! we take a guard against too large steps
    2069              :                   pos(3) = MIN(npos, MAXVAL(pos(1:2))*4.0_dp)
    2070              :                ELSE ! just take an extended step
    2071              :                   pos(3) = MAXVAL(pos(1:2))*2.0_dp
    2072              :                END IF
    2073              :             END SELECT
    2074              :          CASE (2) ! 3 point line search
    2075              :             SELECT CASE (line_search_count)
    2076              :             CASE (1)
    2077              :                new_direction = .FALSE.
    2078              :                pos(1) = 0.0_dp
    2079              :                pos(2) = ds_min*0.8_dp
    2080              :             CASE (2)
    2081              :                new_direction = .FALSE.
    2082              :                IF (energy(2) > energy(1)) THEN
    2083              :                   pos(3) = ds_min*0.7_dp
    2084              :                ELSE
    2085              :                   pos(3) = ds_min*1.4_dp
    2086              :                END IF
    2087              :             CASE (3)
    2088              :                new_direction = .TRUE.
    2089              :                xa = pos(1)
    2090              :                xb = pos(2)
    2091              :                xc = pos(3)
    2092              :                fa = energy(1)
    2093              :                fb = energy(2)
    2094              :                fc = energy(3)
    2095              :                nom = (xb - xa)**2*(fb - fc) - (xb - xc)**2*(fb - fa)
    2096              :                denom = (xb - xa)*(fb - fc) - (xb - xc)*(fb - fa)
    2097              :                IF (ABS(denom) <= 1.0E-18_dp*MAX(ABS(fb - fc), ABS(fb - fa))) THEN
    2098              :                   npos = xb
    2099              :                ELSE
    2100              :                   npos = xb - 0.5_dp*nom/denom ! position of the stationary point
    2101              :                END IF
    2102              :                val = (npos - xa)*(npos - xb)*fc/((xc - xa)*(xc - xb)) + &
    2103              :                      (npos - xb)*(npos - xc)*fa/((xa - xb)*(xa - xc)) + &
    2104              :                      (npos - xc)*(npos - xa)*fb/((xb - xc)*(xb - xa))
    2105              :                IF (val < fa .AND. val <= fb .AND. val <= fc) THEN ! OK, we go to a minimum
    2106              :                   ! we take a guard against too large steps
    2107              :                   pos(4) = MAX(MAXVAL(pos(1:3))*0.01_dp, &
    2108              :                                MIN(npos, MAXVAL(pos(1:3))*4.0_dp))
    2109              :                ELSE ! just take an extended step
    2110              :                   pos(4) = MAXVAL(pos(1:3))*2.0_dp
    2111              :                END IF
    2112              :             END SELECT
    2113              :          CASE (3) ! golden section hunt
    2114         1668 :             new_direction = .FALSE.
    2115         1668 :             IF (line_search_count == 1) THEN
    2116           64 :                lsl = 1
    2117           64 :                lsr = 0
    2118           64 :                lsm = 1
    2119           64 :                pos(1) = 0.0_dp
    2120           64 :                pos(2) = ds_min/gold_sec
    2121              :             ELSE
    2122         1604 :                IF (line_search_count == 150) CPABORT("Too many")
    2123         1604 :                IF (lsr == 0) THEN
    2124          148 :                   IF (energy(line_search_count - 1) < energy(line_search_count)) THEN
    2125           64 :                      lsr = line_search_count
    2126           64 :                      pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
    2127              :                   ELSE
    2128           84 :                      lsl = lsm
    2129           84 :                      lsm = line_search_count
    2130           84 :                      pos(line_search_count + 1) = pos(line_search_count)/gold_sec
    2131              :                   END IF
    2132              :                ELSE
    2133         1456 :                   IF (pos(line_search_count) < pos(lsm)) THEN
    2134          690 :                      IF (energy(line_search_count) < energy(lsm)) THEN
    2135              :                         lsr = lsm
    2136              :                         lsm = line_search_count
    2137              :                      ELSE
    2138          558 :                         lsl = line_search_count
    2139              :                      END IF
    2140              :                   ELSE
    2141          766 :                      IF (energy(line_search_count) < energy(lsm)) THEN
    2142              :                         lsl = lsm
    2143              :                         lsm = line_search_count
    2144              :                      ELSE
    2145          518 :                         lsr = line_search_count
    2146              :                      END IF
    2147              :                   END IF
    2148         1456 :                   IF (pos(lsr) - pos(lsm) > pos(lsm) - pos(lsl)) THEN
    2149          722 :                      pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
    2150              :                   ELSE
    2151          734 :                      pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
    2152              :                   END IF
    2153         1456 :                   IF ((pos(lsr) - pos(lsl)) < 1.0E-3_dp*pos(lsr)) THEN
    2154           64 :                      new_direction = .TRUE.
    2155              :                   END IF
    2156              :                END IF ! lsr .eq. 0
    2157              :             END IF ! first step
    2158              :          END SELECT
    2159              :          ! now go to the suggested point
    2160         1668 :          ds_min = pos(line_search_count + 1)
    2161         1668 :          ds = pos(line_search_count + 1) - pos(line_search_count)
    2162         1670 :          CALL cp_fm_scale_and_add(1.0_dp, matrix_A, ds, matrix_G_search)
    2163              :       END DO
    2164              : 
    2165              :       ! collect rotation matrix
    2166           26 :       matrix_H%local_data = REAL(cmat_U%local_data, KIND=dp)
    2167            2 :       CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_R, matrix_H, 0.0_dp, matrix_T)
    2168            2 :       CALL cp_fm_to_fm(matrix_T, matrix_R)
    2169            2 :       CALL rotate_orbitals(matrix_R, vectors)
    2170            2 :       CALL cp_fm_release(matrix_R)
    2171              : 
    2172            2 :       CALL cp_fm_release(matrix_A)
    2173            2 :       CALL cp_fm_release(matrix_G)
    2174            2 :       CALL cp_fm_release(matrix_H)
    2175            2 :       CALL cp_fm_release(matrix_T)
    2176            2 :       CALL cp_fm_release(matrix_G_search)
    2177            2 :       CALL cp_fm_release(matrix_G_old)
    2178            2 :       CALL cp_cfm_release(cmat_A)
    2179            2 :       CALL cp_cfm_release(cmat_U)
    2180            2 :       CALL cp_cfm_release(cmat_R)
    2181            2 :       CALL cp_cfm_release(cmat_t1)
    2182            2 :       CALL cp_cfm_release(cmat_t2)
    2183            2 :       CALL cp_cfm_release(cmat_B)
    2184            2 :       CALL cp_cfm_release(cmat_M)
    2185              : 
    2186            2 :       DEALLOCATE (evals, evals_exp, fval, fvald)
    2187              : 
    2188           14 :       DO idim = 1, SIZE(c_zij)
    2189          156 :          zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
    2190          156 :          zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
    2191           14 :          CALL cp_cfm_release(c_zij(idim))
    2192              :       END DO
    2193            2 :       DEALLOCATE (c_zij)
    2194            2 :       DEALLOCATE (diag_z)
    2195              : 
    2196            2 :       CALL timestop(handle)
    2197              : 
    2198            6 :    END SUBROUTINE direct_mini
    2199              : 
    2200              : ! **************************************************************************************************
    2201              : !> \brief Parallel algorithm for jacobi rotations
    2202              : !> \param weights ...
    2203              : !> \param zij ...
    2204              : !> \param vectors ...
    2205              : !> \param para_env ...
    2206              : !> \param max_iter ...
    2207              : !> \param eps_localization ...
    2208              : !> \param sweeps ...
    2209              : !> \param out_each ...
    2210              : !> \param target_time ...
    2211              : !> \param start_time ...
    2212              : !> \param restricted ...
    2213              : !> \par History
    2214              : !>      use allgather for improved performance
    2215              : !> \author MI (11.2009)
    2216              : ! **************************************************************************************************
    2217          390 :    SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
    2218              :                               sweeps, out_each, target_time, start_time, restricted)
    2219              : 
    2220              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    2221              :       TYPE(cp_fm_type), INTENT(IN)                       :: zij(:, :), vectors
    2222              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2223              :       INTEGER, INTENT(IN)                                :: max_iter
    2224              :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
    2225              :       INTEGER                                            :: sweeps
    2226              :       INTEGER, INTENT(IN)                                :: out_each
    2227              :       REAL(dp)                                           :: target_time, start_time
    2228              :       INTEGER                                            :: restricted
    2229              : 
    2230              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'jacobi_rot_para'
    2231              : 
    2232              :       INTEGER                                            :: dim2, handle, i, idim, ii, ilow1, ip, j, &
    2233              :                                                             nblock, nblock_max, ns_me, nstate, &
    2234              :                                                             output_unit
    2235          390 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: ns_bound
    2236          390 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: rotmat, z_ij_loc_im, z_ij_loc_re
    2237              :       REAL(KIND=dp)                                      :: xstate
    2238              :       TYPE(cp_fm_type)                                   :: rmat
    2239              :       TYPE(set_c_2d_type), DIMENSION(:), POINTER         :: cz_ij_loc
    2240              : 
    2241          390 :       CALL timeset(routineN, handle)
    2242              : 
    2243          390 :       output_unit = cp_logger_get_default_io_unit()
    2244              : 
    2245              :       NULLIFY (cz_ij_loc)
    2246              : 
    2247          390 :       dim2 = SIZE(zij, 2)
    2248              : 
    2249          390 :       CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
    2250          390 :       CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
    2251              : 
    2252          390 :       CALL cp_fm_get_info(rmat, nrow_global=nstate)
    2253              : 
    2254          390 :       IF (restricted > 0) THEN
    2255            0 :          IF (output_unit > 0) THEN
    2256            0 :             WRITE (output_unit, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
    2257              :          END IF
    2258            0 :          nstate = nstate - restricted
    2259              :       END IF
    2260              : 
    2261              :       ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
    2262          390 :       xstate = REAL(nstate, dp)/REAL(para_env%num_pe, dp)
    2263         1560 :       ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
    2264         1170 :       DO ip = 1, para_env%num_pe
    2265          780 :          ns_bound(ip - 1, 1) = MIN(nstate, NINT(xstate*(ip - 1))) + 1
    2266         1170 :          ns_bound(ip - 1, 2) = MIN(nstate, NINT(xstate*ip))
    2267              :       END DO
    2268          390 :       nblock_max = 0
    2269         1170 :       DO ip = 0, para_env%num_pe - 1
    2270          780 :          nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2271         1170 :          nblock_max = MAX(nblock_max, nblock)
    2272              :       END DO
    2273              : 
    2274              :       ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
    2275         1560 :       ALLOCATE (z_ij_loc_re(nstate, nblock_max))
    2276         1170 :       ALLOCATE (z_ij_loc_im(nstate, nblock_max))
    2277         2364 :       ALLOCATE (cz_ij_loc(dim2))
    2278         1584 :       DO idim = 1, dim2
    2279         3972 :          DO ip = 0, para_env%num_pe - 1
    2280         2388 :             nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2281         2388 :             CALL cp_fm_get_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
    2282         2388 :             CALL cp_fm_get_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
    2283         3582 :             IF (para_env%mepos == ip) THEN
    2284         4752 :                ALLOCATE (cz_ij_loc(idim)%c_array(nstate, nblock))
    2285         5034 :                DO i = 1, nblock
    2286        37830 :                   DO j = 1, nstate
    2287        36636 :                      cz_ij_loc(idim)%c_array(j, i) = CMPLX(z_ij_loc_re(j, i), z_ij_loc_im(j, i), dp)
    2288              :                   END DO
    2289              :                END DO
    2290              :             END IF
    2291              :          END DO ! ip
    2292              :       END DO
    2293          390 :       DEALLOCATE (z_ij_loc_re)
    2294          390 :       DEALLOCATE (z_ij_loc_im)
    2295              : 
    2296         1560 :       ALLOCATE (rotmat(nstate, 2*nblock_max))
    2297              : 
    2298              :       CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, ns_bound, &
    2299              :                                 cz_ij_loc, rotmat, output_unit, eps_localization=eps_localization, &
    2300          390 :                                 target_time=target_time, start_time=start_time)
    2301              : 
    2302          390 :       ilow1 = ns_bound(para_env%mepos, 1)
    2303          390 :       ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
    2304         1170 :       ALLOCATE (z_ij_loc_re(nstate, nblock_max))
    2305         1170 :       ALLOCATE (z_ij_loc_im(nstate, nblock_max))
    2306         1584 :       DO idim = 1, dim2
    2307         3972 :          DO ip = 0, para_env%num_pe - 1
    2308         2388 :             z_ij_loc_re = 0.0_dp
    2309         2388 :             z_ij_loc_im = 0.0_dp
    2310         2388 :             nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2311         2388 :             IF (ip == para_env%mepos) THEN
    2312         5034 :                ns_me = nblock
    2313         5034 :                DO i = 1, ns_me
    2314        36636 :                   ii = ilow1 + i - 1
    2315        37830 :                   DO j = 1, nstate
    2316        32796 :                      z_ij_loc_re(j, i) = REAL(cz_ij_loc(idim)%c_array(j, i), dp)
    2317        36636 :                      z_ij_loc_im(j, i) = AIMAG(cz_ij_loc(idim)%c_array(j, i))
    2318              :                   END DO
    2319              :                END DO
    2320              :             END IF
    2321         2388 :             CALL para_env%bcast(z_ij_loc_re, ip)
    2322         2388 :             CALL para_env%bcast(z_ij_loc_im, ip)
    2323         2388 :             CALL cp_fm_set_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
    2324         3582 :             CALL cp_fm_set_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
    2325              :          END DO ! ip
    2326              :       END DO
    2327              : 
    2328         1170 :       DO ip = 0, para_env%num_pe - 1
    2329          780 :          z_ij_loc_re = 0.0_dp
    2330          780 :          nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2331          780 :          IF (ip == para_env%mepos) THEN
    2332         1633 :             ns_me = nblock
    2333         1633 :             DO i = 1, ns_me
    2334        11774 :                ii = ilow1 + i - 1
    2335        12164 :                DO j = 1, nstate
    2336        11774 :                   z_ij_loc_re(j, i) = rotmat(j, i)
    2337              :                END DO
    2338              :             END DO
    2339              :          END IF
    2340          780 :          CALL para_env%bcast(z_ij_loc_re, ip)
    2341         1170 :          CALL cp_fm_set_submatrix(rmat, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
    2342              :       END DO
    2343              : 
    2344          390 :       DEALLOCATE (z_ij_loc_re)
    2345          390 :       DEALLOCATE (z_ij_loc_im)
    2346         1584 :       DO idim = 1, dim2
    2347         1584 :          DEALLOCATE (cz_ij_loc(idim)%c_array)
    2348              :       END DO
    2349          390 :       DEALLOCATE (cz_ij_loc)
    2350              : 
    2351          390 :       CALL para_env%sync()
    2352          390 :       CALL rotate_orbitals(rmat, vectors)
    2353          390 :       CALL cp_fm_release(rmat)
    2354              : 
    2355          390 :       DEALLOCATE (rotmat)
    2356          390 :       DEALLOCATE (ns_bound)
    2357              : 
    2358          390 :       CALL timestop(handle)
    2359              : 
    2360         1170 :    END SUBROUTINE jacobi_rot_para
    2361              : 
    2362              : ! **************************************************************************************************
    2363              : !> \brief almost identical to 'jacobi_rot_para' but with different inout variables
    2364              : !> \param weights ...
    2365              : !> \param czij ...
    2366              : !> \param para_env ...
    2367              : !> \param max_iter ...
    2368              : !> \param rmat ...
    2369              : !> \param tol_out ...
    2370              : !> \author Soumya Ghosh (08/21)
    2371              : ! **************************************************************************************************
    2372           32 :    SUBROUTINE jacobi_rot_para_1(weights, czij, para_env, max_iter, rmat, tol_out)
    2373              : 
    2374              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    2375              :       TYPE(cp_cfm_type), INTENT(IN)                      :: czij(:)
    2376              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2377              :       INTEGER, INTENT(IN)                                :: max_iter
    2378              :       TYPE(cp_cfm_type), INTENT(IN)                      :: rmat
    2379              :       REAL(dp), INTENT(OUT), OPTIONAL                    :: tol_out
    2380              : 
    2381              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'jacobi_rot_para_1'
    2382              : 
    2383           32 :       COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :)     :: czij_array
    2384              :       INTEGER                                            :: dim2, handle, i, idim, ii, ilow1, ip, j, &
    2385              :                                                             nblock, nblock_max, ns_me, nstate, &
    2386              :                                                             sweeps
    2387              :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: ns_bound
    2388           32 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: rotmat, z_ij_loc_re
    2389              :       REAL(KIND=dp)                                      :: xstate
    2390              :       TYPE(set_c_2d_type), DIMENSION(:), POINTER         :: cz_ij_loc
    2391              : 
    2392           32 :       CALL timeset(routineN, handle)
    2393              : 
    2394           32 :       dim2 = SIZE(czij)
    2395              : 
    2396           32 :       CALL cp_cfm_set_all(rmat, CMPLX(0._dp, 0._dp, dp), CMPLX(1._dp, 0._dp, dp))
    2397              : 
    2398           32 :       CALL cp_cfm_get_info(rmat, nrow_global=nstate)
    2399              : 
    2400              :       ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
    2401           32 :       xstate = REAL(nstate, dp)/REAL(para_env%num_pe, dp)
    2402          128 :       ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
    2403           96 :       DO ip = 1, para_env%num_pe
    2404           64 :          ns_bound(ip - 1, 1) = MIN(nstate, NINT(xstate*(ip - 1))) + 1
    2405           96 :          ns_bound(ip - 1, 2) = MIN(nstate, NINT(xstate*ip))
    2406              :       END DO
    2407           32 :       nblock_max = 0
    2408           96 :       DO ip = 0, para_env%num_pe - 1
    2409           64 :          nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2410           96 :          nblock_max = MAX(nblock_max, nblock)
    2411              :       END DO
    2412              : 
    2413              :       ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
    2414          128 :       ALLOCATE (czij_array(nstate, nblock_max))
    2415          192 :       ALLOCATE (cz_ij_loc(dim2))
    2416          128 :       DO idim = 1, dim2
    2417          320 :          DO ip = 0, para_env%num_pe - 1
    2418          192 :             nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2419              :             ! cfm --> allocatable
    2420          192 :             CALL cp_cfm_get_submatrix(czij(idim), czij_array, 1, ns_bound(ip, 1), nstate, nblock)
    2421          288 :             IF (para_env%mepos == ip) THEN
    2422           96 :                ns_me = nblock
    2423          384 :                ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me))
    2424         2160 :                DO i = 1, ns_me
    2425        90912 :                   DO j = 1, nstate
    2426        90816 :                      cz_ij_loc(idim)%c_array(j, i) = czij_array(j, i)
    2427              :                   END DO
    2428              :                END DO
    2429              :             END IF
    2430              :          END DO ! ip
    2431              :       END DO
    2432           32 :       DEALLOCATE (czij_array)
    2433              : 
    2434          128 :       ALLOCATE (rotmat(nstate, 2*nblock_max))
    2435              : 
    2436              :       CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, 1, dim2, nstate, nblock_max, ns_bound, &
    2437           32 :                                 cz_ij_loc, rotmat, 0, tol_out=tol_out)
    2438              : 
    2439           32 :       ilow1 = ns_bound(para_env%mepos, 1)
    2440           32 :       ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
    2441          128 :       ALLOCATE (z_ij_loc_re(nstate, nblock_max))
    2442              : 
    2443           96 :       DO ip = 0, para_env%num_pe - 1
    2444           64 :          z_ij_loc_re = 0.0_dp
    2445           64 :          nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
    2446           64 :          IF (ip == para_env%mepos) THEN
    2447          720 :             ns_me = nblock
    2448          720 :             DO i = 1, ns_me
    2449        30272 :                ii = ilow1 + i - 1
    2450        30304 :                DO j = 1, nstate
    2451        30272 :                   z_ij_loc_re(j, i) = rotmat(j, i)
    2452              :                END DO
    2453              :             END DO
    2454              :          END IF
    2455           64 :          CALL para_env%bcast(z_ij_loc_re, ip)
    2456        62048 :          CALL cp_cfm_set_submatrix(rmat, CMPLX(z_ij_loc_re, 0.0_dp, dp), 1, ns_bound(ip, 1), nstate, nblock)
    2457              :       END DO
    2458              : 
    2459           32 :       DEALLOCATE (z_ij_loc_re)
    2460          128 :       DO idim = 1, dim2
    2461          128 :          DEALLOCATE (cz_ij_loc(idim)%c_array)
    2462              :       END DO
    2463           32 :       DEALLOCATE (cz_ij_loc)
    2464              : 
    2465           32 :       CALL para_env%sync()
    2466              : 
    2467           32 :       DEALLOCATE (rotmat)
    2468           32 :       DEALLOCATE (ns_bound)
    2469              : 
    2470           32 :       CALL timestop(handle)
    2471              : 
    2472           64 :    END SUBROUTINE jacobi_rot_para_1
    2473              : 
    2474              : ! **************************************************************************************************
    2475              : !> \brief Parallel algorithm for jacobi rotations
    2476              : !> \param weights ...
    2477              : !> \param para_env ...
    2478              : !> \param max_iter ...
    2479              : !> \param sweeps ...
    2480              : !> \param out_each ...
    2481              : !> \param dim2 ...
    2482              : !> \param nstate ...
    2483              : !> \param nblock_max ...
    2484              : !> \param ns_bound ...
    2485              : !> \param cz_ij_loc ...
    2486              : !> \param rotmat ...
    2487              : !> \param output_unit ...
    2488              : !> \param tol_out ...
    2489              : !> \param eps_localization ...
    2490              : !> \param target_time ...
    2491              : !> \param start_time ...
    2492              : !> \par History
    2493              : !>      split out to reuse with different input types
    2494              : !> \author HF (05.2022)
    2495              : ! **************************************************************************************************
    2496         1266 :    SUBROUTINE jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, &
    2497          422 :                                    ns_bound, cz_ij_loc, rotmat, output_unit, tol_out, eps_localization, target_time, start_time)
    2498              : 
    2499              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    2500              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2501              :       INTEGER, INTENT(IN)                                :: max_iter
    2502              :       INTEGER, INTENT(OUT)                               :: sweeps
    2503              :       INTEGER, INTENT(IN)                                :: out_each, dim2, nstate, nblock_max
    2504              :       INTEGER, DIMENSION(0:, :), INTENT(IN)              :: ns_bound
    2505              :       TYPE(set_c_2d_type), DIMENSION(:), POINTER         :: cz_ij_loc
    2506              :       REAL(dp), DIMENSION(:, :), INTENT(OUT)             :: rotmat
    2507              :       INTEGER, INTENT(IN)                                :: output_unit
    2508              :       REAL(dp), INTENT(OUT), OPTIONAL                    :: tol_out
    2509              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: eps_localization
    2510              :       REAL(dp), OPTIONAL                                 :: target_time, start_time
    2511              : 
    2512              :       COMPLEX(KIND=dp)                                   :: zi, zj
    2513          422 :       COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)  :: c_array_me, c_array_partner
    2514              :       COMPLEX(KIND=dp), POINTER                          :: mii(:), mij(:), mjj(:)
    2515              :       INTEGER :: i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, ip, ip_has_i, &
    2516              :          ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, iu1, iu2, iup1, &
    2517              :          iup2, j, jj, jstate, k, kk, lsweep, n1, n2, npair, nperm, ns_me, ns_partner, &
    2518              :          ns_recv_from, ns_recv_partner
    2519          422 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: rcount, rdispl
    2520              :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: list_pair
    2521              :       LOGICAL                                            :: should_stop
    2522          422 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :)             :: gmat, rmat_loc, rmat_recv, rmat_send
    2523          422 :       REAL(dp), ALLOCATABLE, DIMENSION(:, :, :)          :: rmat_recv_all
    2524              :       REAL(KIND=dp)                                      :: ct, func, gmax, grad, ri, rj, st, t1, &
    2525              :                                                             t2, theta, tolerance, zc, zr
    2526          422 :       TYPE(set_c_1d_type), DIMENSION(:), POINTER         :: zdiag_all, zdiag_me
    2527          422 :       TYPE(set_c_2d_type), DIMENSION(:), POINTER         :: xyz_mix, xyz_mix_ns
    2528              : 
    2529          422 :       NULLIFY (zdiag_all, zdiag_me)
    2530          422 :       NULLIFY (xyz_mix, xyz_mix_ns)
    2531              :       NULLIFY (mii, mij, mjj)
    2532              : 
    2533         2110 :       ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
    2534              : 
    2535         1266 :       ALLOCATE (rcount(para_env%num_pe), STAT=istat)
    2536          844 :       ALLOCATE (rdispl(para_env%num_pe), STAT=istat)
    2537              : 
    2538          422 :       tolerance = 1.0e10_dp
    2539          422 :       sweeps = 0
    2540              : 
    2541              :       ! number of processor pairs and number of permutations
    2542          422 :       npair = (para_env%num_pe + 1)/2
    2543          422 :       nperm = para_env%num_pe - MOD(para_env%num_pe + 1, 2)
    2544         1266 :       ALLOCATE (list_pair(2, npair))
    2545              : 
    2546              :       ! initialize rotation matrix
    2547        87298 :       rotmat = 0.0_dp
    2548         2353 :       DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
    2549         1931 :          ii = i - ns_bound(para_env%mepos, 1) + 1
    2550         2353 :          rotmat(i, ii) = 1.0_dp
    2551              :       END DO
    2552              : 
    2553         2556 :       ALLOCATE (xyz_mix(dim2))
    2554         2134 :       ALLOCATE (xyz_mix_ns(dim2))
    2555         2556 :       ALLOCATE (zdiag_me(dim2))
    2556         2134 :       ALLOCATE (zdiag_all(dim2))
    2557              : 
    2558          422 :       ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
    2559          422 :       IF (ns_me /= 0) THEN
    2560         2070 :          ALLOCATE (c_array_me(nstate, ns_me, dim2))
    2561         1680 :          DO idim = 1, dim2
    2562         5478 :             ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me))
    2563              :          END DO
    2564         1656 :          ALLOCATE (gmat(nstate, ns_me))
    2565              :       END IF
    2566              : 
    2567         1712 :       DO idim = 1, dim2
    2568         3870 :          ALLOCATE (zdiag_me(idim)%c_array(nblock_max))
    2569         7530 :          zdiag_me(idim)%c_array = z_zero
    2570         3870 :          ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max))
    2571        14192 :          zdiag_all(idim)%c_array = z_zero
    2572              :       END DO
    2573         1688 :       ALLOCATE (rmat_recv(nblock_max*2, nblock_max))
    2574         1266 :       ALLOCATE (rmat_send(nblock_max*2, nblock_max))
    2575              : 
    2576              :       ! buffer for message passing
    2577         2110 :       ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1))
    2578              : 
    2579          422 :       IF (output_unit > 0) THEN
    2580          195 :          WRITE (output_unit, '(T4,A )') " Localization by iterative distributed Jacobi rotation"
    2581          195 :          WRITE (output_unit, '(T20,A12,T32, A22,T60, A12,A8 )') "Iteration", "Functional", "Tolerance", " Time "
    2582              :       END IF
    2583              : 
    2584        87304 :       DO lsweep = 1, max_iter + 1
    2585        87304 :          sweeps = lsweep
    2586        87304 :          IF (sweeps == max_iter + 1) THEN
    2587          156 :             IF (output_unit > 0) THEN
    2588           62 :                WRITE (output_unit, *) ' LOCALIZATION! loop did not converge within the maximum number of iterations.'
    2589           62 :                WRITE (output_unit, *) '               Present  Max. gradient = ', tolerance
    2590              :             END IF
    2591              :             EXIT
    2592              :          END IF
    2593        87148 :          t1 = m_walltime()
    2594              : 
    2595       174296 :          DO iperm = 1, nperm
    2596              : 
    2597              :             ! fix partners for this permutation, and get the number of states
    2598        87148 :             CALL eberlein(iperm, para_env, list_pair)
    2599        87148 :             ip_partner = -1
    2600        87148 :             ns_partner = 0
    2601        87148 :             DO ipair = 1, npair
    2602        87148 :                IF (list_pair(1, ipair) == para_env%mepos) THEN
    2603        43574 :                   ip_partner = list_pair(2, ipair)
    2604        43574 :                   EXIT
    2605        43574 :                ELSE IF (list_pair(2, ipair) == para_env%mepos) THEN
    2606        43574 :                   ip_partner = list_pair(1, ipair)
    2607        43574 :                   EXIT
    2608              :                END IF
    2609              :             END DO
    2610        87148 :             IF (ip_partner >= 0) THEN
    2611        87148 :                ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1
    2612              :             ELSE
    2613              :                ns_partner = 0
    2614              :             END IF
    2615              : 
    2616              :             ! if there is a non-zero block connecting two partners, jacobi-sweep it.
    2617        87148 :             IF (ns_partner*ns_me /= 0) THEN
    2618              : 
    2619       348528 :                ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner))
    2620        87132 :                rmat_loc = 0.0_dp
    2621       699326 :                DO i = 1, ns_me + ns_partner
    2622       699326 :                   rmat_loc(i, i) = 1.0_dp
    2623              :                END DO
    2624              : 
    2625       435660 :                ALLOCATE (c_array_partner(nstate, ns_partner, dim2))
    2626              : 
    2627       350130 :                DO idim = 1, dim2
    2628      1051992 :                   ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner))
    2629      1277862 :                   DO i = 1, ns_me
    2630      7898778 :                      c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i)
    2631              :                   END DO
    2632              :                END DO
    2633              : 
    2634              :                CALL para_env%sendrecv(msgin=c_array_me, dest=ip_partner, &
    2635        87132 :                                       msgout=c_array_partner, source=ip_partner)
    2636              : 
    2637        87132 :                n1 = ns_me
    2638        87132 :                n2 = ns_partner
    2639        87132 :                ilow1 = ns_bound(para_env%mepos, 1)
    2640        87132 :                iup1 = ns_bound(para_env%mepos, 1) + n1 - 1
    2641        87132 :                ilow2 = ns_bound(ip_partner, 1)
    2642        87132 :                iup2 = ns_bound(ip_partner, 1) + n2 - 1
    2643        87132 :                IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1)) THEN
    2644        43566 :                   il1 = 1
    2645              :                   iu1 = n1
    2646        43566 :                   iu1 = n1
    2647        43566 :                   il2 = 1 + n1
    2648        43566 :                   iu2 = n1 + n2
    2649              :                ELSE
    2650        43566 :                   il1 = 1 + n2
    2651              :                   iu1 = n1 + n2
    2652        43566 :                   iu1 = n1 + n2
    2653        43566 :                   il2 = 1
    2654        43566 :                   iu2 = n2
    2655              :                END IF
    2656              : 
    2657       350130 :                DO idim = 1, dim2
    2658      1190730 :                   DO i = 1, n1
    2659      4342242 :                      xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim)
    2660      4484268 :                      xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim)
    2661              :                   END DO
    2662      1277862 :                   DO i = 1, n2
    2663      4342242 :                      xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim)
    2664      4484268 :                      xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim)
    2665              :                   END DO
    2666              :                END DO
    2667              : 
    2668       699326 :                DO istate = 1, n1 + n2
    2669      2590980 :                   DO jstate = istate + 1, n1 + n2
    2670      7671970 :                      DO idim = 1, dim2
    2671      5780316 :                         mii(idim) = xyz_mix(idim)%c_array(istate, istate)
    2672      5780316 :                         mij(idim) = xyz_mix(idim)%c_array(istate, jstate)
    2673      7671970 :                         mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate)
    2674              :                      END DO
    2675      1891654 :                      CALL get_angle(mii, mjj, mij, weights, theta)
    2676      1891654 :                      st = SIN(theta)
    2677      1891654 :                      ct = COS(theta)
    2678      7671970 :                      DO idim = 1, dim2
    2679     51334218 :                         DO i = 1, n1 + n2
    2680     45553902 :                            zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate)
    2681     45553902 :                            zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate)
    2682     45553902 :                            xyz_mix(idim)%c_array(i, istate) = zi
    2683     51334218 :                            xyz_mix(idim)%c_array(i, jstate) = zj
    2684              :                         END DO
    2685     53225872 :                         DO i = 1, n1 + n2
    2686     45553902 :                            zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i)
    2687     45553902 :                            zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i)
    2688     45553902 :                            xyz_mix(idim)%c_array(istate, i) = zi
    2689     51334218 :                            xyz_mix(idim)%c_array(jstate, i) = zj
    2690              :                         END DO
    2691              :                      END DO
    2692              : 
    2693     17253578 :                      DO i = 1, n1 + n2
    2694     14749730 :                         ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate)
    2695     14749730 :                         rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate)
    2696     14749730 :                         rmat_loc(i, istate) = ri
    2697     16641384 :                         rmat_loc(i, jstate) = rj
    2698              :                      END DO
    2699              :                   END DO
    2700              :                END DO
    2701              : 
    2702        87132 :                k = nblock_max + 1
    2703              :                CALL para_env%sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, &
    2704      5094828 :                                       rotmat(1:nstate, k:k + n2 - 1), ip_partner)
    2705              : 
    2706        87132 :                IF (ilow1 < ilow2) THEN
    2707              :                   ! no longer compiles in official sdgb:
    2708              :                   !CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1, k), nstate, rmat_loc(1 + n1, 1), n1 + n2, 0.0_dp, gmat, nstate)
    2709              :                   ! probably inefficient:
    2710              :                   CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, rmat_loc(1 + n1:, 1:n1), &
    2711      1468694 :                              n2, 0.0_dp, gmat(:, :), nstate)
    2712              :                   CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(1:, 1:), &
    2713        43566 :                              n1 + n2, 1.0_dp, gmat(:, :), nstate)
    2714              :                ELSE
    2715              :                   CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, &
    2716        43566 :                              rmat_loc(1:, n2 + 1:), n1 + n2, 0.0_dp, gmat(:, :), nstate)
    2717              :                   ! no longer compiles in official sdgb:
    2718              :                   !CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1, 1), nstate, rmat_loc(n2 + 1, n2 + 1), n1 + n2, 1.0_dp, gmat, nstate)
    2719              :                   ! probably inefficient:
    2720              :                   CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(n2 + 1:, n2 + 1:), &
    2721      1148318 :                              n1, 1.0_dp, gmat(:, :), nstate)
    2722              :                END IF
    2723              : 
    2724        87132 :                CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1)
    2725              : 
    2726       350130 :                DO idim = 1, dim2
    2727      1190730 :                   DO i = 1, n1
    2728      7898778 :                      xyz_mix_ns(idim)%c_array(1:nstate, i) = z_zero
    2729              :                   END DO
    2730              : 
    2731      1190730 :                   DO istate = 1, n1
    2732      7898778 :                      DO jstate = 1, nstate
    2733     33346560 :                         DO i = 1, n2
    2734              :                            xyz_mix_ns(idim)%c_array(jstate, istate) = &
    2735              :                               xyz_mix_ns(idim)%c_array(jstate, istate) + &
    2736     32418828 :                               c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1)
    2737              :                         END DO
    2738              :                      END DO
    2739              :                   END DO
    2740      1277862 :                   DO istate = 1, n1
    2741      7898778 :                      DO jstate = 1, nstate
    2742     34186950 :                         DO i = 1, n1
    2743              :                            xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + &
    2744     33259218 :                                                                  c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1)
    2745              :                         END DO
    2746              :                      END DO
    2747              :                   END DO
    2748              :                END DO ! idim
    2749              : 
    2750        87132 :                DEALLOCATE (c_array_partner)
    2751              : 
    2752              :             ELSE ! save my data
    2753           64 :                DO idim = 1, dim2
    2754           88 :                   DO i = 1, ns_me
    2755          120 :                      xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i)
    2756              :                   END DO
    2757              :                END DO
    2758              :             END IF
    2759              : 
    2760       350194 :             DO idim = 1, dim2
    2761      1277950 :                DO i = 1, ns_me
    2762      7898874 :                   cz_ij_loc(idim)%c_array(1:nstate, i) = z_zero
    2763              :                END DO
    2764              :             END DO
    2765              : 
    2766        87148 :             IF (ns_partner*ns_me /= 0) THEN
    2767              :                ! transpose rotation matrix rmat_loc
    2768       699326 :                DO i = 1, ns_me + ns_partner
    2769      2590980 :                   DO j = i + 1, ns_me + ns_partner
    2770      1891654 :                      ri = rmat_loc(i, j)
    2771      1891654 :                      rmat_loc(i, j) = rmat_loc(j, i)
    2772      2503848 :                      rmat_loc(j, i) = ri
    2773              :                   END DO
    2774              :                END DO
    2775              : 
    2776              :                ! prepare for distribution
    2777       393229 :                DO i = 1, n1
    2778      1512260 :                   rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1)
    2779              :                END DO
    2780        87132 :                ik = nblock_max
    2781       393229 :                DO i = 1, n2
    2782      1471949 :                   rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1)
    2783              :                END DO
    2784              :             ELSE
    2785           16 :                rmat_send = 0.0_dp
    2786              :             END IF
    2787              : 
    2788              :             ! collect data from all tasks (this takes some significant time)
    2789        87148 :             CALL para_env%allgather(rmat_send, rmat_recv_all)
    2790              : 
    2791              :             ! update blocks everywhere
    2792       261444 :             DO ip = 0, para_env%num_pe - 1
    2793              : 
    2794       174296 :                ip_recv_from = MOD(para_env%mepos - IP + para_env%num_pe, para_env%num_pe)
    2795      6463592 :                rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from)
    2796              : 
    2797       174296 :                ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1
    2798              : 
    2799       261444 :                IF (ns_me /= 0) THEN
    2800       174280 :                   IF (ns_recv_from /= 0) THEN
    2801              :                      !look for the partner of ip_recv_from
    2802       174272 :                      ip_recv_partner = -1
    2803       174272 :                      ns_recv_partner = 0
    2804       174272 :                      DO ipair = 1, npair
    2805       174272 :                         IF (list_pair(1, ipair) == ip_recv_from) THEN
    2806        87140 :                            ip_recv_partner = list_pair(2, ipair)
    2807        87140 :                            EXIT
    2808        87132 :                         ELSE IF (list_pair(2, ipair) == ip_recv_from) THEN
    2809              :                            ip_recv_partner = list_pair(1, ipair)
    2810              :                            EXIT
    2811              :                         END IF
    2812              :                      END DO
    2813              : 
    2814       174272 :                      IF (ip_recv_partner >= 0) THEN
    2815       174272 :                         ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1
    2816              :                      END IF
    2817       174272 :                      IF (ns_recv_partner > 0) THEN
    2818       174264 :                         il1 = ns_bound(para_env%mepos, 1)
    2819       174264 :                         il_recv = ns_bound(ip_recv_from, 1)
    2820       174264 :                         il_recv_partner = ns_bound(ip_recv_partner, 1)
    2821       174264 :                         ik = nblock_max
    2822              : 
    2823       700260 :                         DO idim = 1, dim2
    2824      2381460 :                            DO i = 1, ns_recv_from
    2825      1855464 :                               ii = il_recv + i - 1
    2826      9089508 :                               DO j = 1, ns_me
    2827     33259218 :                                  jj = j
    2828     35114682 :                                  DO k = 1, ns_recv_from
    2829     26551170 :                                     kk = il_recv + k - 1
    2830              :                                     cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
    2831     33259218 :                                                                       rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j)
    2832              :                                  END DO
    2833              :                               END DO
    2834              :                            END DO
    2835      2555724 :                            DO i = 1, ns_recv_from
    2836      1855464 :                               ii = il_recv + i - 1
    2837      9089508 :                               DO j = 1, ns_me
    2838     32418828 :                                  jj = j
    2839     34274292 :                                  DO k = 1, ns_recv_partner
    2840     25710780 :                                     kk = il_recv_partner + k - 1
    2841              :                                     cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
    2842     32418828 :                                                                       rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j)
    2843              :                                  END DO
    2844              :                               END DO
    2845              :                            END DO
    2846              :                         END DO ! idim
    2847              :                      ELSE
    2848            8 :                         il1 = ns_bound(para_env%mepos, 1)
    2849            8 :                         il_recv = ns_bound(ip_recv_from, 1)
    2850           32 :                         DO idim = 1, dim2
    2851           56 :                            DO j = 1, ns_me
    2852           48 :                               jj = j
    2853           72 :                               DO i = 1, ns_recv_from
    2854           24 :                                  ii = il_recv + i - 1
    2855           48 :                                  cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j)
    2856              :                               END DO
    2857              :                            END DO
    2858              :                         END DO ! idim
    2859              :                      END IF
    2860              :                   END IF
    2861              :                END IF ! ns_me
    2862              :             END DO ! ip
    2863              : 
    2864       174296 :             IF (ns_partner*ns_me /= 0) THEN
    2865        87132 :                DEALLOCATE (rmat_loc)
    2866       350130 :                DO idim = 1, dim2
    2867       350130 :                   DEALLOCATE (xyz_mix(idim)%c_array)
    2868              :                END DO
    2869              :             END IF
    2870              : 
    2871              :          END DO ! iperm
    2872              : 
    2873              :          ! calculate the max gradient
    2874       350194 :          DO idim = 1, dim2
    2875      1190802 :             DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
    2876       927756 :                ii = i - ns_bound(para_env%mepos, 1) + 1
    2877       927756 :                zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
    2878      1190802 :                zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
    2879              :             END DO
    2880       789138 :             rcount(:) = SIZE(zdiag_me(idim)%c_array)
    2881       263046 :             rdispl(1) = 0
    2882       526092 :             DO ip = 2, para_env%num_pe
    2883       526092 :                rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
    2884              :             END DO
    2885              :             ! collect all the diagonal elements in a replicated 1d array
    2886      3496450 :             CALL para_env%allgatherv(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl)
    2887              :          END DO
    2888              : 
    2889        87148 :          gmax = 0.0_dp
    2890       393253 :          DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
    2891       306105 :             k = j - ns_bound(para_env%mepos, 1) + 1
    2892      1339080 :             DO i = 1, j - 1
    2893              :                ! find the location of the diagonal element (i,i)
    2894      1089122 :                DO ip = 0, para_env%num_pe - 1
    2895      1089122 :                   IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2)) THEN
    2896              :                      ip_has_i = ip
    2897              :                      EXIT
    2898              :                   END IF
    2899              :                END DO
    2900       945827 :                ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1
    2901              :                ! mepos has the diagonal element (j,j), as well as the off diagonal (i,j)
    2902       945827 :                jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1
    2903       945827 :                grad = 0.0_dp
    2904      3835985 :                DO idim = 1, dim2
    2905      2890158 :                   zi = zdiag_all(idim)%c_array(ii)
    2906      2890158 :                   zj = zdiag_all(idim)%c_array(jj)
    2907      3835985 :                   grad = grad + weights(idim)*REAL(4.0_dp*CONJG(cz_ij_loc(idim)%c_array(i, k))*(zj - zi), dp)
    2908              :                END DO
    2909      1251932 :                gmax = MAX(gmax, ABS(grad))
    2910              :             END DO
    2911              :          END DO
    2912              : 
    2913        87148 :          CALL para_env%max(gmax)
    2914        87148 :          tolerance = gmax
    2915        87148 :          IF (PRESENT(tol_out)) tol_out = tolerance
    2916              : 
    2917        87148 :          func = 0.0_dp
    2918       393253 :          DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
    2919       306105 :             k = i - ns_bound(para_env%mepos, 1) + 1
    2920      1321009 :             DO idim = 1, dim2
    2921       927756 :                zr = REAL(cz_ij_loc(idim)%c_array(i, k), dp)
    2922       927756 :                zc = AIMAG(cz_ij_loc(idim)%c_array(i, k))
    2923      1233861 :                func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/twopi/twopi
    2924              :             END DO
    2925              :          END DO
    2926        87148 :          CALL para_env%sum(func)
    2927        87148 :          t2 = m_walltime()
    2928              : 
    2929        87148 :          IF (output_unit > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
    2930          440 :             WRITE (output_unit, '(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1
    2931          440 :             CALL m_flush(output_unit)
    2932              :          END IF
    2933        87148 :          IF (PRESENT(eps_localization)) THEN
    2934        87116 :             IF (tolerance < eps_localization) EXIT
    2935              :          END IF
    2936        87148 :          IF (PRESENT(target_time) .AND. PRESENT(start_time)) THEN
    2937        86850 :             CALL external_control(should_stop, "LOC", target_time=target_time, start_time=start_time)
    2938        86850 :             IF (should_stop) EXIT
    2939              :          END IF
    2940              : 
    2941              :       END DO ! lsweep
    2942              : 
    2943              :       ! buffer for message passing
    2944          422 :       DEALLOCATE (rmat_recv_all)
    2945              : 
    2946          422 :       DEALLOCATE (rmat_recv)
    2947          422 :       DEALLOCATE (rmat_send)
    2948          422 :       IF (ns_me > 0) THEN
    2949          414 :          DEALLOCATE (c_array_me)
    2950              :       END IF
    2951         1712 :       DO idim = 1, dim2
    2952         1290 :          DEALLOCATE (zdiag_me(idim)%c_array)
    2953         1712 :          DEALLOCATE (zdiag_all(idim)%c_array)
    2954              :       END DO
    2955          422 :       DEALLOCATE (zdiag_me)
    2956          422 :       DEALLOCATE (zdiag_all)
    2957          422 :       DEALLOCATE (xyz_mix)
    2958         1712 :       DO idim = 1, dim2
    2959         1712 :          IF (ns_me /= 0) THEN
    2960         1266 :             DEALLOCATE (xyz_mix_ns(idim)%c_array)
    2961              :          END IF
    2962              :       END DO
    2963          422 :       DEALLOCATE (xyz_mix_ns)
    2964          422 :       IF (ns_me /= 0) THEN
    2965          414 :          DEALLOCATE (gmat)
    2966              :       END IF
    2967          422 :       DEALLOCATE (mii)
    2968          422 :       DEALLOCATE (mij)
    2969          422 :       DEALLOCATE (mjj)
    2970          422 :       DEALLOCATE (list_pair)
    2971              : 
    2972          844 :    END SUBROUTINE jacobi_rot_para_core
    2973              : 
    2974              : ! **************************************************************************************************
    2975              : !> \brief ...
    2976              : !> \param iperm ...
    2977              : !> \param para_env ...
    2978              : !> \param list_pair ...
    2979              : ! **************************************************************************************************
    2980        87148 :    SUBROUTINE eberlein(iperm, para_env, list_pair)
    2981              :       INTEGER, INTENT(IN)                                :: iperm
    2982              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    2983              :       INTEGER, DIMENSION(:, :)                           :: list_pair
    2984              : 
    2985              :       INTEGER                                            :: i, ii, jj, npair
    2986              : 
    2987        87148 :       npair = (para_env%num_pe + 1)/2
    2988        87148 :       IF (iperm == 1) THEN
    2989              : !..set up initial ordering
    2990       261444 :          DO I = 0, para_env%num_pe - 1
    2991       174296 :             II = ((i + 1) + 1)/2
    2992       174296 :             JJ = MOD((i + 1) + 1, 2) + 1
    2993       261444 :             list_pair(JJ, II) = i
    2994              :          END DO
    2995        87148 :          IF (MOD(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1
    2996            0 :       ELSEIF (MOD(iperm, 2) == 0) THEN
    2997              : !..a type shift
    2998            0 :          jj = list_pair(1, npair)
    2999            0 :          DO I = npair, 3, -1
    3000            0 :             list_pair(1, I) = list_pair(1, I - 1)
    3001              :          END DO
    3002            0 :          list_pair(1, 2) = list_pair(2, 1)
    3003            0 :          list_pair(2, 1) = jj
    3004              :       ELSE
    3005              : !..b type shift
    3006            0 :          jj = list_pair(2, 1)
    3007            0 :          DO I = 1, npair - 1
    3008            0 :             list_pair(2, I) = list_pair(2, I + 1)
    3009              :          END DO
    3010            0 :          list_pair(2, npair) = jj
    3011              :       END IF
    3012              : 
    3013        87148 :    END SUBROUTINE eberlein
    3014              : 
    3015              : ! **************************************************************************************************
    3016              : !> \brief ...
    3017              : !> \param vectors ...
    3018              : !> \param op_sm_set ...
    3019              : !> \param zij_fm_set ...
    3020              : ! **************************************************************************************************
    3021          532 :    SUBROUTINE zij_matrix(vectors, op_sm_set, zij_fm_set)
    3022              : 
    3023              :       TYPE(cp_fm_type), INTENT(IN)                       :: vectors
    3024              :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: op_sm_set
    3025              :       TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN)      :: zij_fm_set
    3026              : 
    3027              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'zij_matrix'
    3028              : 
    3029              :       INTEGER                                            :: handle, i, j, nao, nmoloc
    3030              :       TYPE(cp_fm_type)                                   :: opvec
    3031              : 
    3032          532 :       CALL timeset(routineN, handle)
    3033              : 
    3034              :       ! get rows and cols of the input
    3035          532 :       CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc)
    3036              :       ! replicate the input kind of matrix
    3037          532 :       CALL cp_fm_create(opvec, vectors%matrix_struct)
    3038              : 
    3039              :       ! Compute zij here
    3040         2164 :       DO i = 1, SIZE(zij_fm_set, 2)
    3041         5428 :          DO j = 1, SIZE(zij_fm_set, 1)
    3042         3264 :             CALL cp_fm_set_all(zij_fm_set(j, i), 0.0_dp)
    3043         3264 :             CALL cp_dbcsr_sm_fm_multiply(op_sm_set(j, i)%matrix, vectors, opvec, ncol=nmoloc)
    3044              :             CALL parallel_gemm("T", "N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, &
    3045         4896 :                                zij_fm_set(j, i))
    3046              :          END DO
    3047              :       END DO
    3048              : 
    3049          532 :       CALL cp_fm_release(opvec)
    3050          532 :       CALL timestop(handle)
    3051              : 
    3052          532 :    END SUBROUTINE zij_matrix
    3053              : 
    3054              : ! **************************************************************************************************
    3055              : !> \brief ...
    3056              : !> \param vectors ...
    3057              : ! **************************************************************************************************
    3058           38 :    SUBROUTINE scdm_qrfact(vectors)
    3059              : 
    3060              :       TYPE(cp_fm_type), INTENT(IN)                       :: vectors
    3061              : 
    3062              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'scdm_qrfact'
    3063              : 
    3064              :       INTEGER                                            :: handle, ncolT, nrowT
    3065           38 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: tau
    3066              :       TYPE(cp_fm_struct_type), POINTER                   :: cstruct
    3067              :       TYPE(cp_fm_type)                                   :: CTp, Qf, tmp
    3068              : 
    3069           38 :       CALL timeset(routineN, handle)
    3070              : 
    3071              :       ! Create Transpose of Coefficient Matrix vectors
    3072           38 :       nrowT = vectors%matrix_struct%ncol_global
    3073           38 :       ncolT = vectors%matrix_struct%nrow_global
    3074              : 
    3075              :       CALL cp_fm_struct_create(cstruct, template_fmstruct=vectors%matrix_struct, &
    3076           38 :                                nrow_global=nrowT, ncol_global=ncolT)
    3077           38 :       CALL cp_fm_create(CTp, cstruct)
    3078           38 :       CALL cp_fm_struct_release(cstruct)
    3079              : 
    3080          114 :       ALLOCATE (tau(nrowT))
    3081              : 
    3082           38 :       CALL cp_fm_transpose(vectors, CTp)
    3083              : 
    3084              :       ! Get QR decomposition of CTs
    3085           38 :       CALL cp_fm_pdgeqpf(CTp, tau, nrowT, ncolT, 1, 1)
    3086              : 
    3087              :       ! Construction of Q from the scalapack output
    3088              :       CALL cp_fm_struct_create(cstruct, para_env=CTp%matrix_struct%para_env, &
    3089              :                                context=CTp%matrix_struct%context, nrow_global=CTp%matrix_struct%nrow_global, &
    3090           38 :                                ncol_global=CTp%matrix_struct%nrow_global)
    3091           38 :       CALL cp_fm_create(Qf, cstruct)
    3092           38 :       CALL cp_fm_struct_release(cstruct)
    3093           38 :       CALL cp_fm_to_fm_submat(CTp, Qf, nrowT, nrowT, 1, 1, 1, 1)
    3094              : 
    3095              :       ! Get Q
    3096           38 :       CALL cp_fm_pdorgqr(Qf, tau, nrowT, 1, 1)
    3097              : 
    3098              :       ! Transform original coefficient matrix vectors
    3099           38 :       CALL cp_fm_create(tmp, vectors%matrix_struct)
    3100           38 :       CALL cp_fm_set_all(tmp, 0.0_dp, 1.0_dp)
    3101           38 :       CALL cp_fm_to_fm(vectors, tmp)
    3102           38 :       CALL parallel_gemm('N', 'N', ncolT, nrowT, nrowT, 1.0_dp, tmp, Qf, 0.0_dp, vectors)
    3103              : 
    3104              :       ! Cleanup
    3105           38 :       CALL cp_fm_release(CTp)
    3106           38 :       CALL cp_fm_release(tmp)
    3107           38 :       CALL cp_fm_release(Qf)
    3108           38 :       DEALLOCATE (tau)
    3109              : 
    3110           38 :       CALL timestop(handle)
    3111              : 
    3112          152 :    END SUBROUTINE scdm_qrfact
    3113              : 
    3114              : ! **************************************************************************************************
    3115              : !> \brief Achieves minimisation of the spread functional by simultaneous diagonalisation with Jacobi
    3116              : !>        rotations as presented in Cardoso & Souloumiac, SIAM J. Matrix Anal. Appl., 17(1), 161.
    3117              : !>        Generalizes the Jacobi algorithm to complex matrices.
    3118              : !> \param weights array of weights for calculating the total spread
    3119              : !> \param zij spread operator matrices
    3120              : !> \param max_iter maximum number iterations
    3121              : !> \param eps_localization numerical tolerance
    3122              : !> \param sweeps counts number of sweeps required
    3123              : !> \param out_each how often to print info
    3124              : !> \param vectors complex vectors to be localized
    3125              : !> \par History
    3126              : !>      2020-04 created [LS]
    3127              : !> \author Lukas Schreder
    3128              : ! **************************************************************************************************
    3129            6 :    SUBROUTINE cardoso_souloumiac(weights, zij, max_iter, eps_localization, sweeps, &
    3130              :                                  out_each, vectors)
    3131              : 
    3132              :       REAL(KIND=dp), INTENT(IN)                          :: weights(:)
    3133              :       TYPE(cp_cfm_type), INTENT(INOUT)                   :: zij(:, :)
    3134              :       INTEGER, INTENT(IN)                                :: max_iter
    3135              :       REAL(KIND=dp), INTENT(IN)                          :: eps_localization
    3136              :       INTEGER                                            :: sweeps
    3137              :       INTEGER, INTENT(IN)                                :: out_each
    3138              :       TYPE(cp_cfm_type), POINTER                         :: vectors
    3139              : 
    3140              :       CHARACTER(len=*), PARAMETER :: routineN = 'cardoso_souloumiac'
    3141              : 
    3142              :       COMPLEX(KIND=dp)                                   :: s
    3143            6 :       COMPLEX(KIND=dp), ALLOCATABLE                      :: mii(:), mij(:), mji(:), mjj(:)
    3144              :       INTEGER                                            :: dim1, dim2, handle, idim, istate, jdim, &
    3145              :                                                             jstate, nstate, unit_nr
    3146              :       REAL(KIND=dp)                                      :: c, old_spread, spread, t1, t2, tolerance
    3147              :       TYPE(cp_cfm_type), POINTER                         :: c_rmat, c_zij(:)
    3148              : 
    3149            6 :       CALL timeset(routineN, handle)
    3150              : 
    3151            6 :       dim1 = SIZE(zij, 1)
    3152            6 :       dim2 = SIZE(zij, 2)
    3153              : 
    3154            6 :       NULLIFY (c_rmat, c_zij)
    3155           84 :       ALLOCATE (c_rmat, c_zij(dim1*dim2), mii(dim1*dim2), mij(dim1*dim2), mji(dim1*dim2), mjj(dim1*dim2))
    3156            6 :       CALL cp_cfm_create(c_rmat, zij(1, 1)%matrix_struct)
    3157            6 :       CALL cp_cfm_set_all(c_rmat, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))  ! start with the identity transformation
    3158           24 :       DO idim = 1, dim2
    3159           60 :          DO jdim = 1, dim1
    3160           36 :             CALL cp_cfm_create(c_zij((idim - 1)*dim1 + jdim), zij(jdim, idim)%matrix_struct)
    3161           54 :             CALL cp_cfm_to_cfm(zij(jdim, idim), c_zij((idim - 1)*dim1 + jdim))
    3162              :          END DO
    3163              :       END DO
    3164              : 
    3165            6 :       CALL cp_cfm_get_info(c_rmat, nrow_global=nstate)
    3166            6 :       tolerance = 1.0e10_dp
    3167            6 :       old_spread = 1.0e10_dp
    3168            6 :       sweeps = 0
    3169            6 :       unit_nr = -1
    3170            6 :       IF (c_rmat%matrix_struct%para_env%is_source()) THEN
    3171            3 :          unit_nr = cp_logger_get_default_unit_nr()
    3172              :          WRITE (unit_nr, "(T4,A )") " Localization by iterative Jacobi rotation using "// &
    3173            3 :             "Cardoso-Souloumiac angles"
    3174              : 
    3175              :       END IF
    3176              : 
    3177              :       ! Jacobi sweeps until converged
    3178           58 :       DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
    3179           52 :          sweeps = sweeps + 1
    3180           52 :          t1 = m_walltime()
    3181         1924 :          DO jstate = 1, nstate
    3182        34684 :             DO istate = jstate + 1, nstate
    3183       229320 :                DO idim = 1, dim1*dim2
    3184       196560 :                   CALL cp_cfm_get_element(c_zij(idim), istate, istate, mii(idim))
    3185       196560 :                   CALL cp_cfm_get_element(c_zij(idim), istate, jstate, mij(idim))
    3186       196560 :                   CALL cp_cfm_get_element(c_zij(idim), jstate, istate, mji(idim))
    3187       229320 :                   CALL cp_cfm_get_element(c_zij(idim), jstate, jstate, mjj(idim))
    3188              :                END DO
    3189        32760 :                CALL get_cardoso_angles(mii, mij, mji, mjj, c, s, vectors%matrix_struct)
    3190       229320 :                DO idim = 1, dim1*dim2
    3191       196560 :                   CALL cp_cfm_rot_cols(c_zij(idim), istate, jstate, c, REAL(s))
    3192       229320 :                   CALL cp_cfm_rot_rows(c_zij(idim), istate, jstate, c, REAL(s))
    3193              :                END DO
    3194        67392 :                CALL cp_cfm_rot_cols(c_rmat, istate, jstate, c, REAL(s))
    3195              :             END DO
    3196              :          END DO
    3197           52 :          CALL check_tolerance(c_zij, weights, spread)
    3198           52 :          CALL check_tolerance(c_zij, weights, tolerance)
    3199           52 :          tolerance = ABS(spread - old_spread)
    3200           52 :          old_spread = spread
    3201              : 
    3202           52 :          t2 = m_walltime()
    3203           58 :          IF (unit_nr > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
    3204              :             WRITE (unit_nr, "(T4,A,I7,A,E12.4,A,E12.4,A,F8.3)") &
    3205           26 :                "Iteration:", sweeps, "Functional", spread, "Tolerance:", tolerance, "Time:", t2 - t1
    3206           26 :             CALL m_flush(unit_nr)
    3207              :          END IF
    3208              :       END DO
    3209              : 
    3210           24 :       DO idim = 1, dim2
    3211              :          ! back to an interlaced matrix
    3212           60 :          DO jdim = 1, dim1
    3213           36 :             CALL cp_cfm_to_cfm(c_zij((idim - 1)*dim1 + jdim), zij(jdim, idim))
    3214           54 :             CALL cp_cfm_release(c_zij((idim - 1)*dim1 + jdim))
    3215              :          END DO
    3216              :       END DO
    3217              : 
    3218            6 :       CALL rotate_orbitals_cfm(c_rmat, vectors)
    3219              : 
    3220            6 :       DEALLOCATE (c_zij, mii, mij, mji, mjj)
    3221            6 :       CALL cp_cfm_release(c_rmat)
    3222            6 :       DEALLOCATE (c_rmat)
    3223              : 
    3224            6 :       CALL timestop(handle)
    3225              : 
    3226           18 :    END SUBROUTINE cardoso_souloumiac
    3227              : 
    3228              : ! **************************************************************************************************
    3229              : !> \brief Pipek-Mezey version of the Cardoso-Souloumiac PADE algorithm for complex-valued matrices.
    3230              : !> \param zij spread operator matrices
    3231              : !> \param vec complex vectors to be localised
    3232              : !> \param sweeps counts number of sweeps required
    3233              : !> \param max_iter maximum number iterations
    3234              : !> \param eps numerical tolerance
    3235              : !> \param out_each how often to print info
    3236              : !> \par History
    3237              : !>      2020-04 created [LS]
    3238              : !> \author Lukas Schreder
    3239              : ! **************************************************************************************************
    3240            6 :    SUBROUTINE cardoso_souloumiac_pipek(zij, vec, sweeps, max_iter, eps, out_each)
    3241              : 
    3242              :       TYPE(cp_cfm_type), POINTER                         :: zij(:, :), vec
    3243              :       INTEGER                                            :: sweeps, max_iter
    3244              :       REAL(dp)                                           :: eps
    3245              :       INTEGER                                            :: out_each
    3246              : 
    3247              :       CHARACTER(*), PARAMETER :: routineN = 'cardoso_souloumiac_pipek'
    3248              : 
    3249              :       COMPLEX(dp)                                        :: c_spread, s
    3250              :       COMPLEX(dp), POINTER                               :: qii(:), qij(:), qji(:), qjj(:)
    3251              :       INTEGER                                            :: handle, i, j, k, n_dim, n_states, &
    3252              :                                                             output_unit
    3253              :       REAL(dp)                                           :: c, old_spread, spread, t1, t2, tol
    3254            6 :       TYPE(cp_cfm_type), POINTER                         :: c_zij(:), rmat
    3255              : 
    3256            6 :       CALL timeset(routineN, handle)
    3257              : 
    3258            6 :       CALL cite_reference(Schreder2024_2)
    3259              : 
    3260           18 :       n_dim = SIZE(zij)
    3261              : 
    3262            6 :       ALLOCATE (rmat)
    3263            6 :       CALL cp_cfm_create(rmat, zij(1, 1)%matrix_struct)
    3264              : 
    3265            6 :       CALL cp_cfm_set_all(rmat, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
    3266          120 :       ALLOCATE (c_zij(n_dim), qii(n_dim), qij(n_dim), qji(n_dim), qjj(n_dim))
    3267              : 
    3268              :       ! build Qij matrix
    3269           78 :       DO k = 1, n_dim
    3270           78 :          c_zij(k) = zij(k, 1)
    3271              :       END DO
    3272              : 
    3273            6 :       CALL cp_cfm_get_info(c_zij(1), ncol_global=n_states)
    3274              : 
    3275            6 :       tol = 1.0e10_dp
    3276            6 :       c_spread = (0.0_dp, 0.0_dp)
    3277           78 :       DO k = 1, n_dim
    3278         2382 :          DO i = 1, n_states
    3279         2304 :             CALL cp_cfm_get_element(c_zij(k), i, i, s)
    3280         2376 :             c_spread = c_spread + s*s
    3281              :          END DO
    3282              :       END DO
    3283            6 :       spread = REAL(c_spread)
    3284            6 :       old_spread = spread
    3285              : 
    3286            6 :       sweeps = 0
    3287            6 :       output_unit = cp_logger_get_default_unit_nr()
    3288              :       WRITE (output_unit, "(T4,A )") " Localization by iterative Jacobi rotation using "// &
    3289            6 :          "Cardoso-Souloumiac angles"
    3290            6 :       WRITE (output_unit, "(T4,A )") " and Pipek-Mezey spread functional"
    3291              : 
    3292            6 :       IF (output_unit > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
    3293              :          WRITE (output_unit, "(T4,A,I7,A,E12.4,A,E12.4,A,F8.3)") &
    3294            6 :             "Iteration:", sweeps, " Functional", spread, " Tolerance:", tol, " Time:", 0.0_dp
    3295              :       END IF
    3296              : 
    3297           36 :       DO WHILE (tol >= eps .AND. sweeps < max_iter)
    3298           30 :          t1 = m_walltime()
    3299           30 :          sweeps = sweeps + 1
    3300              : 
    3301          990 :          DO i = 1, n_states
    3302        15870 :             DO j = i + 1, n_states
    3303       193440 :                DO k = 1, n_dim
    3304       178560 :                   CALL cp_cfm_get_element(c_zij(k), i, i, qii(k))
    3305       178560 :                   CALL cp_cfm_get_element(c_zij(k), i, j, qij(k))
    3306       178560 :                   CALL cp_cfm_get_element(c_zij(k), j, i, qji(k))
    3307       193440 :                   CALL cp_cfm_get_element(c_zij(k), j, j, qjj(k))
    3308              :                END DO
    3309        14880 :                CALL get_cardoso_angles(qii, qij, qji, qjj, c, s, vec%matrix_struct)
    3310       193440 :                DO k = 1, n_dim
    3311       178560 :                   CALL cp_cfm_rot_cols(c_zij(k), i, j, c, REAL(s))
    3312       193440 :                   CALL cp_cfm_rot_rows(c_zij(k), i, j, c, REAL(s))
    3313              :                END DO
    3314        30720 :                CALL cp_cfm_rot_cols(rmat, i, j, c, REAL(s))
    3315              :             END DO
    3316              :          END DO
    3317              : 
    3318           30 :          c_spread = (0.0_dp, 0.0_dp)
    3319          990 :          DO i = 1, n_states
    3320        12510 :             DO k = 1, n_dim
    3321        11520 :                CALL cp_cfm_get_element(c_zij(k), i, i, s)
    3322        12480 :                c_spread = c_spread + s*s
    3323              :             END DO
    3324              :          END DO
    3325           30 :          spread = REAL(c_spread)
    3326              : 
    3327           30 :          tol = ABS(spread - old_spread)
    3328           30 :          old_spread = spread
    3329           30 :          t2 = m_walltime()
    3330           36 :          IF (output_unit > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
    3331              :             WRITE (output_unit, "(T4,A,I7,A,E12.4,A,E12.4,A,F8.3)") &
    3332           30 :                "Iteration:", sweeps, " Functional", spread, " Tolerance:", tol, " Time:", t2 - t1
    3333              :          END IF
    3334              :       END DO
    3335              : 
    3336            6 :       CALL rotate_orbitals_cfm(rmat, vec)
    3337              : 
    3338              :       ! c_zij(k) was assigned (shallow copy of zij(k,1)); do not release contents
    3339            6 :       DEALLOCATE (c_zij, qii, qij, qji, qjj)
    3340            6 :       CALL cp_cfm_release(rmat)
    3341            6 :       DEALLOCATE (rmat)
    3342              : 
    3343            6 :       CALL timestop(handle)
    3344              : 
    3345           18 :    END SUBROUTINE cardoso_souloumiac_pipek
    3346              : 
    3347              : ! **************************************************************************************************
    3348              : !> \brief calculates the Jacobi angles needed in serial Cardoso-Souloumiac diagonalisation
    3349              : !> \param mii ...
    3350              : !> \param mij ...
    3351              : !> \param mji ...
    3352              : !> \param mjj ...
    3353              : !> \param c ...
    3354              : !> \param s ...
    3355              : !> \param tmp_fm_struct ...
    3356              : !> \par History
    3357              : !>      2020-04 created [LS]
    3358              : !> \author Lukas Schreder
    3359              : ! **************************************************************************************************
    3360        47640 :    SUBROUTINE get_cardoso_angles(mii, mij, mji, mjj, c, s, tmp_fm_struct)
    3361              : 
    3362              :       COMPLEX(KIND=dp), DIMENSION(:)                     :: mii, mij, mji, mjj
    3363              :       REAL(KIND=dp), INTENT(out)                         :: c
    3364              :       COMPLEX(KIND=dp), INTENT(out)                      :: s
    3365              :       TYPE(cp_fm_struct_type), POINTER                   :: tmp_fm_struct
    3366              : 
    3367              :       INTEGER                                            :: dim_m, i, i_max
    3368              :       REAL(KIND=dp)                                      :: r, x, y, z
    3369              :       REAL(KIND=dp), DIMENSION(3)                        :: evals
    3370              :       TYPE(cp_cfm_type), POINTER                         :: c_Gmat, hmat
    3371              :       TYPE(cp_fm_struct_type), POINTER                   :: G_fm_struct, h_fm_struct
    3372              :       TYPE(cp_fm_type), POINTER                          :: evects, Gmat
    3373              : 
    3374        47640 :       dim_m = SIZE(mii)
    3375              : 
    3376              :       CALL cp_fm_struct_create(h_fm_struct, nrow_global=1, ncol_global=3, &
    3377        47640 :                                template_fmstruct=tmp_fm_struct)
    3378              :       CALL cp_fm_struct_create(G_fm_struct, nrow_global=3, ncol_global=3, &
    3379        47640 :                                template_fmstruct=tmp_fm_struct)
    3380        47640 :       ALLOCATE (hmat, c_Gmat, Gmat, evects)
    3381        47640 :       CALL cp_cfm_create(hmat, h_fm_struct)
    3382        47640 :       CALL cp_cfm_create(c_Gmat, G_fm_struct)
    3383        47640 :       CALL cp_cfm_set_all(c_Gmat, (0.0_dp, 0.0_dp), (0.0_dp, 0.0_dp))
    3384        47640 :       CALL cp_fm_create(Gmat, G_fm_struct)
    3385        47640 :       CALL cp_fm_create(evects, G_fm_struct)
    3386              : 
    3387       422760 :       DO i = 1, dim_m
    3388       375120 :          CALL cp_cfm_set_element(hmat, 1, 1, (mii(i) - mjj(i)))
    3389       375120 :          CALL cp_cfm_set_element(hmat, 1, 2, (mij(i) + mji(i)))
    3390       375120 :          CALL cp_cfm_set_element(hmat, 1, 3, (0.0_dp, 1.0_dp)*(mji(i) - mij(i)))
    3391       422760 :          CALL cp_cfm_gemm("C", "N", 3, 3, 1, (1.0_dp, 0.0_dp), hmat, hmat, (1.0_dp, 0.0_dp), c_Gmat)
    3392              :       END DO
    3393        47640 :       CALL cp_cfm_to_fm(c_Gmat, Gmat)
    3394              : 
    3395              :       ! find eigenvector with highest eigenvalue
    3396        47640 :       CALL choose_eigv_solver(Gmat, evects, evals)
    3397       190560 :       i_max = MAXLOC(evals, 1)
    3398        47640 :       CALL cp_fm_get_element(evects, 1, i_max, x)
    3399        47640 :       CALL cp_fm_get_element(evects, 2, i_max, y)
    3400        47640 :       CALL cp_fm_get_element(evects, 3, i_max, z)
    3401        47640 :       IF (x < 0) THEN
    3402        47288 :          x = -x
    3403        47288 :          y = -y
    3404        47288 :          z = -z
    3405              :       END IF
    3406              : 
    3407              :       ! calculate the angles
    3408        47640 :       r = SQRT(x**2 + y**2 + z**2)  ! always 1
    3409        47640 :       c = SQRT((x + r)/(2*r))  ! always real
    3410        47640 :       s = (y - gaussi*z)/SQRT(2*r*(x + r)) ! always complex
    3411              : 
    3412        47640 :       CALL cp_fm_struct_release(h_fm_struct)
    3413        47640 :       CALL cp_fm_struct_release(G_fm_struct)
    3414        47640 :       CALL cp_cfm_release(hmat)
    3415        47640 :       CALL cp_cfm_release(c_Gmat)
    3416        47640 :       CALL cp_fm_release(Gmat)
    3417        47640 :       CALL cp_fm_release(evects)
    3418        47640 :       DEALLOCATE (hmat, c_Gmat, Gmat, evects)
    3419              : 
    3420        47640 :    END SUBROUTINE get_cardoso_angles
    3421              : 
    3422            0 : END MODULE qs_localization_methods
        

Generated by: LCOV version 2.0-1