Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2026 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Localization methods such as 2x2 Jacobi rotations
10 : !> Steepest Decents
11 : !> Conjugate Gradient
12 : !> \par History
13 : !> Initial parallellization of jacobi (JVDV 07.2003)
14 : !> direct minimization using exponential parametrization (JVDV 09.2003)
15 : !> crazy rotations go fast (JVDV 10.2003)
16 : !> \author CJM (04.2003)
17 : ! **************************************************************************************************
18 : MODULE qs_localization_methods
19 : USE bibliography, ONLY: Schreder2024_2,&
20 : cite_reference
21 : USE cell_types, ONLY: cell_type
22 : USE cp_blacs_env, ONLY: cp_blacs_env_type
23 : USE cp_cfm_basic_linalg, ONLY: cp_cfm_column_scale,&
24 : cp_cfm_gemm,&
25 : cp_cfm_rot_cols,&
26 : cp_cfm_rot_rows,&
27 : cp_cfm_scale,&
28 : cp_cfm_scale_and_add,&
29 : cp_cfm_schur_product,&
30 : cp_cfm_trace
31 : USE cp_cfm_diag, ONLY: cp_cfm_heevd
32 : USE cp_cfm_types, ONLY: &
33 : cp_cfm_create, cp_cfm_get_element, cp_cfm_get_info, cp_cfm_get_submatrix, cp_cfm_release, &
34 : cp_cfm_set_all, cp_cfm_set_element, cp_cfm_set_submatrix, cp_cfm_to_cfm, cp_cfm_to_fm, &
35 : cp_cfm_type, cp_fm_to_cfm
36 : USE cp_dbcsr_api, ONLY: dbcsr_p_type
37 : USE cp_dbcsr_operations, ONLY: cp_dbcsr_sm_fm_multiply
38 : USE cp_external_control, ONLY: external_control
39 : USE cp_fm_basic_linalg, ONLY: cp_fm_frobenius_norm,&
40 : cp_fm_pdgeqpf,&
41 : cp_fm_pdorgqr,&
42 : cp_fm_scale,&
43 : cp_fm_scale_and_add,&
44 : cp_fm_trace,&
45 : cp_fm_transpose,&
46 : cp_fm_triangular_multiply
47 : USE cp_fm_cholesky, ONLY: cp_fm_cholesky_decompose
48 : USE cp_fm_diag, ONLY: choose_eigv_solver,&
49 : cp_fm_syevd
50 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
51 : cp_fm_struct_get,&
52 : cp_fm_struct_release,&
53 : cp_fm_struct_type
54 : USE cp_fm_types, ONLY: &
55 : cp_fm_create, cp_fm_get_element, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_init_random, &
56 : cp_fm_maxabsrownorm, cp_fm_maxabsval, cp_fm_release, cp_fm_set_all, cp_fm_set_submatrix, &
57 : cp_fm_to_fm, cp_fm_to_fm_submat, cp_fm_type
58 : USE cp_log_handling, ONLY: cp_logger_get_default_io_unit,&
59 : cp_logger_get_default_unit_nr
60 : USE kahan_sum, ONLY: accurate_sum
61 : USE kinds, ONLY: dp
62 : USE machine, ONLY: m_flush,&
63 : m_walltime
64 : USE mathconstants, ONLY: gaussi,&
65 : pi,&
66 : twopi,&
67 : z_one,&
68 : z_zero
69 : USE matrix_exp, ONLY: exp_pade_real,&
70 : get_nsquare_norder
71 : USE message_passing, ONLY: mp_para_env_type
72 : USE parallel_gemm_api, ONLY: parallel_gemm
73 : #include "./base/base_uses.f90"
74 :
75 : IMPLICIT NONE
76 : PUBLIC :: initialize_weights, crazy_rotations, &
77 : direct_mini, rotate_orbitals, approx_l1_norm_sd, jacobi_rotations, scdm_qrfact, zij_matrix, &
78 : jacobi_cg_edf_ls, cardoso_souloumiac, cardoso_souloumiac_pipek
79 :
80 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_localization_methods'
81 :
82 : PRIVATE
83 :
84 : TYPE set_c_1d_type
85 : COMPLEX(KIND=dp), POINTER, DIMENSION(:) :: c_array => NULL()
86 : END TYPE set_c_1d_type
87 :
88 : TYPE set_c_2d_type
89 : COMPLEX(KIND=dp), POINTER, DIMENSION(:, :) :: c_array => NULL()
90 : END TYPE set_c_2d_type
91 :
92 : CONTAINS
93 : ! **************************************************************************************************
94 : !> \brief ...
95 : !> \param C ...
96 : !> \param iterations ...
97 : !> \param eps ...
98 : !> \param converged ...
99 : !> \param sweeps ...
100 : ! **************************************************************************************************
101 210 : SUBROUTINE approx_l1_norm_sd(C, iterations, eps, converged, sweeps)
102 : TYPE(cp_fm_type), INTENT(IN) :: C
103 : INTEGER, INTENT(IN) :: iterations
104 : REAL(KIND=dp), INTENT(IN) :: eps
105 : LOGICAL, INTENT(INOUT) :: converged
106 : INTEGER, INTENT(INOUT) :: sweeps
107 :
108 : CHARACTER(len=*), PARAMETER :: routineN = 'approx_l1_norm_sd'
109 : INTEGER, PARAMETER :: taylor_order = 100
110 : REAL(KIND=dp), PARAMETER :: alpha = 0.1_dp, f2_eps = 0.01_dp
111 :
112 : INTEGER :: handle, i, istep, k, n, ncol_local, &
113 : nrow_local, output_unit, p
114 : REAL(KIND=dp) :: expfactor, f2, f2old, gnorm, tnorm
115 : TYPE(cp_blacs_env_type), POINTER :: context
116 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_k_k
117 : TYPE(cp_fm_type) :: CTmp, G, Gp1, Gp2, U
118 : TYPE(mp_para_env_type), POINTER :: para_env
119 :
120 30 : CALL timeset(routineN, handle)
121 :
122 30 : NULLIFY (context, para_env, fm_struct_k_k)
123 :
124 30 : output_unit = cp_logger_get_default_io_unit()
125 :
126 : CALL cp_fm_struct_get(C%matrix_struct, nrow_global=n, ncol_global=k, &
127 : nrow_local=nrow_local, ncol_local=ncol_local, &
128 30 : para_env=para_env, context=context)
129 : CALL cp_fm_struct_create(fm_struct_k_k, para_env=para_env, context=context, &
130 30 : nrow_global=k, ncol_global=k)
131 30 : CALL cp_fm_create(CTmp, C%matrix_struct)
132 30 : CALL cp_fm_create(U, fm_struct_k_k)
133 30 : CALL cp_fm_create(G, fm_struct_k_k)
134 30 : CALL cp_fm_create(Gp1, fm_struct_k_k)
135 30 : CALL cp_fm_create(Gp2, fm_struct_k_k)
136 : !
137 : ! printing
138 30 : IF (output_unit > 0) THEN
139 15 : WRITE (output_unit, '(1X)')
140 15 : WRITE (output_unit, '(2X,A)') '-----------------------------------------------------------------------------'
141 15 : WRITE (output_unit, '(A,I5)') ' Nbr iterations =', iterations
142 15 : WRITE (output_unit, '(A,E10.2)') ' eps convergence =', eps
143 15 : WRITE (output_unit, '(A,I5)') ' Max Taylor order =', taylor_order
144 15 : WRITE (output_unit, '(A,E10.2)') ' f2 eps =', f2_eps
145 15 : WRITE (output_unit, '(A,E10.2)') ' alpha =', alpha
146 15 : WRITE (output_unit, '(A)') ' iteration approx_l1_norm g_norm rel_err'
147 : END IF
148 : !
149 30 : f2old = 0.0_dp
150 30 : converged = .FALSE.
151 : !
152 : ! Start the steepest descent
153 1480 : DO istep = 1, iterations
154 : !
155 : !-------------------------------------------------------------------
156 : ! compute f_2
157 : ! f_2(x)=(x^2+eps)^1/2
158 1470 : f2 = 0.0_dp
159 22748 : DO p = 1, ncol_local ! p
160 1100956 : DO i = 1, nrow_local ! i
161 1099486 : f2 = f2 + SQRT(C%local_data(i, p)**2 + f2_eps)
162 : END DO
163 : END DO
164 1470 : CALL C%matrix_struct%para_env%sum(f2)
165 : !write(*,*) 'qs_localize: f_2=',f2
166 : !-------------------------------------------------------------------
167 : ! compute the derivative of f_2
168 : ! f_2(x)=(x^2+eps)^1/2
169 22748 : DO p = 1, ncol_local ! p
170 1100956 : DO i = 1, nrow_local ! i
171 1099486 : CTmp%local_data(i, p) = C%local_data(i, p)/SQRT(C%local_data(i, p)**2 + f2_eps)
172 : END DO
173 : END DO
174 1470 : CALL parallel_gemm('T', 'N', k, k, n, 1.0_dp, CTmp, C, 0.0_dp, G)
175 : ! antisymmetrize
176 1470 : CALL cp_fm_transpose(G, U)
177 1470 : CALL cp_fm_scale_and_add(-0.5_dp, G, 0.5_dp, U)
178 : !
179 : !-------------------------------------------------------------------
180 : !
181 1470 : gnorm = cp_fm_frobenius_norm(G)
182 : !write(*,*) 'qs_localize: norm(G)=',gnorm
183 : !
184 : ! rescale for steepest descent
185 1470 : CALL cp_fm_scale(-alpha, G)
186 : !
187 : ! compute unitary transform
188 : ! zeroth order
189 1470 : CALL cp_fm_set_all(U, 0.0_dp, 1.0_dp)
190 : ! first order
191 1470 : expfactor = 1.0_dp
192 1470 : CALL cp_fm_scale_and_add(1.0_dp, U, expfactor, G)
193 1470 : tnorm = cp_fm_frobenius_norm(G)
194 : !write(*,*) 'Taylor expansion i=',1,' norm(X^i)/i!=',tnorm
195 1470 : IF (tnorm > 1.0E-10_dp) THEN
196 : ! other orders
197 1470 : CALL cp_fm_to_fm(G, Gp1)
198 4866 : DO i = 2, taylor_order
199 : ! new power of G
200 4866 : CALL parallel_gemm('N', 'N', k, k, k, 1.0_dp, G, Gp1, 0.0_dp, Gp2)
201 4866 : CALL cp_fm_to_fm(Gp2, Gp1)
202 : ! add to the taylor expansion so far
203 4866 : expfactor = expfactor/REAL(i, KIND=dp)
204 4866 : CALL cp_fm_scale_and_add(1.0_dp, U, expfactor, Gp1)
205 4866 : tnorm = cp_fm_frobenius_norm(Gp1)
206 : !write(*,*) 'Taylor expansion i=',i,' norm(X^i)/i!=',tnorm*expfactor
207 4866 : IF (tnorm*expfactor < 1.0E-10_dp) EXIT
208 : END DO
209 : END IF
210 : !
211 : ! incrementaly rotate the MOs
212 1470 : CALL parallel_gemm('N', 'N', n, k, k, 1.0_dp, C, U, 0.0_dp, CTmp)
213 1470 : CALL cp_fm_to_fm(CTmp, C)
214 : !
215 : ! printing
216 1470 : IF (output_unit > 0) THEN
217 735 : WRITE (output_unit, '(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, ABS((f2 - f2old)/f2)
218 : END IF
219 : !
220 : ! Are we done?
221 1470 : sweeps = istep
222 : !IF(gnorm<=grad_thresh.AND.ABS((f2-f2old)/f2)<=f2_thresh.AND.istep>1) THEN
223 1470 : IF (ABS((f2 - f2old)/f2) <= eps .AND. istep > 1) THEN
224 20 : converged = .TRUE.
225 20 : EXIT
226 : END IF
227 1460 : f2old = f2
228 : END DO
229 : !
230 : ! here we should do one refine step to enforce C'*S*C=1 for any case
231 : !
232 : ! Print the final result
233 30 : IF (output_unit > 0) WRITE (output_unit, '(A,E16.10)') ' sparseness function f2 = ', f2
234 : !
235 : ! sparsity
236 : !DO i=1,size(thresh,1)
237 : ! gnorm = 0.0_dp
238 : ! DO o=1,ncol_local
239 : ! DO p=1,nrow_local
240 : ! IF(ABS(C%local_data(p,o))>thresh(i)) THEN
241 : ! gnorm = gnorm + 1.0_dp
242 : ! ENDIF
243 : ! ENDDO
244 : ! ENDDO
245 : ! CALL C%matrix_struct%para_env%sum(gnorm)
246 : ! IF(output_unit>0) THEN
247 : ! WRITE(output_unit,*) 'qs_localize: ratio2=',gnorm / ( REAL(k,KIND=dp)*REAL(n,KIND=dp) ),thresh(i)
248 : ! ENDIF
249 : !ENDDO
250 : !
251 : ! deallocate
252 30 : CALL cp_fm_struct_release(fm_struct_k_k)
253 30 : CALL cp_fm_release(CTmp)
254 30 : CALL cp_fm_release(U)
255 30 : CALL cp_fm_release(G)
256 30 : CALL cp_fm_release(Gp1)
257 30 : CALL cp_fm_release(Gp2)
258 :
259 30 : CALL timestop(handle)
260 :
261 30 : END SUBROUTINE approx_l1_norm_sd
262 : ! **************************************************************************************************
263 : !> \brief ...
264 : !> \param cell ...
265 : !> \param weights ...
266 : ! **************************************************************************************************
267 504 : SUBROUTINE initialize_weights(cell, weights)
268 :
269 : TYPE(cell_type), POINTER :: cell
270 : REAL(KIND=dp), DIMENSION(:) :: weights
271 :
272 : REAL(KIND=dp), DIMENSION(3, 3) :: metric
273 :
274 504 : CPASSERT(ASSOCIATED(cell))
275 :
276 504 : metric = 0.0_dp
277 504 : CALL dgemm('T', 'N', 3, 3, 3, 1._dp, cell%hmat(:, :), 3, cell%hmat(:, :), 3, 0.0_dp, metric(:, :), 3)
278 :
279 504 : weights(1) = METRIC(1, 1) - METRIC(1, 2) - METRIC(1, 3)
280 504 : weights(2) = METRIC(2, 2) - METRIC(1, 2) - METRIC(2, 3)
281 504 : weights(3) = METRIC(3, 3) - METRIC(1, 3) - METRIC(2, 3)
282 504 : weights(4) = METRIC(1, 2)
283 504 : weights(5) = METRIC(1, 3)
284 504 : weights(6) = METRIC(2, 3)
285 :
286 504 : END SUBROUTINE initialize_weights
287 :
288 : ! **************************************************************************************************
289 : !> \brief wrapper for the jacobi routines, should be removed if jacobi_rot_para
290 : !> can deal with serial para_envs.
291 : !> \param weights ...
292 : !> \param zij ...
293 : !> \param vectors ...
294 : !> \param para_env ...
295 : !> \param max_iter ...
296 : !> \param eps_localization ...
297 : !> \param sweeps ...
298 : !> \param out_each ...
299 : !> \param target_time ...
300 : !> \param start_time ...
301 : !> \param restricted ...
302 : !> \par History
303 : !> \author Joost VandeVondele (02.2010)
304 : ! **************************************************************************************************
305 390 : SUBROUTINE jacobi_rotations(weights, zij, vectors, para_env, max_iter, &
306 : eps_localization, sweeps, out_each, target_time, start_time, restricted)
307 :
308 : REAL(KIND=dp), INTENT(IN) :: weights(:)
309 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
310 : TYPE(mp_para_env_type), POINTER :: para_env
311 : INTEGER, INTENT(IN) :: max_iter
312 : REAL(KIND=dp), INTENT(IN) :: eps_localization
313 : INTEGER :: sweeps
314 : INTEGER, INTENT(IN) :: out_each
315 : REAL(dp) :: target_time, start_time
316 : INTEGER :: restricted
317 :
318 390 : IF (para_env%num_pe == 1) THEN
319 : CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, &
320 0 : sweeps, out_each, restricted=restricted)
321 : ELSE
322 : CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
323 390 : sweeps, out_each, target_time, start_time, restricted=restricted)
324 : END IF
325 :
326 390 : END SUBROUTINE jacobi_rotations
327 :
328 : ! **************************************************************************************************
329 : !> \brief this routine, private to the module is a serial backup, till we have jacobi_rot_para to work in serial
330 : !> while the routine below works in parallel, it is too slow to be useful
331 : !> \param weights ...
332 : !> \param zij ...
333 : !> \param vectors ...
334 : !> \param max_iter ...
335 : !> \param eps_localization ...
336 : !> \param sweeps ...
337 : !> \param out_each ...
338 : !> \param restricted ...
339 : ! **************************************************************************************************
340 0 : SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, &
341 : out_each, restricted)
342 : REAL(KIND=dp), INTENT(IN) :: weights(:)
343 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
344 : INTEGER, INTENT(IN) :: max_iter
345 : REAL(KIND=dp), INTENT(IN) :: eps_localization
346 : INTEGER :: sweeps
347 : INTEGER, INTENT(IN) :: out_each
348 : INTEGER :: restricted
349 :
350 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rotations_serial'
351 :
352 : COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
353 : INTEGER :: dim2, handle, idim, istate, jstate, &
354 : nstate, unit_nr
355 : REAL(KIND=dp) :: ct, st, t1, t2, theta, tolerance
356 : TYPE(cp_cfm_type) :: c_rmat
357 : TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
358 : TYPE(cp_fm_type) :: rmat
359 :
360 0 : CALL timeset(routineN, handle)
361 :
362 0 : dim2 = SIZE(zij, 2)
363 0 : ALLOCATE (c_zij(dim2))
364 : NULLIFY (mii, mij, mjj)
365 0 : ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
366 :
367 0 : CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
368 0 : CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
369 :
370 0 : CALL cp_cfm_create(c_rmat, zij(1, 1)%matrix_struct)
371 0 : CALL cp_cfm_set_all(c_rmat, (0._dp, 0._dp), (1._dp, 0._dp))
372 0 : DO idim = 1, dim2
373 0 : CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
374 : c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
375 0 : zij(2, idim)%local_data, dp)
376 : END DO
377 :
378 0 : CALL cp_fm_get_info(rmat, nrow_global=nstate)
379 0 : tolerance = 1.0e10_dp
380 :
381 0 : sweeps = 0
382 0 : unit_nr = -1
383 0 : IF (rmat%matrix_struct%para_env%is_source()) THEN
384 0 : unit_nr = cp_logger_get_default_unit_nr()
385 0 : WRITE (unit_nr, '(T4,A )') " Localization by iterative Jacobi rotation"
386 : END IF
387 :
388 0 : IF (restricted > 0) THEN
389 0 : unit_nr = cp_logger_get_default_unit_nr()
390 0 : WRITE (unit_nr, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
391 0 : nstate = nstate - restricted
392 : END IF
393 :
394 : ! do jacobi sweeps until converged
395 0 : DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
396 0 : sweeps = sweeps + 1
397 0 : t1 = m_walltime()
398 :
399 0 : DO istate = 1, nstate
400 0 : DO jstate = istate + 1, nstate
401 0 : DO idim = 1, dim2
402 0 : CALL cp_cfm_get_element(c_zij(idim), istate, istate, mii(idim))
403 0 : CALL cp_cfm_get_element(c_zij(idim), istate, jstate, mij(idim))
404 0 : CALL cp_cfm_get_element(c_zij(idim), jstate, jstate, mjj(idim))
405 : END DO
406 0 : CALL get_angle(mii, mjj, mij, weights, theta)
407 0 : st = SIN(theta)
408 0 : ct = COS(theta)
409 0 : CALL rotate_zij(istate, jstate, st, ct, c_zij)
410 :
411 0 : CALL rotate_rmat(istate, jstate, st, ct, c_rmat)
412 : END DO
413 : END DO
414 :
415 0 : CALL check_tolerance(c_zij, weights, tolerance)
416 :
417 0 : t2 = m_walltime()
418 0 : IF (unit_nr > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
419 : WRITE (unit_nr, '(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') &
420 0 : "Iteration:", sweeps, "Tolerance:", tolerance, "Time:", t2 - t1
421 0 : CALL m_flush(unit_nr)
422 : END IF
423 :
424 : END DO
425 :
426 0 : DO idim = 1, dim2
427 0 : zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
428 0 : zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
429 0 : CALL cp_cfm_release(c_zij(idim))
430 : END DO
431 0 : DEALLOCATE (c_zij)
432 0 : DEALLOCATE (mii, mij, mjj)
433 0 : rmat%local_data = REAL(c_rmat%local_data, dp)
434 :
435 0 : CALL rotate_orbitals(rmat, vectors)
436 :
437 0 : CALL cp_cfm_release(c_rmat)
438 0 : CALL cp_fm_release(rmat)
439 :
440 0 : CALL timestop(handle)
441 :
442 0 : END SUBROUTINE jacobi_rotations_serial
443 : ! **************************************************************************************************
444 : !> \brief very similar to jacobi_rotations_serial with some extra output options
445 : !> \param weights ...
446 : !> \param c_zij ...
447 : !> \param max_iter ...
448 : !> \param c_rmat ...
449 : !> \param eps_localization ...
450 : !> \param tol_out ...
451 : !> \param jsweeps ...
452 : !> \param out_each ...
453 : !> \param c_zij_out ...
454 : !> \param grad_final ...
455 : ! **************************************************************************************************
456 0 : SUBROUTINE jacobi_rotations_serial_1(weights, c_zij, max_iter, c_rmat, eps_localization, &
457 0 : tol_out, jsweeps, out_each, c_zij_out, grad_final)
458 : REAL(KIND=dp), INTENT(IN) :: weights(:)
459 : TYPE(cp_cfm_type), INTENT(IN) :: c_zij(:)
460 : INTEGER, INTENT(IN) :: max_iter
461 : TYPE(cp_cfm_type), INTENT(IN) :: c_rmat
462 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: eps_localization
463 : REAL(KIND=dp), INTENT(OUT), OPTIONAL :: tol_out
464 : INTEGER, INTENT(OUT), OPTIONAL :: jsweeps
465 : INTEGER, INTENT(IN), OPTIONAL :: out_each
466 : TYPE(cp_cfm_type), INTENT(IN), OPTIONAL :: c_zij_out(:)
467 : TYPE(cp_fm_type), INTENT(OUT), OPTIONAL, POINTER :: grad_final
468 :
469 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rotations_serial_1'
470 :
471 : COMPLEX(KIND=dp) :: mzii
472 : COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
473 : INTEGER :: dim2, handle, idim, istate, jstate, &
474 : nstate, sweeps, unit_nr
475 : REAL(KIND=dp) :: alpha, avg_spread_ii, ct, spread_ii, st, &
476 : sum_spread_ii, t1, t2, theta, tolerance
477 : TYPE(cp_cfm_type) :: c_rmat_local
478 : TYPE(cp_cfm_type), ALLOCATABLE :: c_zij_local(:)
479 :
480 0 : CALL timeset(routineN, handle)
481 :
482 0 : dim2 = SIZE(c_zij)
483 : NULLIFY (mii, mij, mjj)
484 0 : ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
485 :
486 0 : ALLOCATE (c_zij_local(dim2))
487 0 : CALL cp_cfm_create(c_rmat_local, c_rmat%matrix_struct)
488 0 : CALL cp_cfm_set_all(c_rmat_local, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
489 0 : DO idim = 1, dim2
490 0 : CALL cp_cfm_create(c_zij_local(idim), c_zij(idim)%matrix_struct)
491 0 : c_zij_local(idim)%local_data = c_zij(idim)%local_data
492 : END DO
493 :
494 0 : CALL cp_cfm_get_info(c_rmat_local, nrow_global=nstate)
495 0 : tolerance = 1.0e10_dp
496 :
497 0 : IF (PRESENT(grad_final)) CALL cp_fm_set_all(grad_final, 0.0_dp)
498 :
499 0 : sweeps = 0
500 0 : IF (PRESENT(out_each)) THEN
501 0 : unit_nr = -1
502 0 : IF (c_rmat_local%matrix_struct%para_env%is_source()) THEN
503 0 : unit_nr = cp_logger_get_default_unit_nr()
504 : END IF
505 0 : alpha = 0.0_dp
506 0 : DO idim = 1, dim2
507 0 : alpha = alpha + weights(idim)
508 : END DO
509 : END IF
510 :
511 : ! do jacobi sweeps until converged
512 0 : DO WHILE (sweeps < max_iter)
513 0 : sweeps = sweeps + 1
514 0 : IF (PRESENT(eps_localization)) THEN
515 0 : IF (tolerance < eps_localization) EXIT
516 : END IF
517 0 : IF (PRESENT(out_each)) t1 = m_walltime()
518 :
519 0 : DO istate = 1, nstate
520 0 : DO jstate = istate + 1, nstate
521 0 : DO idim = 1, dim2
522 0 : CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mii(idim))
523 0 : CALL cp_cfm_get_element(c_zij_local(idim), istate, jstate, mij(idim))
524 0 : CALL cp_cfm_get_element(c_zij_local(idim), jstate, jstate, mjj(idim))
525 : END DO
526 0 : CALL get_angle(mii, mjj, mij, weights, theta)
527 0 : st = SIN(theta)
528 0 : ct = COS(theta)
529 0 : CALL rotate_zij(istate, jstate, st, ct, c_zij_local)
530 :
531 0 : CALL rotate_rmat(istate, jstate, st, ct, c_rmat_local)
532 : END DO
533 : END DO
534 :
535 0 : IF (PRESENT(grad_final)) THEN
536 0 : CALL check_tolerance(c_zij_local, weights, tolerance, grad=grad_final)
537 : ELSE
538 0 : CALL check_tolerance(c_zij_local, weights, tolerance)
539 : END IF
540 0 : IF (PRESENT(tol_out)) tol_out = tolerance
541 :
542 0 : IF (PRESENT(out_each)) THEN
543 0 : t2 = m_walltime()
544 0 : IF (unit_nr > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
545 0 : sum_spread_ii = 0.0_dp
546 0 : DO istate = 1, nstate
547 : spread_ii = 0.0_dp
548 0 : DO idim = 1, dim2
549 0 : CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mzii)
550 : spread_ii = spread_ii + weights(idim)* &
551 0 : ABS(mzii)**2/twopi/twopi
552 : END DO
553 0 : sum_spread_ii = sum_spread_ii + spread_ii
554 : END DO
555 0 : sum_spread_ii = alpha*nstate/twopi/twopi - sum_spread_ii
556 0 : avg_spread_ii = sum_spread_ii/nstate
557 : WRITE (unit_nr, '(T4,A,T26,A,T48,A,T64,A)') &
558 0 : "Iteration", "Avg. Spread_ii", "Tolerance", "Time"
559 : WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4,T60,F8.3)') &
560 0 : sweeps, avg_spread_ii, tolerance, t2 - t1
561 0 : CALL m_flush(unit_nr)
562 : END IF
563 0 : IF (PRESENT(jsweeps)) jsweeps = sweeps
564 : END IF
565 :
566 : END DO
567 :
568 0 : IF (PRESENT(c_zij_out)) THEN
569 0 : DO idim = 1, dim2
570 0 : CALL cp_cfm_to_cfm(c_zij_local(idim), c_zij_out(idim))
571 : END DO
572 : END IF
573 0 : CALL cp_cfm_to_cfm(c_rmat_local, c_rmat)
574 :
575 0 : DEALLOCATE (mii, mij, mjj)
576 0 : DO idim = 1, dim2
577 0 : CALL cp_cfm_release(c_zij_local(idim))
578 : END DO
579 0 : DEALLOCATE (c_zij_local)
580 0 : CALL cp_cfm_release(c_rmat_local)
581 :
582 0 : CALL timestop(handle)
583 :
584 0 : END SUBROUTINE jacobi_rotations_serial_1
585 : ! **************************************************************************************************
586 : !> \brief combine jacobi rotations (serial) and conjugate gradient with golden section line search
587 : !> for partially occupied wannier functions
588 : !> \param para_env ...
589 : !> \param weights ...
590 : !> \param zij ...
591 : !> \param vectors ...
592 : !> \param max_iter ...
593 : !> \param eps_localization ...
594 : !> \param iter ...
595 : !> \param out_each ...
596 : !> \param nextra ...
597 : !> \param do_cg ...
598 : !> \param nmo ...
599 : !> \param vectors_2 ...
600 : !> \param mos_guess ...
601 : ! **************************************************************************************************
602 2 : SUBROUTINE jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, &
603 : iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
604 : TYPE(mp_para_env_type), POINTER :: para_env
605 : REAL(KIND=dp), INTENT(IN) :: weights(:)
606 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
607 : INTEGER, INTENT(IN) :: max_iter
608 : REAL(KIND=dp), INTENT(IN) :: eps_localization
609 : INTEGER :: iter
610 : INTEGER, INTENT(IN) :: out_each, nextra
611 : LOGICAL, INTENT(IN) :: do_cg
612 : INTEGER, INTENT(IN), OPTIONAL :: nmo
613 : TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: vectors_2, mos_guess
614 :
615 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_cg_edf_ls'
616 : COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
617 : czero = (0.0_dp, 0.0_dp)
618 : REAL(KIND=dp), PARAMETER :: gold_sec = 0.3819_dp
619 :
620 : COMPLEX(KIND=dp) :: cnorm2_Gct, cnorm2_Gct_cross, mzii
621 2 : COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_cmat
622 2 : COMPLEX(KIND=dp), DIMENSION(:), POINTER :: arr_zii
623 2 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: matrix_zii
624 : INTEGER :: dim2, handle, icinit, idim, istate, line_search_count, line_searches, lsl, lsm, &
625 : lsr, miniter, nao, ndummy, nocc, norextra, northo, nstate, unit_nr
626 : INTEGER, DIMENSION(1) :: iloc
627 : LOGICAL :: do_cinit_mo, do_cinit_random, &
628 : do_U_guess_mo, new_direction
629 : REAL(KIND=dp) :: alpha, avg_spread_ii, beta, beta_pr, ds, ds_min, mintol, norm, norm2_Gct, &
630 : norm2_Gct_cross, norm2_old, spread_ii, spread_sum, sum_spread_ii, t1, tol, tolc, weight
631 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: sum_spread
632 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_mat, tmp_mat_1
633 : REAL(KIND=dp), DIMENSION(50) :: energy, pos
634 2 : REAL(KIND=dp), DIMENSION(:), POINTER :: tmp_arr
635 : TYPE(cp_blacs_env_type), POINTER :: context
636 : TYPE(cp_cfm_type) :: c_tilde, ctrans_lambda, Gct_old, &
637 : grad_ctilde, skc, tmp_cfm, tmp_cfm_1, &
638 : tmp_cfm_2, U, UL, V, VL, zdiag
639 2 : TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij, zij_0
640 : TYPE(cp_fm_struct_type), POINTER :: tmp_fm_struct
641 : TYPE(cp_fm_type) :: id_nextra, matrix_U, matrix_V, &
642 : matrix_V_all, rmat, tmp_fm, vectors_all
643 :
644 2 : CALL timeset(routineN, handle)
645 :
646 2 : dim2 = SIZE(zij, 2)
647 2 : NULLIFY (context)
648 2 : NULLIFY (matrix_zii, arr_zii)
649 2 : NULLIFY (tmp_fm_struct)
650 2 : NULLIFY (tmp_arr)
651 :
652 12 : ALLOCATE (c_zij(dim2))
653 :
654 2 : CALL cp_fm_get_info(zij(1, 1), nrow_global=nstate)
655 :
656 6 : ALLOCATE (sum_spread(nstate))
657 8 : ALLOCATE (matrix_zii(nstate, dim2))
658 266 : matrix_zii = czero
659 2 : sum_spread = 0.0_dp
660 :
661 2 : alpha = 0.0_dp
662 8 : DO idim = 1, dim2
663 6 : alpha = alpha + weights(idim)
664 6 : CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
665 : c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
666 5813 : zij(2, idim)%local_data, dp)
667 : END DO
668 :
669 10 : ALLOCATE (zij_0(dim2))
670 :
671 2 : CALL cp_cfm_create(U, zij(1, 1)%matrix_struct)
672 2 : CALL cp_fm_create(matrix_U, zij(1, 1)%matrix_struct)
673 :
674 2 : CALL cp_cfm_set_all(U, czero, cone)
675 2 : CALL cp_fm_set_all(matrix_U, 0.0_dp, 1.0_dp)
676 :
677 2 : CALL cp_fm_get_info(vectors, nrow_global=nao)
678 2 : IF (nextra > 0) THEN
679 2 : IF (PRESENT(mos_guess)) THEN
680 2 : do_cinit_random = .FALSE.
681 2 : do_cinit_mo = .TRUE.
682 2 : CALL cp_fm_get_info(mos_guess, ncol_global=ndummy)
683 : ELSE
684 0 : do_cinit_random = .TRUE.
685 0 : do_cinit_mo = .FALSE.
686 0 : ndummy = nstate
687 : END IF
688 :
689 : IF (do_cinit_random) THEN
690 : icinit = 1
691 : do_U_guess_mo = .FALSE.
692 : ELSEIF (do_cinit_mo) THEN
693 2 : icinit = 2
694 2 : do_U_guess_mo = .TRUE.
695 : END IF
696 :
697 2 : nocc = nstate - nextra
698 2 : northo = nmo - nocc
699 2 : norextra = nmo - nstate
700 2 : CALL cp_fm_struct_get(zij(1, 1)%matrix_struct, context=context)
701 :
702 8 : ALLOCATE (tmp_cmat(nstate, nstate))
703 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nmo, &
704 2 : para_env=para_env, context=context)
705 8 : DO idim = 1, dim2
706 6 : CALL cp_cfm_create(zij_0(idim), tmp_fm_struct)
707 6 : CALL cp_cfm_set_all(zij_0(idim), czero, cone)
708 6 : CALL cp_cfm_get_submatrix(c_zij(idim), tmp_cmat)
709 8 : CALL cp_cfm_set_submatrix(zij_0(idim), tmp_cmat)
710 : END DO
711 2 : CALL cp_fm_struct_release(tmp_fm_struct)
712 2 : DEALLOCATE (tmp_cmat)
713 :
714 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nstate, &
715 2 : para_env=para_env, context=context)
716 2 : CALL cp_cfm_create(V, tmp_fm_struct)
717 2 : CALL cp_fm_create(matrix_V, tmp_fm_struct)
718 2 : CALL cp_cfm_create(zdiag, tmp_fm_struct)
719 2 : CALL cp_fm_create(rmat, tmp_fm_struct)
720 2 : CALL cp_fm_struct_release(tmp_fm_struct)
721 2 : CALL cp_cfm_set_all(V, czero, cone)
722 2 : CALL cp_fm_set_all(matrix_V, 0.0_dp, 1.0_dp)
723 :
724 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=ndummy, &
725 2 : para_env=para_env, context=context)
726 2 : CALL cp_fm_create(matrix_V_all, tmp_fm_struct)
727 2 : CALL cp_fm_struct_release(tmp_fm_struct)
728 2 : CALL cp_fm_set_all(matrix_V_all, 0._dp, 1._dp)
729 :
730 6 : ALLOCATE (arr_zii(nstate))
731 :
732 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
733 2 : para_env=para_env, context=context)
734 2 : CALL cp_cfm_create(c_tilde, tmp_fm_struct)
735 2 : CALL cp_cfm_create(grad_ctilde, tmp_fm_struct)
736 2 : CALL cp_cfm_create(Gct_old, tmp_fm_struct)
737 2 : CALL cp_cfm_create(skc, tmp_fm_struct)
738 2 : CALL cp_fm_struct_release(tmp_fm_struct)
739 2 : CALL cp_cfm_set_all(c_tilde, czero)
740 2 : CALL cp_cfm_set_all(Gct_old, czero)
741 2 : CALL cp_cfm_set_all(skc, czero)
742 :
743 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nstate, &
744 2 : para_env=para_env, context=context)
745 2 : CALL cp_cfm_create(VL, tmp_fm_struct)
746 2 : CALL cp_cfm_set_all(VL, czero)
747 2 : CALL cp_fm_struct_release(tmp_fm_struct)
748 :
749 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nextra, &
750 2 : para_env=para_env, context=context)
751 2 : CALL cp_fm_create(id_nextra, tmp_fm_struct)
752 2 : CALL cp_cfm_create(ctrans_lambda, tmp_fm_struct)
753 2 : CALL cp_fm_struct_release(tmp_fm_struct)
754 2 : CALL cp_cfm_set_all(ctrans_lambda, czero)
755 2 : CALL cp_fm_set_all(id_nextra, 0.0_dp, 1.0_dp)
756 :
757 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nstate, &
758 2 : para_env=para_env, context=context)
759 2 : CALL cp_cfm_create(UL, tmp_fm_struct)
760 2 : CALL cp_fm_struct_release(tmp_fm_struct)
761 2 : CALL cp_cfm_set_all(UL, czero)
762 :
763 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nao, ncol_global=nmo, &
764 2 : para_env=para_env, context=context)
765 2 : CALL cp_fm_create(vectors_all, tmp_fm_struct)
766 2 : CALL cp_fm_struct_release(tmp_fm_struct)
767 8 : ALLOCATE (tmp_mat(nao, nstate))
768 2 : CALL cp_fm_get_submatrix(vectors, tmp_mat)
769 2 : CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, 1, nao, nstate)
770 2 : DEALLOCATE (tmp_mat)
771 8 : ALLOCATE (tmp_mat(nao, norextra))
772 2 : CALL cp_fm_get_submatrix(vectors_2, tmp_mat)
773 2 : CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, nstate + 1, nao, norextra)
774 2 : DEALLOCATE (tmp_mat)
775 :
776 : ! initialize c_tilde
777 14 : SELECT CASE (icinit)
778 : CASE (1) ! random coefficients
779 : !WRITE (*, *) "RANDOM INITIAL GUESS FOR C"
780 0 : CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
781 0 : CALL cp_fm_init_random(tmp_fm, nextra)
782 0 : CALL ortho_vectors(tmp_fm)
783 0 : c_tilde%local_data = tmp_fm%local_data
784 0 : CALL cp_fm_release(tmp_fm)
785 0 : ALLOCATE (tmp_cmat(northo, nextra))
786 0 : CALL cp_cfm_get_submatrix(c_tilde, tmp_cmat)
787 0 : CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, nocc + 1, northo, nextra)
788 0 : DEALLOCATE (tmp_cmat)
789 : CASE (2) ! MO based coeffs
790 2 : CALL parallel_gemm("T", "N", nmo, ndummy, nao, 1.0_dp, vectors_all, mos_guess, 0.0_dp, matrix_V_all)
791 6 : ALLOCATE (tmp_arr(nmo))
792 8 : ALLOCATE (tmp_mat(nmo, ndummy))
793 8 : ALLOCATE (tmp_mat_1(nmo, nstate))
794 : ! normalize matrix_V_all
795 2 : CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat)
796 88 : DO istate = 1, ndummy
797 4558 : tmp_arr(:) = tmp_mat(:, istate)
798 4558 : norm = NORM2(tmp_arr)
799 4558 : tmp_arr(:) = tmp_arr(:)/norm
800 4560 : tmp_mat(:, istate) = tmp_arr(:)
801 : END DO
802 2 : CALL cp_fm_set_submatrix(matrix_V_all, tmp_mat)
803 2 : CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat_1, 1, 1, nmo, nstate)
804 2 : CALL cp_fm_set_submatrix(matrix_V, tmp_mat_1)
805 2 : DEALLOCATE (tmp_arr, tmp_mat, tmp_mat_1)
806 2 : CALL cp_fm_to_cfm(msourcer=matrix_V, mtarget=V)
807 8 : ALLOCATE (tmp_mat(northo, ndummy))
808 8 : ALLOCATE (tmp_mat_1(northo, nextra))
809 2 : CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat, nocc + 1, 1, northo, ndummy)
810 6 : ALLOCATE (tmp_arr(ndummy))
811 88 : tmp_arr = 0.0_dp
812 88 : DO istate = 1, ndummy
813 1034 : tmp_arr(istate) = NORM2(tmp_mat(:, istate))
814 : END DO
815 : ! find edfs
816 6 : DO istate = 1, nextra
817 180 : iloc = MAXLOC(tmp_arr)
818 48 : tmp_mat_1(:, istate) = tmp_mat(:, iloc(1))
819 6 : tmp_arr(iloc(1)) = 0.0_dp
820 : END DO
821 :
822 2 : DEALLOCATE (tmp_arr, tmp_mat)
823 :
824 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
825 2 : para_env=para_env, context=context)
826 2 : CALL cp_fm_create(tmp_fm, tmp_fm_struct)
827 2 : CALL cp_fm_struct_release(tmp_fm_struct)
828 2 : CALL cp_fm_set_submatrix(tmp_fm, tmp_mat_1)
829 2 : DEALLOCATE (tmp_mat_1)
830 2 : CALL ortho_vectors(tmp_fm)
831 2 : CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
832 2 : CALL cp_fm_release(tmp_fm)
833 : ! initialize U
834 2 : IF (do_U_guess_mo) THEN
835 8 : ALLOCATE (tmp_cmat(nocc, nstate))
836 2 : CALL cp_cfm_get_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
837 2 : CALL cp_cfm_set_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
838 2 : DEALLOCATE (tmp_cmat)
839 8 : ALLOCATE (tmp_cmat(northo, nstate))
840 2 : CALL cp_cfm_get_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
841 2 : CALL cp_cfm_set_submatrix(VL, tmp_cmat, 1, 1, northo, nstate)
842 2 : DEALLOCATE (tmp_cmat)
843 2 : CALL parallel_gemm("C", "N", nextra, nstate, northo, cone, c_tilde, VL, czero, UL)
844 8 : ALLOCATE (tmp_cmat(nextra, nstate))
845 2 : CALL cp_cfm_get_submatrix(UL, tmp_cmat, 1, 1, nextra, nstate)
846 2 : CALL cp_cfm_set_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
847 2 : DEALLOCATE (tmp_cmat)
848 2 : CALL cp_fm_create(tmp_fm, U%matrix_struct)
849 1937 : tmp_fm%local_data = REAL(U%local_data, KIND=dp)
850 2 : CALL ortho_vectors(tmp_fm)
851 2 : CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=U)
852 2 : CALL cp_fm_release(tmp_fm)
853 4 : CALL cp_cfm_to_fm(U, matrix_U)
854 : END IF
855 : ! reevaluate V
856 8 : ALLOCATE (tmp_cmat(nocc, nstate))
857 2 : CALL cp_cfm_get_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
858 2 : CALL cp_cfm_set_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
859 2 : DEALLOCATE (tmp_cmat)
860 8 : ALLOCATE (tmp_cmat(nextra, nstate))
861 2 : CALL cp_cfm_get_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
862 2 : CALL cp_cfm_set_submatrix(UL, tmp_cmat, 1, 1, nextra, nstate)
863 2 : DEALLOCATE (tmp_cmat)
864 2 : CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, UL, czero, VL)
865 8 : ALLOCATE (tmp_cmat(northo, nstate))
866 2 : CALL cp_cfm_get_submatrix(VL, tmp_cmat)
867 2 : CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
868 6 : DEALLOCATE (tmp_cmat)
869 : END SELECT
870 : ELSE
871 0 : DO idim = 1, dim2
872 0 : CALL cp_cfm_create(zij_0(idim), zij(1, 1)%matrix_struct)
873 0 : CALL cp_cfm_to_cfm(c_zij(idim), zij_0(idim))
874 : END DO
875 0 : CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
876 0 : CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
877 : END IF
878 :
879 2 : unit_nr = -1
880 2 : IF (rmat%matrix_struct%para_env%is_source()) THEN
881 1 : unit_nr = cp_logger_get_default_unit_nr()
882 1 : WRITE (unit_nr, '(T4,A )') " Localization by combined Jacobi rotations and Non-Linear Conjugate Gradient"
883 : END IF
884 :
885 2 : norm2_old = 1.0E30_dp
886 2 : ds_min = 1.0_dp
887 2 : new_direction = .TRUE.
888 2 : iter = 0
889 2 : line_searches = 0
890 2 : line_search_count = 0
891 2 : tol = 1.0E+20_dp
892 2 : mintol = 1.0E+10_dp
893 2 : miniter = 0
894 :
895 : !IF (nextra > 0) WRITE(*,*) 'random_guess, MO_guess, U_guess, conjugate_gradient: ', &
896 : ! do_cinit_random, do_cinit_mo, do_U_guess_mo, do_cg
897 :
898 : ! do conjugate gradient until converged
899 34 : DO WHILE (iter < max_iter)
900 34 : iter = iter + 1
901 : !WRITE(*,*) 'iter = ', iter
902 34 : t1 = m_walltime()
903 :
904 34 : IF (iter > 1) THEN
905 : ! comput U
906 32 : CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
907 32 : CALL cp_cfm_create(tmp_cfm_2, zij(1, 1)%matrix_struct)
908 32 : IF (para_env%num_pe == 1) THEN
909 0 : CALL jacobi_rotations_serial_1(weights, c_zij, 1, tmp_cfm_2, tol_out=tol)
910 : ELSE
911 32 : CALL jacobi_rot_para_1(weights, c_zij, para_env, 1, tmp_cfm_2, tol_out=tol)
912 : END IF
913 32 : CALL parallel_gemm('N', 'N', nstate, nstate, nstate, cone, U, tmp_cfm_2, czero, tmp_cfm)
914 32 : CALL cp_cfm_to_cfm(tmp_cfm, U)
915 32 : CALL cp_cfm_release(tmp_cfm)
916 32 : CALL cp_cfm_release(tmp_cfm_2)
917 : END IF
918 :
919 34 : IF (nextra > 0) THEN
920 136 : ALLOCATE (tmp_cmat(nextra, nstate))
921 34 : CALL cp_cfm_get_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
922 34 : CALL cp_cfm_set_submatrix(UL, tmp_cmat)
923 34 : DEALLOCATE (tmp_cmat)
924 34 : IF (iter > 1) THEN
925 : ! orthonormalize c_tilde
926 32 : CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
927 448 : tmp_fm%local_data = REAL(c_tilde%local_data, KIND=dp)
928 32 : CALL ortho_vectors(tmp_fm)
929 32 : CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
930 32 : CALL cp_fm_release(tmp_fm)
931 :
932 128 : ALLOCATE (tmp_cmat(nocc, nstate))
933 32 : CALL cp_cfm_get_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
934 32 : CALL cp_cfm_set_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
935 32 : DEALLOCATE (tmp_cmat)
936 32 : CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, UL, czero, VL)
937 128 : ALLOCATE (tmp_cmat(northo, nstate))
938 32 : CALL cp_cfm_get_submatrix(VL, tmp_cmat)
939 32 : CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
940 64 : DEALLOCATE (tmp_cmat)
941 : END IF
942 :
943 : ! reset if new_direction
944 34 : IF (new_direction .AND. MOD(line_searches, 20) == 5) THEN
945 0 : CALL cp_cfm_set_all(skc, czero)
946 0 : CALL cp_cfm_set_all(Gct_old, czero)
947 0 : norm2_old = 1.0E30_dp
948 : END IF
949 :
950 34 : CALL cp_cfm_create(tmp_cfm, V%matrix_struct)
951 34 : CALL cp_cfm_to_cfm(V, tmp_cfm)
952 34 : CALL cp_cfm_create(tmp_cfm_1, V%matrix_struct)
953 68 : ndummy = nmo
954 : ELSE
955 0 : CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
956 0 : CALL cp_cfm_to_cfm(U, tmp_cfm)
957 0 : CALL cp_cfm_create(tmp_cfm_1, zij(1, 1)%matrix_struct)
958 0 : ndummy = nstate
959 : END IF
960 : ! update z_ij
961 136 : DO idim = 1, dim2
962 : ! 'tmp_cfm_1 = zij_0*tmp_cfm'
963 : CALL parallel_gemm("N", "N", ndummy, nstate, ndummy, cone, zij_0(idim), &
964 102 : tmp_cfm, czero, tmp_cfm_1)
965 : ! 'c_zij = tmp_cfm_dagg*tmp_cfm_1'
966 : CALL parallel_gemm("C", "N", nstate, nstate, ndummy, cone, tmp_cfm, tmp_cfm_1, &
967 136 : czero, c_zij(idim))
968 : END DO
969 34 : CALL cp_cfm_release(tmp_cfm)
970 34 : CALL cp_cfm_release(tmp_cfm_1)
971 : ! compute spread
972 1496 : DO istate = 1, nstate
973 1462 : spread_ii = 0.0_dp
974 5848 : DO idim = 1, dim2
975 4386 : CALL cp_cfm_get_element(c_zij(idim), istate, istate, mzii)
976 : spread_ii = spread_ii + weights(idim)* &
977 4386 : ABS(mzii)**2/twopi/twopi
978 5848 : matrix_zii(istate, idim) = mzii
979 : END DO
980 : !WRITE(*,*) 'spread_ii', spread_ii
981 1496 : sum_spread(istate) = spread_ii
982 : END DO
983 34 : CALL c_zij(1)%matrix_struct%para_env%sum(spread_ii)
984 34 : spread_sum = accurate_sum(sum_spread)
985 :
986 34 : IF (nextra > 0) THEN
987 : ! update c_tilde
988 34 : CALL cp_cfm_set_all(zdiag, czero)
989 34 : CALL cp_cfm_set_all(grad_ctilde, czero)
990 34 : CALL cp_cfm_create(tmp_cfm, V%matrix_struct)
991 34 : CALL cp_cfm_set_all(tmp_cfm, czero)
992 34 : CALL cp_cfm_create(tmp_cfm_1, V%matrix_struct)
993 34 : CALL cp_cfm_set_all(tmp_cfm_1, czero)
994 136 : ALLOCATE (tmp_cmat(northo, nstate))
995 136 : DO idim = 1, dim2
996 102 : weight = weights(idim)
997 8874 : arr_zii = matrix_zii(:, idim)
998 : ! tmp_cfm = zij_0*V
999 : CALL parallel_gemm("N", "N", nmo, nstate, nmo, cone, &
1000 102 : zij_0(idim), V, czero, tmp_cfm)
1001 : ! tmp_cfm = tmp_cfm*diag_zij_dagg
1002 4488 : CALL cp_cfm_column_scale(tmp_cfm, CONJG(arr_zii))
1003 : ! tmp_cfm_1 = tmp_cfm*U_dagg
1004 : CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
1005 102 : U, czero, tmp_cfm_1)
1006 102 : CALL cp_cfm_scale(weight, tmp_cfm_1)
1007 : ! zdiag = zdiag + tmp_cfm_1'
1008 102 : CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
1009 :
1010 : ! tmp_cfm = zij_0_dagg*V
1011 : CALL parallel_gemm("C", "N", nmo, nstate, nmo, cone, &
1012 102 : zij_0(idim), V, czero, tmp_cfm)
1013 :
1014 : ! tmp_cfm = tmp_cfm*diag_zij
1015 102 : CALL cp_cfm_column_scale(tmp_cfm, arr_zii)
1016 : ! tmp_cfm_1 = tmp_cfm*U_dagg
1017 : CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
1018 102 : U, czero, tmp_cfm_1)
1019 102 : CALL cp_cfm_scale(weight, tmp_cfm_1)
1020 : ! zdiag = zdiag + tmp_cfm_1'
1021 136 : CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
1022 : END DO ! idim
1023 34 : CALL cp_cfm_release(tmp_cfm)
1024 34 : CALL cp_cfm_release(tmp_cfm_1)
1025 34 : DEALLOCATE (tmp_cmat)
1026 136 : ALLOCATE (tmp_cmat(northo, nextra))
1027 : CALL cp_cfm_get_submatrix(zdiag, tmp_cmat, nocc + 1, nocc + 1, &
1028 34 : northo, nextra, .FALSE.)
1029 : ! 'grad_ctilde'
1030 34 : CALL cp_cfm_set_submatrix(grad_ctilde, tmp_cmat)
1031 34 : DEALLOCATE (tmp_cmat)
1032 : ! ctrans_lambda = c_tilde_dagg*grad_ctilde
1033 34 : CALL parallel_gemm("C", "N", nextra, nextra, northo, cone, c_tilde, grad_ctilde, czero, ctrans_lambda)
1034 : !WRITE(*,*) "norm(ctrans_lambda) = ", cp_cfm_norm(ctrans_lambda, "F")
1035 : ! 'grad_ctilde = - c_tilde*ctrans_lambda + grad_ctilde'
1036 102 : CALL parallel_gemm("N", "N", northo, nextra, nextra, -cone, c_tilde, ctrans_lambda, cone, grad_ctilde)
1037 : END IF ! nextra > 0
1038 :
1039 : ! tolerance
1040 34 : IF (nextra > 0) THEN
1041 : tolc = 0.0_dp
1042 34 : CALL cp_fm_create(tmp_fm, grad_ctilde%matrix_struct)
1043 34 : CALL cp_cfm_to_fm(grad_ctilde, tmp_fm)
1044 34 : CALL cp_fm_maxabsval(tmp_fm, tolc)
1045 34 : CALL cp_fm_release(tmp_fm)
1046 : !WRITE(*,*) 'tolc = ', tolc
1047 34 : tol = tol + tolc
1048 : END IF
1049 : !WRITE(*,*) 'tol = ', tol
1050 :
1051 36 : IF (nextra > 0) THEN
1052 : !WRITE(*,*) 'new_direction: ', new_direction
1053 34 : IF (new_direction) THEN
1054 6 : line_searches = line_searches + 1
1055 6 : IF (mintol > tol) THEN
1056 4 : mintol = tol
1057 4 : miniter = iter
1058 : END IF
1059 :
1060 6 : IF (unit_nr > 0 .AND. MODULO(iter, out_each) == 0) THEN
1061 0 : sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1062 0 : avg_spread_ii = sum_spread_ii/nstate
1063 : WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1064 0 : "Iteration", "Avg. Spread_ii", "Tolerance"
1065 : WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1066 0 : iter, avg_spread_ii, tol
1067 0 : CALL m_flush(unit_nr)
1068 : END IF
1069 6 : IF (tol < eps_localization) EXIT
1070 :
1071 4 : IF (do_cg) THEN
1072 : cnorm2_Gct = czero
1073 : cnorm2_Gct_cross = czero
1074 4 : CALL cp_cfm_trace(grad_ctilde, Gct_old, cnorm2_Gct_cross)
1075 4 : norm2_Gct_cross = REAL(cnorm2_Gct_cross, KIND=dp)
1076 56 : Gct_old%local_data = grad_ctilde%local_data
1077 4 : CALL cp_cfm_trace(grad_ctilde, Gct_old, cnorm2_Gct)
1078 4 : norm2_Gct = REAL(cnorm2_Gct, KIND=dp)
1079 : ! compute beta_pr
1080 4 : beta_pr = (norm2_Gct - norm2_Gct_cross)/norm2_old
1081 4 : norm2_old = norm2_Gct
1082 4 : beta = MAX(0.0_dp, beta_pr)
1083 : !WRITE(*,*) 'beta = ', beta
1084 : ! compute skc / ska = beta * skc / ska + grad_ctilde / G
1085 4 : CALL cp_cfm_scale(beta, skc)
1086 4 : CALL cp_cfm_scale_and_add(cone, skc, cone, Gct_old)
1087 4 : CALL cp_cfm_trace(skc, Gct_old, cnorm2_Gct_cross)
1088 4 : norm2_Gct_cross = REAL(cnorm2_Gct_cross, KIND=dp)
1089 4 : IF (norm2_Gct_cross <= 0.0_dp) THEN ! back to steepest ascent
1090 0 : CALL cp_cfm_scale_and_add(czero, skc, cone, Gct_old)
1091 : END IF
1092 : ELSE
1093 0 : CALL cp_cfm_scale_and_add(czero, skc, cone, grad_ctilde)
1094 : END IF
1095 : line_search_count = 0
1096 : END IF
1097 :
1098 32 : line_search_count = line_search_count + 1
1099 : !WRITE(*,*) 'line_search_count = ', line_search_count
1100 32 : energy(line_search_count) = spread_sum
1101 :
1102 : ! gold line search
1103 32 : new_direction = .FALSE.
1104 32 : IF (line_search_count == 1) THEN
1105 4 : lsl = 1
1106 4 : lsr = 0
1107 4 : lsm = 1
1108 4 : pos(1) = 0.0_dp
1109 4 : pos(2) = ds_min/gold_sec
1110 4 : ds = pos(2)
1111 : ELSE
1112 28 : IF (line_search_count == 50) THEN
1113 0 : IF (ABS(energy(line_search_count) - energy(line_search_count - 1)) < 1.0E-4_dp) THEN
1114 0 : CPWARN("Line search failed to converge properly")
1115 0 : ds_min = 0.1_dp
1116 0 : new_direction = .TRUE.
1117 : ds = pos(line_search_count)
1118 0 : line_search_count = 0
1119 : ELSE
1120 0 : CPABORT("No. of line searches exceeds 50")
1121 : END IF
1122 : ELSE
1123 28 : IF (lsr == 0) THEN
1124 28 : IF (energy(line_search_count - 1) > energy(line_search_count)) THEN
1125 0 : lsr = line_search_count
1126 0 : pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
1127 : ELSE
1128 28 : lsl = lsm
1129 28 : lsm = line_search_count
1130 28 : pos(line_search_count + 1) = pos(line_search_count)/gold_sec
1131 : END IF
1132 : ELSE
1133 0 : IF (pos(line_search_count) < pos(lsm)) THEN
1134 0 : IF (energy(line_search_count) > energy(lsm)) THEN
1135 : lsr = lsm
1136 : lsm = line_search_count
1137 : ELSE
1138 0 : lsl = line_search_count
1139 : END IF
1140 : ELSE
1141 0 : IF (energy(line_search_count) > energy(lsm)) THEN
1142 : lsl = lsm
1143 : lsm = line_search_count
1144 : ELSE
1145 0 : lsr = line_search_count
1146 : END IF
1147 : END IF
1148 0 : IF (pos(lsr) - pos(lsm) > pos(lsm) - pos(lsl)) THEN
1149 0 : pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
1150 : ELSE
1151 0 : pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
1152 : END IF
1153 0 : IF ((pos(lsr) - pos(lsl)) < 1.0E-3_dp*pos(lsr)) THEN
1154 0 : new_direction = .TRUE.
1155 : END IF
1156 : END IF ! lsr .eq. 0
1157 : END IF ! line_search_count .eq. 50
1158 : ! now go to the suggested point
1159 28 : ds = pos(line_search_count + 1) - pos(line_search_count)
1160 : !WRITE(*,*) 'lsl, lsr, lsm, ds = ', lsl, lsr, lsm, ds
1161 28 : IF ((ABS(ds) < 1.0E-10_dp) .AND. (lsl == 1)) THEN
1162 0 : new_direction = .TRUE.
1163 0 : ds_min = 0.5_dp/alpha
1164 28 : ELSEIF (ABS(ds) > 10.0_dp) THEN
1165 4 : new_direction = .TRUE.
1166 4 : ds_min = 0.5_dp/alpha
1167 : ELSE
1168 : ds_min = pos(line_search_count + 1)
1169 : END IF
1170 : END IF ! first step
1171 : ! 'c_tilde = c_tilde + d*skc'
1172 32 : CALL cp_cfm_scale(ds, skc)
1173 32 : CALL cp_cfm_scale_and_add(cone, c_tilde, cone, skc)
1174 : ELSE
1175 0 : IF (mintol > tol) THEN
1176 0 : mintol = tol
1177 0 : miniter = iter
1178 : END IF
1179 0 : IF (unit_nr > 0 .AND. MODULO(iter, out_each) == 0) THEN
1180 0 : sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1181 0 : avg_spread_ii = sum_spread_ii/nstate
1182 : WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1183 0 : "Iteration", "Avg. Spread_ii", "Tolerance"
1184 : WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1185 0 : iter, avg_spread_ii, tol
1186 0 : CALL m_flush(unit_nr)
1187 : END IF
1188 0 : IF (tol < eps_localization) EXIT
1189 : END IF ! nextra > 0
1190 :
1191 : END DO ! iteration
1192 :
1193 2 : IF ((unit_nr > 0) .AND. (iter == max_iter)) THEN
1194 0 : WRITE (unit_nr, '(T4,A,T4,A)') "Min. Itr.", "Min. Tol."
1195 0 : WRITE (unit_nr, '(T4,I7,T4,E12.4)') miniter, mintol
1196 0 : CALL m_flush(unit_nr)
1197 : END IF
1198 :
1199 2 : CALL cp_cfm_to_fm(U, matrix_U)
1200 :
1201 2 : IF (nextra > 0) THEN
1202 2324 : rmat%local_data = REAL(V%local_data, KIND=dp)
1203 2 : CALL rotate_orbitals_edf(rmat, vectors_all, vectors)
1204 :
1205 2 : CALL cp_cfm_release(c_tilde)
1206 2 : CALL cp_cfm_release(grad_ctilde)
1207 2 : CALL cp_cfm_release(Gct_old)
1208 2 : CALL cp_cfm_release(skc)
1209 2 : CALL cp_cfm_release(UL)
1210 2 : CALL cp_cfm_release(zdiag)
1211 2 : CALL cp_cfm_release(ctrans_lambda)
1212 2 : CALL cp_fm_release(id_nextra)
1213 2 : CALL cp_fm_release(vectors_all)
1214 2 : CALL cp_cfm_release(V)
1215 2 : CALL cp_fm_release(matrix_V)
1216 2 : CALL cp_fm_release(matrix_V_all)
1217 2 : CALL cp_cfm_release(VL)
1218 2 : DEALLOCATE (arr_zii)
1219 : ELSE
1220 0 : rmat%local_data = matrix_U%local_data
1221 0 : CALL rotate_orbitals(rmat, vectors)
1222 : END IF
1223 8 : DO idim = 1, dim2
1224 8 : CALL cp_cfm_release(zij_0(idim))
1225 : END DO
1226 2 : DEALLOCATE (zij_0)
1227 :
1228 8 : DO idim = 1, dim2
1229 5811 : zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
1230 5811 : zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
1231 8 : CALL cp_cfm_release(c_zij(idim))
1232 : END DO
1233 2 : DEALLOCATE (c_zij)
1234 2 : CALL cp_fm_release(rmat)
1235 2 : CALL cp_cfm_release(U)
1236 2 : CALL cp_fm_release(matrix_U)
1237 2 : DEALLOCATE (matrix_zii, sum_spread)
1238 :
1239 2 : CALL timestop(handle)
1240 :
1241 10 : END SUBROUTINE jacobi_cg_edf_ls
1242 :
1243 : ! **************************************************************************************************
1244 : !> \brief ...
1245 : !> \param vmatrix ...
1246 : ! **************************************************************************************************
1247 108 : SUBROUTINE ortho_vectors(vmatrix)
1248 :
1249 : TYPE(cp_fm_type), INTENT(IN) :: vmatrix
1250 :
1251 : CHARACTER(LEN=*), PARAMETER :: routineN = 'ortho_vectors'
1252 :
1253 : INTEGER :: handle, n, ncol
1254 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
1255 : TYPE(cp_fm_type) :: overlap_vv
1256 :
1257 36 : CALL timeset(routineN, handle)
1258 :
1259 36 : NULLIFY (fm_struct_tmp)
1260 :
1261 36 : CALL cp_fm_get_info(matrix=vmatrix, nrow_global=n, ncol_global=ncol)
1262 :
1263 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ncol, ncol_global=ncol, &
1264 : para_env=vmatrix%matrix_struct%para_env, &
1265 36 : context=vmatrix%matrix_struct%context)
1266 36 : CALL cp_fm_create(overlap_vv, fm_struct_tmp, "overlap_vv")
1267 36 : CALL cp_fm_struct_release(fm_struct_tmp)
1268 :
1269 36 : CALL parallel_gemm('T', 'N', ncol, ncol, n, 1.0_dp, vmatrix, vmatrix, 0.0_dp, overlap_vv)
1270 36 : CALL cp_fm_cholesky_decompose(overlap_vv)
1271 36 : CALL cp_fm_triangular_multiply(overlap_vv, vmatrix, n_cols=ncol, side='R', invert_tr=.TRUE.)
1272 :
1273 36 : CALL cp_fm_release(overlap_vv)
1274 :
1275 36 : CALL timestop(handle)
1276 :
1277 36 : END SUBROUTINE ortho_vectors
1278 :
1279 : ! **************************************************************************************************
1280 : !> \brief ...
1281 : !> \param istate ...
1282 : !> \param jstate ...
1283 : !> \param st ...
1284 : !> \param ct ...
1285 : !> \param zij ...
1286 : ! **************************************************************************************************
1287 0 : SUBROUTINE rotate_zij(istate, jstate, st, ct, zij)
1288 : INTEGER, INTENT(IN) :: istate, jstate
1289 : REAL(KIND=dp), INTENT(IN) :: st, ct
1290 : TYPE(cp_cfm_type) :: zij(:)
1291 :
1292 : INTEGER :: id
1293 :
1294 : ! Locals
1295 :
1296 0 : DO id = 1, SIZE(zij, 1)
1297 0 : CALL cp_cfm_rot_cols(zij(id), istate, jstate, ct, st)
1298 0 : CALL cp_cfm_rot_rows(zij(id), istate, jstate, ct, st)
1299 : END DO
1300 :
1301 0 : END SUBROUTINE rotate_zij
1302 : ! **************************************************************************************************
1303 : !> \brief ...
1304 : !> \param istate ...
1305 : !> \param jstate ...
1306 : !> \param st ...
1307 : !> \param ct ...
1308 : !> \param rmat ...
1309 : ! **************************************************************************************************
1310 0 : SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat)
1311 : INTEGER, INTENT(IN) :: istate, jstate
1312 : REAL(KIND=dp), INTENT(IN) :: st, ct
1313 : TYPE(cp_cfm_type), INTENT(IN) :: rmat
1314 :
1315 0 : CALL cp_cfm_rot_cols(rmat, istate, jstate, ct, st)
1316 :
1317 0 : END SUBROUTINE rotate_rmat
1318 : ! **************************************************************************************************
1319 : !> \brief ...
1320 : !> \param mii ...
1321 : !> \param mjj ...
1322 : !> \param mij ...
1323 : !> \param weights ...
1324 : !> \param theta ...
1325 : !> \param grad_ij ...
1326 : !> \param step ...
1327 : ! **************************************************************************************************
1328 2326264 : SUBROUTINE get_angle(mii, mjj, mij, weights, theta, grad_ij, step)
1329 : COMPLEX(KIND=dp), POINTER :: mii(:), mjj(:), mij(:)
1330 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1331 : REAL(KIND=dp), INTENT(OUT) :: theta
1332 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: grad_ij, step
1333 :
1334 : COMPLEX(KIND=dp) :: z11, z12, z22
1335 : INTEGER :: dim_m, idim
1336 : REAL(KIND=dp) :: a12, b12, d2, ratio
1337 :
1338 2326264 : a12 = 0.0_dp
1339 2326264 : b12 = 0.0_dp
1340 2326264 : dim_m = SIZE(mii)
1341 9411550 : DO idim = 1, dim_m
1342 7085286 : z11 = mii(idim)
1343 7085286 : z22 = mjj(idim)
1344 7085286 : z12 = mij(idim)
1345 7085286 : a12 = a12 + weights(idim)*REAL(CONJG(z12)*(z11 - z22), KIND=dp)
1346 : b12 = b12 + weights(idim)*REAL((z12*CONJG(z12) - &
1347 9411550 : 0.25_dp*(z11 - z22)*(CONJG(z11) - CONJG(z22))), KIND=dp)
1348 : END DO
1349 2326264 : IF (ABS(b12) > 1.e-10_dp) THEN
1350 2326264 : ratio = -a12/b12
1351 2326264 : theta = 0.25_dp*ATAN(ratio)
1352 0 : ELSEIF (ABS(b12) < 1.e-10_dp) THEN
1353 0 : b12 = 0.0_dp
1354 0 : theta = 0.0_dp
1355 : ELSE
1356 0 : theta = 0.25_dp*pi
1357 : END IF
1358 2326264 : IF (PRESENT(grad_ij)) theta = theta + step*grad_ij
1359 : ! Check second derivative info
1360 2326264 : d2 = a12*SIN(4._dp*theta) - b12*COS(4._dp*theta)
1361 2326264 : IF (d2 <= 0._dp) THEN ! go to the maximum, not the minimum
1362 3262 : IF (theta > 0.0_dp) THEN ! make theta as small as possible
1363 1619 : theta = theta - 0.25_dp*pi
1364 : ELSE
1365 1643 : theta = theta + 0.25_dp*pi
1366 : END IF
1367 : END IF
1368 2326264 : END SUBROUTINE get_angle
1369 : ! **************************************************************************************************
1370 : !> \brief ...
1371 : !> \param zij ...
1372 : !> \param weights ...
1373 : !> \param tolerance ...
1374 : !> \param grad ...
1375 : ! **************************************************************************************************
1376 104 : SUBROUTINE check_tolerance(zij, weights, tolerance, grad)
1377 : TYPE(cp_cfm_type) :: zij(:)
1378 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1379 : REAL(KIND=dp), INTENT(OUT) :: tolerance
1380 : TYPE(cp_fm_type), INTENT(OUT), OPTIONAL :: grad
1381 :
1382 : CHARACTER(len=*), PARAMETER :: routineN = 'check_tolerance'
1383 :
1384 : INTEGER :: handle
1385 : TYPE(cp_fm_type) :: force
1386 :
1387 104 : CALL timeset(routineN, handle)
1388 :
1389 : ! compute gradient at t=0
1390 :
1391 104 : CALL cp_fm_create(force, zij(1)%matrix_struct)
1392 104 : CALL cp_fm_set_all(force, 0._dp)
1393 104 : CALL grad_at_0(zij, weights, force)
1394 104 : CALL cp_fm_maxabsval(force, tolerance)
1395 104 : IF (PRESENT(grad)) CALL cp_fm_to_fm(force, grad)
1396 104 : CALL cp_fm_release(force)
1397 :
1398 104 : CALL timestop(handle)
1399 :
1400 104 : END SUBROUTINE check_tolerance
1401 :
1402 : ! **************************************************************************************************
1403 : !> \brief ...
1404 : !> \param rmat ...
1405 : !> \param vectors ...
1406 : ! **************************************************************************************************
1407 1128 : SUBROUTINE rotate_orbitals(rmat, vectors)
1408 : TYPE(cp_fm_type), INTENT(IN) :: rmat, vectors
1409 :
1410 : INTEGER :: k, n
1411 : TYPE(cp_fm_type) :: wf
1412 :
1413 564 : CALL cp_fm_create(wf, vectors%matrix_struct)
1414 564 : CALL cp_fm_get_info(vectors, nrow_global=n, ncol_global=k)
1415 564 : CALL parallel_gemm("N", "N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf)
1416 564 : CALL cp_fm_to_fm(wf, vectors)
1417 564 : CALL cp_fm_release(wf)
1418 564 : END SUBROUTINE rotate_orbitals
1419 :
1420 : ! **************************************************************************************************
1421 : !> \brief ...
1422 : !> \param rmat ...
1423 : !> \param vectors ...
1424 : ! **************************************************************************************************
1425 24 : SUBROUTINE rotate_orbitals_cfm(rmat, vectors)
1426 : TYPE(cp_cfm_type), INTENT(IN) :: rmat, vectors
1427 :
1428 : INTEGER :: k, n
1429 : TYPE(cp_cfm_type) :: wf
1430 :
1431 12 : CALL cp_cfm_create(wf, vectors%matrix_struct)
1432 12 : CALL cp_cfm_get_info(vectors, nrow_global=n, ncol_global=k)
1433 12 : CALL parallel_gemm("N", "N", n, k, k, z_one, vectors, rmat, z_zero, wf)
1434 12 : CALL cp_cfm_to_cfm(wf, vectors)
1435 12 : CALL cp_cfm_release(wf)
1436 12 : END SUBROUTINE rotate_orbitals_cfm
1437 :
1438 : ! **************************************************************************************************
1439 : !> \brief ...
1440 : !> \param rmat ...
1441 : !> \param vec_all ...
1442 : !> \param vectors ...
1443 : ! **************************************************************************************************
1444 6 : SUBROUTINE rotate_orbitals_edf(rmat, vec_all, vectors)
1445 : TYPE(cp_fm_type), INTENT(IN) :: rmat, vec_all, vectors
1446 :
1447 : INTEGER :: k, l, n
1448 : TYPE(cp_fm_type) :: wf
1449 :
1450 2 : CALL cp_fm_create(wf, vectors%matrix_struct)
1451 2 : CALL cp_fm_get_info(vec_all, nrow_global=n, ncol_global=k)
1452 2 : CALL cp_fm_get_info(rmat, ncol_global=l)
1453 :
1454 2 : CALL parallel_gemm("N", "N", n, l, k, 1.0_dp, vec_all, rmat, 0.0_dp, wf)
1455 2 : CALL cp_fm_to_fm(wf, vectors)
1456 2 : CALL cp_fm_release(wf)
1457 2 : END SUBROUTINE rotate_orbitals_edf
1458 : ! **************************************************************************************************
1459 : !> \brief ...
1460 : !> \param diag ...
1461 : !> \param weights ...
1462 : !> \param matrix ...
1463 : !> \param ndim ...
1464 : ! **************************************************************************************************
1465 1670 : SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim)
1466 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1467 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1468 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1469 : INTEGER, INTENT(IN) :: ndim
1470 :
1471 : COMPLEX(KIND=dp) :: zii, zjj
1472 : INTEGER :: idim, istate, jstate, ncol_local, &
1473 : nrow_global, nrow_local
1474 1670 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1475 : REAL(KIND=dp) :: gradsq_ij
1476 :
1477 : CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1478 : ncol_local=ncol_local, nrow_global=nrow_global, &
1479 1670 : row_indices=row_indices, col_indices=col_indices)
1480 :
1481 5010 : DO istate = 1, nrow_local
1482 18370 : DO jstate = 1, ncol_local
1483 : ! get real and imaginary parts
1484 13360 : gradsq_ij = 0.0_dp
1485 93520 : DO idim = 1, ndim
1486 80160 : zii = diag(row_indices(istate), idim)
1487 80160 : zjj = diag(col_indices(jstate), idim)
1488 : gradsq_ij = gradsq_ij + weights(idim)* &
1489 93520 : 4.0_dp*REAL((CONJG(zii)*zii + CONJG(zjj)*zjj), KIND=dp)
1490 : END DO
1491 16700 : matrix%local_data(istate, jstate) = gradsq_ij
1492 : END DO
1493 : END DO
1494 1670 : END SUBROUTINE gradsq_at_0
1495 : ! **************************************************************************************************
1496 : !> \brief ...
1497 : !> \param matrix_p ...
1498 : !> \param weights ...
1499 : !> \param matrix ...
1500 : ! **************************************************************************************************
1501 104 : SUBROUTINE grad_at_0(matrix_p, weights, matrix)
1502 : TYPE(cp_cfm_type) :: matrix_p(:)
1503 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1504 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1505 :
1506 : COMPLEX(KIND=dp) :: zii, zij, zjj
1507 104 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1508 : INTEGER :: dim_m, idim, istate, jstate, ncol_local, &
1509 : nrow_global, nrow_local
1510 104 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1511 : REAL(KIND=dp) :: grad_ij
1512 :
1513 104 : NULLIFY (diag)
1514 : CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1515 : ncol_local=ncol_local, nrow_global=nrow_global, &
1516 104 : row_indices=row_indices, col_indices=col_indices)
1517 104 : dim_m = SIZE(matrix_p, 1)
1518 416 : ALLOCATE (diag(nrow_global, dim_m))
1519 :
1520 728 : DO idim = 1, dim_m
1521 23192 : DO istate = 1, nrow_global
1522 23088 : CALL cp_cfm_get_element(matrix_p(idim), istate, istate, diag(istate, idim))
1523 : END DO
1524 : END DO
1525 :
1526 1976 : DO istate = 1, nrow_local
1527 69368 : DO jstate = 1, ncol_local
1528 : ! get real and imaginary parts
1529 : grad_ij = 0.0_dp
1530 471744 : DO idim = 1, dim_m
1531 404352 : zii = diag(row_indices(istate), idim)
1532 404352 : zjj = diag(col_indices(jstate), idim)
1533 404352 : zij = matrix_p(idim)%local_data(istate, jstate)
1534 : grad_ij = grad_ij + weights(idim)* &
1535 471744 : REAL(4.0_dp*CONJG(zij)*(zjj - zii), dp)
1536 : END DO
1537 69264 : matrix%local_data(istate, jstate) = grad_ij
1538 : END DO
1539 : END DO
1540 104 : DEALLOCATE (diag)
1541 104 : END SUBROUTINE grad_at_0
1542 :
1543 : ! return energy and maximum gradient in the current point
1544 : ! **************************************************************************************************
1545 : !> \brief ...
1546 : !> \param weights ...
1547 : !> \param zij ...
1548 : !> \param tolerance ...
1549 : !> \param value ...
1550 : ! **************************************************************************************************
1551 4944 : SUBROUTINE check_tolerance_new(weights, zij, tolerance, value)
1552 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1553 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :)
1554 : REAL(KIND=dp) :: tolerance, value
1555 :
1556 : COMPLEX(KIND=dp) :: kii, kij, kjj
1557 4944 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1558 : INTEGER :: idim, istate, jstate, ncol_local, &
1559 : nrow_global, nrow_local
1560 4944 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1561 : REAL(KIND=dp) :: grad_ij, ra, rb
1562 :
1563 4944 : NULLIFY (diag)
1564 : CALL cp_fm_get_info(zij(1, 1), nrow_local=nrow_local, &
1565 : ncol_local=ncol_local, nrow_global=nrow_global, &
1566 4944 : row_indices=row_indices, col_indices=col_indices)
1567 19776 : ALLOCATE (diag(nrow_global, SIZE(zij, 2)))
1568 4944 : value = 0.0_dp
1569 19890 : DO idim = 1, SIZE(zij, 2)
1570 201120 : DO istate = 1, nrow_global
1571 181230 : CALL cp_fm_get_element(zij(1, idim), istate, istate, ra)
1572 181230 : CALL cp_fm_get_element(zij(2, idim), istate, istate, rb)
1573 181230 : diag(istate, idim) = CMPLX(ra, rb, dp)
1574 196176 : value = value + weights(idim) - weights(idim)*ABS(diag(istate, idim))**2
1575 : END DO
1576 : END DO
1577 4944 : tolerance = 0.0_dp
1578 35054 : DO istate = 1, nrow_local
1579 499774 : DO jstate = 1, ncol_local
1580 1860305 : grad_ij = 0.0_dp
1581 1860305 : DO idim = 1, SIZE(zij, 2)
1582 1395585 : kii = diag(row_indices(istate), idim)
1583 1395585 : kjj = diag(col_indices(jstate), idim)
1584 1395585 : ra = zij(1, idim)%local_data(istate, jstate)
1585 1395585 : rb = zij(2, idim)%local_data(istate, jstate)
1586 1395585 : kij = CMPLX(ra, rb, dp)
1587 : grad_ij = grad_ij + weights(idim)* &
1588 1860305 : REAL(4.0_dp*CONJG(kij)*(kjj - kii), dp)
1589 : END DO
1590 494830 : tolerance = MAX(ABS(grad_ij), tolerance)
1591 : END DO
1592 : END DO
1593 4944 : CALL zij(1, 1)%matrix_struct%para_env%max(tolerance)
1594 :
1595 4944 : DEALLOCATE (diag)
1596 :
1597 4944 : END SUBROUTINE check_tolerance_new
1598 :
1599 : ! **************************************************************************************************
1600 : !> \brief yet another crazy try, computes the angles needed to rotate the orbitals first
1601 : !> and rotates them all at the same time (hoping for the best of course)
1602 : !> \param weights ...
1603 : !> \param zij ...
1604 : !> \param vectors ...
1605 : !> \param max_iter ...
1606 : !> \param max_crazy_angle ...
1607 : !> \param crazy_scale ...
1608 : !> \param crazy_use_diag ...
1609 : !> \param eps_localization ...
1610 : !> \param iterations ...
1611 : !> \param converged ...
1612 : ! **************************************************************************************************
1613 136 : SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, &
1614 : eps_localization, iterations, converged)
1615 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1616 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1617 : INTEGER, INTENT(IN) :: max_iter
1618 : REAL(KIND=dp), INTENT(IN) :: max_crazy_angle
1619 : REAL(KIND=dp) :: crazy_scale
1620 : LOGICAL :: crazy_use_diag
1621 : REAL(KIND=dp), INTENT(IN) :: eps_localization
1622 : INTEGER :: iterations
1623 : LOGICAL, INTENT(out), OPTIONAL :: converged
1624 :
1625 : CHARACTER(len=*), PARAMETER :: routineN = 'crazy_rotations'
1626 : COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1627 : czero = (0.0_dp, 0.0_dp)
1628 :
1629 : COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1630 136 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1631 : COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
1632 : INTEGER :: dim2, handle, i, icol, idim, irow, &
1633 : method, ncol_global, ncol_local, &
1634 : norder, nrow_global, nrow_local, &
1635 : nsquare, unit_nr
1636 136 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1637 : LOGICAL :: do_emd
1638 : REAL(KIND=dp) :: eps_exp, limit_crazy_angle, maxeval, &
1639 : norm, ra, rb, theta, tolerance, value
1640 : REAL(KIND=dp), DIMENSION(:), POINTER :: evals
1641 : TYPE(cp_cfm_type) :: cmat_A, cmat_R, cmat_t1
1642 : TYPE(cp_fm_type) :: mat_R, mat_t, mat_theta, mat_U
1643 :
1644 136 : CALL timeset(routineN, handle)
1645 136 : NULLIFY (row_indices, col_indices)
1646 : CALL cp_fm_get_info(zij(1, 1), nrow_global=nrow_global, &
1647 : ncol_global=ncol_global, &
1648 : row_indices=row_indices, col_indices=col_indices, &
1649 136 : nrow_local=nrow_local, ncol_local=ncol_local)
1650 :
1651 136 : limit_crazy_angle = max_crazy_angle
1652 :
1653 136 : NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj)
1654 136 : dim2 = SIZE(zij, 2)
1655 544 : ALLOCATE (diag_z(nrow_global, dim2))
1656 408 : ALLOCATE (evals(nrow_global))
1657 408 : ALLOCATE (evals_exp(nrow_global))
1658 :
1659 136 : CALL cp_cfm_create(cmat_A, zij(1, 1)%matrix_struct)
1660 136 : CALL cp_cfm_create(cmat_R, zij(1, 1)%matrix_struct)
1661 136 : CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1662 :
1663 136 : CALL cp_fm_create(mat_U, zij(1, 1)%matrix_struct)
1664 136 : CALL cp_fm_create(mat_t, zij(1, 1)%matrix_struct)
1665 136 : CALL cp_fm_create(mat_R, zij(1, 1)%matrix_struct)
1666 :
1667 136 : CALL cp_fm_create(mat_theta, zij(1, 1)%matrix_struct)
1668 :
1669 136 : CALL cp_fm_set_all(mat_R, 0.0_dp, 1.0_dp)
1670 136 : CALL cp_fm_set_all(mat_t, 0.0_dp)
1671 680 : ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
1672 550 : DO idim = 1, dim2
1673 414 : CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(1, idim))
1674 550 : CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(2, idim))
1675 : END DO
1676 136 : CALL cp_fm_syevd(mat_t, mat_U, evals)
1677 550 : DO idim = 1, dim2
1678 : ! rotate z's
1679 414 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_U, 0.0_dp, mat_t)
1680 414 : CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(1, idim))
1681 414 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_U, 0.0_dp, mat_t)
1682 550 : CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(2, idim))
1683 : END DO
1684 : ! collect rotation matrix
1685 136 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_R, mat_U, 0.0_dp, mat_t)
1686 136 : CALL cp_fm_to_fm(mat_t, mat_R)
1687 :
1688 136 : unit_nr = -1
1689 136 : IF (cmat_A%matrix_struct%para_env%is_source()) THEN
1690 68 : unit_nr = cp_logger_get_default_unit_nr()
1691 : WRITE (unit_nr, '(T2,A7,A6,1X,A20,A12,A12,A12)') &
1692 68 : "CRAZY| ", "Iter", "value ", "gradient", "Max. eval", "limit"
1693 : END IF
1694 :
1695 136 : iterations = 0
1696 136 : tolerance = 1.0_dp
1697 :
1698 : DO
1699 4944 : iterations = iterations + 1
1700 19890 : DO idim = 1, dim2
1701 201120 : DO i = 1, nrow_global
1702 181230 : CALL cp_fm_get_element(zij(1, idim), i, i, ra)
1703 181230 : CALL cp_fm_get_element(zij(2, idim), i, i, rb)
1704 196176 : diag_z(i, idim) = CMPLX(ra, rb, dp)
1705 : END DO
1706 : END DO
1707 35054 : DO irow = 1, nrow_local
1708 499774 : DO icol = 1, ncol_local
1709 1860305 : DO idim = 1, dim2
1710 1395585 : ra = zij(1, idim)%local_data(irow, icol)
1711 1395585 : rb = zij(2, idim)%local_data(irow, icol)
1712 1395585 : mij(idim) = CMPLX(ra, rb, dp)
1713 1395585 : mii(idim) = diag_z(row_indices(irow), idim)
1714 1860305 : mjj(idim) = diag_z(col_indices(icol), idim)
1715 : END DO
1716 494830 : IF (row_indices(irow) /= col_indices(icol)) THEN
1717 434610 : CALL get_angle(mii, mjj, mij, weights, theta)
1718 434610 : theta = crazy_scale*theta
1719 434610 : IF (theta > limit_crazy_angle) theta = limit_crazy_angle
1720 434610 : IF (theta < -limit_crazy_angle) theta = -limit_crazy_angle
1721 434610 : IF (crazy_use_diag) THEN
1722 0 : cmat_A%local_data(irow, icol) = -CMPLX(0.0_dp, theta, dp)
1723 : ELSE
1724 434610 : mat_theta%local_data(irow, icol) = -theta
1725 : END IF
1726 : ELSE
1727 30110 : IF (crazy_use_diag) THEN
1728 0 : cmat_A%local_data(irow, icol) = czero
1729 : ELSE
1730 30110 : mat_theta%local_data(irow, icol) = 0.0_dp
1731 : END IF
1732 : END IF
1733 : END DO
1734 : END DO
1735 :
1736 : ! construct rotation matrix U based on A using diagonalization
1737 : ! alternatively, exp based on repeated squaring could be faster
1738 4944 : IF (crazy_use_diag) THEN
1739 0 : CALL cp_cfm_heevd(cmat_A, cmat_R, evals)
1740 0 : maxeval = MAXVAL(ABS(evals))
1741 0 : evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
1742 0 : CALL cp_cfm_to_cfm(cmat_R, cmat_t1)
1743 0 : CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1744 : CALL parallel_gemm('N', 'C', nrow_global, nrow_global, nrow_global, cone, &
1745 0 : cmat_t1, cmat_R, czero, cmat_A)
1746 0 : mat_U%local_data = REAL(cmat_A%local_data, KIND=dp) ! U is a real matrix
1747 : ELSE
1748 4944 : do_emd = .FALSE.
1749 4944 : method = 2
1750 4944 : eps_exp = 1.0_dp*EPSILON(eps_exp)
1751 4944 : CALL cp_fm_maxabsrownorm(mat_theta, norm)
1752 4944 : maxeval = norm ! an upper bound
1753 4944 : CALL get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
1754 4944 : CALL exp_pade_real(mat_U, mat_theta, nsquare, norder)
1755 : END IF
1756 :
1757 19890 : DO idim = 1, dim2
1758 : ! rotate z's
1759 14946 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_U, 0.0_dp, mat_t)
1760 14946 : CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(1, idim))
1761 14946 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_U, 0.0_dp, mat_t)
1762 19890 : CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(2, idim))
1763 : END DO
1764 : ! collect rotation matrix
1765 4944 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_R, mat_U, 0.0_dp, mat_t)
1766 4944 : CALL cp_fm_to_fm(mat_t, mat_R)
1767 :
1768 4944 : CALL check_tolerance_new(weights, zij, tolerance, value)
1769 :
1770 4944 : IF (unit_nr > 0) THEN
1771 : WRITE (unit_nr, '(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') &
1772 2472 : "CRAZY| ", iterations, value, tolerance, maxeval, limit_crazy_angle
1773 2472 : CALL m_flush(unit_nr)
1774 : END IF
1775 4944 : IF (tolerance < eps_localization .OR. iterations >= max_iter) EXIT
1776 : END DO
1777 :
1778 136 : IF (PRESENT(converged)) converged = (tolerance < eps_localization)
1779 :
1780 136 : CALL cp_cfm_release(cmat_A)
1781 136 : CALL cp_cfm_release(cmat_R)
1782 136 : CALL cp_cfm_release(cmat_T1)
1783 :
1784 136 : CALL cp_fm_release(mat_U)
1785 136 : CALL cp_fm_release(mat_T)
1786 136 : CALL cp_fm_release(mat_theta)
1787 :
1788 136 : CALL rotate_orbitals(mat_R, vectors)
1789 :
1790 136 : CALL cp_fm_release(mat_R)
1791 136 : DEALLOCATE (evals_exp, evals, diag_z)
1792 136 : DEALLOCATE (mii, mij, mjj)
1793 :
1794 136 : CALL timestop(handle)
1795 :
1796 272 : END SUBROUTINE crazy_rotations
1797 :
1798 : ! **************************************************************************************************
1799 : !> \brief use the exponential parametrization as described in to perform a direct mini
1800 : !> Gerd Berghold et al. PRB 61 (15), pag. 10040 (2000)
1801 : !> none of the input is modified for the time being, just finds the rotations
1802 : !> that minimizes, and throws it away afterwards :-)
1803 : !> apart from being expensive and not cleaned, this works fine
1804 : !> useful to try different spread functionals
1805 : !> \param weights ...
1806 : !> \param zij ...
1807 : !> \param vectors ...
1808 : !> \param max_iter ...
1809 : !> \param eps_localization ...
1810 : !> \param iterations ...
1811 : ! **************************************************************************************************
1812 2 : SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
1813 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1814 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1815 : INTEGER, INTENT(IN) :: max_iter
1816 : REAL(KIND=dp), INTENT(IN) :: eps_localization
1817 : INTEGER :: iterations
1818 :
1819 : CHARACTER(len=*), PARAMETER :: routineN = 'direct_mini'
1820 : COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1821 : czero = (0.0_dp, 0.0_dp)
1822 : REAL(KIND=dp), PARAMETER :: gold_sec = 0.3819_dp
1823 :
1824 : COMPLEX(KIND=dp) :: lk, ll, tmp
1825 2 : COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1826 2 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1827 : INTEGER :: handle, i, icol, idim, irow, &
1828 : line_search_count, line_searches, lsl, &
1829 : lsm, lsr, n, ncol_local, ndim, &
1830 : nrow_local, output_unit
1831 2 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1832 : LOGICAL :: new_direction
1833 : REAL(KIND=dp) :: a, b, beta_pr, c, denom, ds, ds_min, fa, &
1834 : fb, fc, nom, normg, normg_cross, &
1835 : normg_old, npos, omega, tol, val, x0, &
1836 : x1, xa, xb, xc
1837 : REAL(KIND=dp), DIMENSION(150) :: energy, grad, pos
1838 2 : REAL(KIND=dp), DIMENSION(:), POINTER :: evals, fval, fvald
1839 : TYPE(cp_cfm_type) :: cmat_A, cmat_B, cmat_M, cmat_R, cmat_t1, &
1840 : cmat_t2, cmat_U
1841 2 : TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
1842 : TYPE(cp_fm_type) :: matrix_A, matrix_G, matrix_G_old, &
1843 : matrix_G_search, matrix_H, matrix_R, &
1844 : matrix_T
1845 :
1846 2 : NULLIFY (evals, evals_exp, diag_z, fval, fvald)
1847 :
1848 2 : CALL timeset(routineN, handle)
1849 2 : output_unit = cp_logger_get_default_io_unit()
1850 :
1851 2 : n = zij(1, 1)%matrix_struct%nrow_global
1852 2 : ndim = (SIZE(zij, 2))
1853 :
1854 2 : IF (output_unit > 0) THEN
1855 1 : WRITE (output_unit, '(T4,A )') "Localization by direct minimization of the functional; "
1856 1 : WRITE (output_unit, '(T5,2A13,A20,A20,A10 )') " Line search ", " Iteration ", " Functional ", " Tolerance ", " ds Min "
1857 : END IF
1858 :
1859 20 : ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n))
1860 18 : ALLOCATE (c_zij(ndim))
1861 :
1862 : ! create the three complex matrices Z
1863 14 : DO idim = 1, ndim
1864 12 : CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
1865 : c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
1866 158 : zij(2, idim)%local_data, dp)
1867 : END DO
1868 :
1869 2 : CALL cp_fm_create(matrix_A, zij(1, 1)%matrix_struct)
1870 2 : CALL cp_fm_create(matrix_G, zij(1, 1)%matrix_struct)
1871 2 : CALL cp_fm_create(matrix_T, zij(1, 1)%matrix_struct)
1872 2 : CALL cp_fm_create(matrix_H, zij(1, 1)%matrix_struct)
1873 2 : CALL cp_fm_create(matrix_G_search, zij(1, 1)%matrix_struct)
1874 2 : CALL cp_fm_create(matrix_G_old, zij(1, 1)%matrix_struct)
1875 2 : CALL cp_fm_create(matrix_R, zij(1, 1)%matrix_struct)
1876 2 : CALL cp_fm_set_all(matrix_R, 0.0_dp, 1.0_dp)
1877 :
1878 2 : CALL cp_fm_set_all(matrix_A, 0.0_dp)
1879 : ! CALL cp_fm_init_random ( matrix_A )
1880 :
1881 2 : CALL cp_cfm_create(cmat_A, zij(1, 1)%matrix_struct)
1882 2 : CALL cp_cfm_create(cmat_U, zij(1, 1)%matrix_struct)
1883 2 : CALL cp_cfm_create(cmat_R, zij(1, 1)%matrix_struct)
1884 2 : CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1885 2 : CALL cp_cfm_create(cmat_t2, zij(1, 1)%matrix_struct)
1886 2 : CALL cp_cfm_create(cmat_B, zij(1, 1)%matrix_struct)
1887 2 : CALL cp_cfm_create(cmat_M, zij(1, 1)%matrix_struct)
1888 :
1889 : CALL cp_cfm_get_info(cmat_B, nrow_local=nrow_local, ncol_local=ncol_local, &
1890 2 : row_indices=row_indices, col_indices=col_indices)
1891 :
1892 2 : CALL cp_fm_set_all(matrix_G_old, 0.0_dp)
1893 2 : CALL cp_fm_set_all(matrix_G_search, 0.0_dp)
1894 2 : normg_old = 1.0E30_dp
1895 2 : ds_min = 1.0_dp
1896 2 : new_direction = .TRUE.
1897 2 : Iterations = 0
1898 2 : line_searches = 0
1899 2 : line_search_count = 0
1900 1670 : DO
1901 1670 : iterations = iterations + 1
1902 : ! compute U,R,evals given A
1903 21710 : cmat_A%local_data = CMPLX(0.0_dp, matrix_A%local_data, dp) ! cmat_A is hermitian, evals are reals
1904 1670 : CALL cp_cfm_heevd(cmat_A, cmat_R, evals)
1905 15030 : evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
1906 1670 : CALL cp_cfm_to_cfm(cmat_R, cmat_t1)
1907 1670 : CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1908 1670 : CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_R, czero, cmat_U)
1909 21710 : cmat_U%local_data = REAL(cmat_U%local_data, KIND=dp) ! enforce numerics, U is a real matrix
1910 :
1911 1670 : IF (new_direction .AND. MOD(line_searches, 20) == 5) THEN ! reset with A .eq. 0
1912 28 : DO idim = 1, ndim
1913 24 : CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_U, czero, cmat_t1)
1914 28 : CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_U, cmat_t1, czero, c_zij(idim))
1915 : END DO
1916 : ! collect rotation matrix
1917 52 : matrix_H%local_data = REAL(cmat_U%local_data, KIND=dp)
1918 4 : CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_R, matrix_H, 0.0_dp, matrix_T)
1919 4 : CALL cp_fm_to_fm(matrix_T, matrix_R)
1920 :
1921 4 : CALL cp_cfm_set_all(cmat_U, czero, cone)
1922 4 : CALL cp_cfm_set_all(cmat_R, czero, cone)
1923 4 : CALL cp_cfm_set_all(cmat_A, czero)
1924 4 : CALL cp_fm_set_all(matrix_A, 0.0_dp)
1925 20 : evals(:) = 0.0_dp
1926 36 : evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
1927 4 : CALL cp_fm_set_all(matrix_G_old, 0.0_dp)
1928 4 : CALL cp_fm_set_all(matrix_G_search, 0.0_dp)
1929 4 : normg_old = 1.0E30_dp
1930 : END IF
1931 :
1932 : ! compute Omega and M
1933 1670 : CALL cp_cfm_set_all(cmat_M, czero)
1934 1670 : omega = 0.0_dp
1935 11690 : DO idim = 1, ndim
1936 10020 : CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_U, czero, cmat_t1) ! t1=ZU
1937 10020 : CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_U, cmat_t1, czero, cmat_t2) ! t2=(U^T)ZU
1938 50100 : DO i = 1, n
1939 40080 : CALL cp_cfm_get_element(cmat_t2, i, i, diag_z(i, idim))
1940 : SELECT CASE (2) ! allows for selection of different spread functionals
1941 : CASE (1)
1942 : fval(i) = -weights(idim)*LOG(ABS(diag_z(i, idim))**2)
1943 : fvald(i) = -weights(idim)/(ABS(diag_z(i, idim))**2)
1944 : CASE (2) ! corresponds to the jacobi setup
1945 40080 : fval(i) = weights(idim) - weights(idim)*ABS(diag_z(i, idim))**2
1946 40080 : fvald(i) = -weights(idim)
1947 : END SELECT
1948 50100 : omega = omega + fval(i)
1949 : END DO
1950 51770 : DO icol = 1, ncol_local
1951 130260 : DO irow = 1, nrow_local
1952 80160 : tmp = cmat_t1%local_data(irow, icol)*CONJG(diag_z(col_indices(icol), idim))
1953 : cmat_M%local_data(irow, icol) = cmat_M%local_data(irow, icol) &
1954 120240 : + 4.0_dp*fvald(col_indices(icol))*REAL(tmp, KIND=dp)
1955 : END DO
1956 : END DO
1957 : END DO
1958 :
1959 : ! compute Hessian diagonal approximation for the preconditioner
1960 : IF (.TRUE.) THEN
1961 1670 : CALL gradsq_at_0(diag_z, weights, matrix_H, ndim)
1962 : ELSE
1963 : CALL cp_fm_set_all(matrix_H, 1.0_dp)
1964 : END IF
1965 :
1966 : ! compute B
1967 8350 : DO icol = 1, ncol_local
1968 21710 : DO irow = 1, nrow_local
1969 13360 : ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow))
1970 13360 : lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol))
1971 20040 : IF (ABS(ll - lk) < 0.5_dp) THEN ! use a series expansion to avoid loss of precision
1972 5378 : tmp = 1.0_dp
1973 5378 : cmat_B%local_data(irow, icol) = 0.0_dp
1974 91426 : DO i = 1, 16
1975 86048 : cmat_B%local_data(irow, icol) = cmat_B%local_data(irow, icol) + tmp
1976 91426 : tmp = tmp*(ll - lk)/(i + 1)
1977 : END DO
1978 5378 : cmat_B%local_data(irow, icol) = cmat_B%local_data(irow, icol)*EXP(lk)
1979 : ELSE
1980 7982 : cmat_B%local_data(irow, icol) = (EXP(lk) - EXP(ll))/(lk - ll)
1981 : END IF
1982 : END DO
1983 : END DO
1984 : ! compute gradient matrix_G
1985 :
1986 1670 : CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_M, cmat_R, czero, cmat_t1) ! t1=(M^T)(R^T)
1987 1670 : CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_R, cmat_t1, czero, cmat_t2) ! t2=(R)t1
1988 1670 : CALL cp_cfm_schur_product(cmat_t2, cmat_B, cmat_t1)
1989 1670 : CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_R, czero, cmat_t2)
1990 1670 : CALL parallel_gemm('N', 'N', n, n, n, cone, cmat_R, cmat_t2, czero, cmat_t1)
1991 21710 : matrix_G%local_data = REAL(cmat_t1%local_data, KIND=dp)
1992 1670 : CALL cp_fm_transpose(matrix_G, matrix_T)
1993 1670 : CALL cp_fm_scale_and_add(-1.0_dp, matrix_G, 1.0_dp, matrix_T)
1994 1670 : CALL cp_fm_maxabsval(matrix_G, tol)
1995 :
1996 : ! from here on, minimizing technology
1997 1670 : IF (new_direction) THEN
1998 : ! energy converged up to machine precision ?
1999 66 : line_searches = line_searches + 1
2000 : ! DO i=1,line_search_count
2001 : ! write(15,*) pos(i),energy(i)
2002 : ! ENDDO
2003 : ! write(15,*) ""
2004 : ! CALL m_flush(15)
2005 : !write(16,*) evals(:)
2006 : !write(17,*) matrix_A%local_data(:,:)
2007 : !write(18,*) matrix_G%local_data(:,:)
2008 66 : IF (output_unit > 0) THEN
2009 33 : WRITE (output_unit, '(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, Iterations, Omega, tol, ds_min
2010 33 : CALL m_flush(output_unit)
2011 : END IF
2012 66 : IF (tol < eps_localization .OR. iterations > max_iter) EXIT
2013 :
2014 : IF (.TRUE.) THEN ! do conjugate gradient CG
2015 64 : CALL cp_fm_trace(matrix_G, matrix_G_old, normg_cross)
2016 64 : normg_cross = normg_cross*0.5_dp ! takes into account the fact that A is antisymmetric
2017 : ! apply the preconditioner
2018 320 : DO icol = 1, ncol_local
2019 832 : DO irow = 1, nrow_local
2020 768 : matrix_G_old%local_data(irow, icol) = matrix_G%local_data(irow, icol)/matrix_H%local_data(irow, icol)
2021 : END DO
2022 : END DO
2023 64 : CALL cp_fm_trace(matrix_G, matrix_G_old, normg)
2024 64 : normg = normg*0.5_dp
2025 64 : beta_pr = (normg - normg_cross)/normg_old
2026 64 : normg_old = normg
2027 64 : beta_pr = MAX(beta_pr, 0.0_dp)
2028 64 : CALL cp_fm_scale_and_add(beta_pr, matrix_G_search, -1.0_dp, matrix_G_old)
2029 64 : CALL cp_fm_trace(matrix_G_search, matrix_G_old, normg_cross)
2030 64 : IF (normg_cross >= 0) THEN ! back to SD
2031 6 : IF (matrix_A%matrix_struct%para_env%is_source()) THEN
2032 3 : WRITE (cp_logger_get_default_unit_nr(), *) "!"
2033 : END IF
2034 6 : beta_pr = 0.0_dp
2035 6 : CALL cp_fm_scale_and_add(beta_pr, matrix_G_search, -1.0_dp, matrix_G_old)
2036 : END IF
2037 : ELSE ! SD
2038 : CALL cp_fm_scale_and_add(0.0_dp, matrix_G_search, -1.0_dp, matrix_G)
2039 : END IF
2040 : ! ds_min=1.0E-4_dp
2041 : line_search_count = 0
2042 : END IF
2043 1668 : line_search_count = line_search_count + 1
2044 1668 : energy(line_search_count) = Omega
2045 :
2046 : ! line search section
2047 : SELECT CASE (3)
2048 : CASE (1) ! two point line search
2049 : SELECT CASE (line_search_count)
2050 : CASE (1)
2051 : pos(1) = 0.0_dp
2052 : pos(2) = ds_min
2053 : CALL cp_fm_trace(matrix_G, matrix_G_search, grad(1))
2054 : grad(1) = grad(1)/2.0_dp
2055 : new_direction = .FALSE.
2056 : CASE (2)
2057 : new_direction = .TRUE.
2058 : x0 = pos(1) ! 0.0_dp
2059 : c = energy(1)
2060 : b = grad(1)
2061 : x1 = pos(2)
2062 : a = (energy(2) - b*x1 - c)/(x1**2)
2063 : IF (a <= 0.0_dp) a = 1.0E-15_dp
2064 : npos = -b/(2.0_dp*a)
2065 : val = a*npos**2 + b*npos + c
2066 : IF (val < energy(1) .AND. val <= energy(2)) THEN
2067 : ! we go to a minimum, but ...
2068 : ! we take a guard against too large steps
2069 : pos(3) = MIN(npos, MAXVAL(pos(1:2))*4.0_dp)
2070 : ELSE ! just take an extended step
2071 : pos(3) = MAXVAL(pos(1:2))*2.0_dp
2072 : END IF
2073 : END SELECT
2074 : CASE (2) ! 3 point line search
2075 : SELECT CASE (line_search_count)
2076 : CASE (1)
2077 : new_direction = .FALSE.
2078 : pos(1) = 0.0_dp
2079 : pos(2) = ds_min*0.8_dp
2080 : CASE (2)
2081 : new_direction = .FALSE.
2082 : IF (energy(2) > energy(1)) THEN
2083 : pos(3) = ds_min*0.7_dp
2084 : ELSE
2085 : pos(3) = ds_min*1.4_dp
2086 : END IF
2087 : CASE (3)
2088 : new_direction = .TRUE.
2089 : xa = pos(1)
2090 : xb = pos(2)
2091 : xc = pos(3)
2092 : fa = energy(1)
2093 : fb = energy(2)
2094 : fc = energy(3)
2095 : nom = (xb - xa)**2*(fb - fc) - (xb - xc)**2*(fb - fa)
2096 : denom = (xb - xa)*(fb - fc) - (xb - xc)*(fb - fa)
2097 : IF (ABS(denom) <= 1.0E-18_dp*MAX(ABS(fb - fc), ABS(fb - fa))) THEN
2098 : npos = xb
2099 : ELSE
2100 : npos = xb - 0.5_dp*nom/denom ! position of the stationary point
2101 : END IF
2102 : val = (npos - xa)*(npos - xb)*fc/((xc - xa)*(xc - xb)) + &
2103 : (npos - xb)*(npos - xc)*fa/((xa - xb)*(xa - xc)) + &
2104 : (npos - xc)*(npos - xa)*fb/((xb - xc)*(xb - xa))
2105 : IF (val < fa .AND. val <= fb .AND. val <= fc) THEN ! OK, we go to a minimum
2106 : ! we take a guard against too large steps
2107 : pos(4) = MAX(MAXVAL(pos(1:3))*0.01_dp, &
2108 : MIN(npos, MAXVAL(pos(1:3))*4.0_dp))
2109 : ELSE ! just take an extended step
2110 : pos(4) = MAXVAL(pos(1:3))*2.0_dp
2111 : END IF
2112 : END SELECT
2113 : CASE (3) ! golden section hunt
2114 1668 : new_direction = .FALSE.
2115 1668 : IF (line_search_count == 1) THEN
2116 64 : lsl = 1
2117 64 : lsr = 0
2118 64 : lsm = 1
2119 64 : pos(1) = 0.0_dp
2120 64 : pos(2) = ds_min/gold_sec
2121 : ELSE
2122 1604 : IF (line_search_count == 150) CPABORT("Too many")
2123 1604 : IF (lsr == 0) THEN
2124 148 : IF (energy(line_search_count - 1) < energy(line_search_count)) THEN
2125 64 : lsr = line_search_count
2126 64 : pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
2127 : ELSE
2128 84 : lsl = lsm
2129 84 : lsm = line_search_count
2130 84 : pos(line_search_count + 1) = pos(line_search_count)/gold_sec
2131 : END IF
2132 : ELSE
2133 1456 : IF (pos(line_search_count) < pos(lsm)) THEN
2134 690 : IF (energy(line_search_count) < energy(lsm)) THEN
2135 : lsr = lsm
2136 : lsm = line_search_count
2137 : ELSE
2138 558 : lsl = line_search_count
2139 : END IF
2140 : ELSE
2141 766 : IF (energy(line_search_count) < energy(lsm)) THEN
2142 : lsl = lsm
2143 : lsm = line_search_count
2144 : ELSE
2145 518 : lsr = line_search_count
2146 : END IF
2147 : END IF
2148 1456 : IF (pos(lsr) - pos(lsm) > pos(lsm) - pos(lsl)) THEN
2149 722 : pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
2150 : ELSE
2151 734 : pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
2152 : END IF
2153 1456 : IF ((pos(lsr) - pos(lsl)) < 1.0E-3_dp*pos(lsr)) THEN
2154 64 : new_direction = .TRUE.
2155 : END IF
2156 : END IF ! lsr .eq. 0
2157 : END IF ! first step
2158 : END SELECT
2159 : ! now go to the suggested point
2160 1668 : ds_min = pos(line_search_count + 1)
2161 1668 : ds = pos(line_search_count + 1) - pos(line_search_count)
2162 1670 : CALL cp_fm_scale_and_add(1.0_dp, matrix_A, ds, matrix_G_search)
2163 : END DO
2164 :
2165 : ! collect rotation matrix
2166 26 : matrix_H%local_data = REAL(cmat_U%local_data, KIND=dp)
2167 2 : CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_R, matrix_H, 0.0_dp, matrix_T)
2168 2 : CALL cp_fm_to_fm(matrix_T, matrix_R)
2169 2 : CALL rotate_orbitals(matrix_R, vectors)
2170 2 : CALL cp_fm_release(matrix_R)
2171 :
2172 2 : CALL cp_fm_release(matrix_A)
2173 2 : CALL cp_fm_release(matrix_G)
2174 2 : CALL cp_fm_release(matrix_H)
2175 2 : CALL cp_fm_release(matrix_T)
2176 2 : CALL cp_fm_release(matrix_G_search)
2177 2 : CALL cp_fm_release(matrix_G_old)
2178 2 : CALL cp_cfm_release(cmat_A)
2179 2 : CALL cp_cfm_release(cmat_U)
2180 2 : CALL cp_cfm_release(cmat_R)
2181 2 : CALL cp_cfm_release(cmat_t1)
2182 2 : CALL cp_cfm_release(cmat_t2)
2183 2 : CALL cp_cfm_release(cmat_B)
2184 2 : CALL cp_cfm_release(cmat_M)
2185 :
2186 2 : DEALLOCATE (evals, evals_exp, fval, fvald)
2187 :
2188 14 : DO idim = 1, SIZE(c_zij)
2189 156 : zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
2190 156 : zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
2191 14 : CALL cp_cfm_release(c_zij(idim))
2192 : END DO
2193 2 : DEALLOCATE (c_zij)
2194 2 : DEALLOCATE (diag_z)
2195 :
2196 2 : CALL timestop(handle)
2197 :
2198 6 : END SUBROUTINE direct_mini
2199 :
2200 : ! **************************************************************************************************
2201 : !> \brief Parallel algorithm for jacobi rotations
2202 : !> \param weights ...
2203 : !> \param zij ...
2204 : !> \param vectors ...
2205 : !> \param para_env ...
2206 : !> \param max_iter ...
2207 : !> \param eps_localization ...
2208 : !> \param sweeps ...
2209 : !> \param out_each ...
2210 : !> \param target_time ...
2211 : !> \param start_time ...
2212 : !> \param restricted ...
2213 : !> \par History
2214 : !> use allgather for improved performance
2215 : !> \author MI (11.2009)
2216 : ! **************************************************************************************************
2217 390 : SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
2218 : sweeps, out_each, target_time, start_time, restricted)
2219 :
2220 : REAL(KIND=dp), INTENT(IN) :: weights(:)
2221 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
2222 : TYPE(mp_para_env_type), POINTER :: para_env
2223 : INTEGER, INTENT(IN) :: max_iter
2224 : REAL(KIND=dp), INTENT(IN) :: eps_localization
2225 : INTEGER :: sweeps
2226 : INTEGER, INTENT(IN) :: out_each
2227 : REAL(dp) :: target_time, start_time
2228 : INTEGER :: restricted
2229 :
2230 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rot_para'
2231 :
2232 : INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2233 : nblock, nblock_max, ns_me, nstate, &
2234 : output_unit
2235 390 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2236 390 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_im, z_ij_loc_re
2237 : REAL(KIND=dp) :: xstate
2238 : TYPE(cp_fm_type) :: rmat
2239 : TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2240 :
2241 390 : CALL timeset(routineN, handle)
2242 :
2243 390 : output_unit = cp_logger_get_default_io_unit()
2244 :
2245 : NULLIFY (cz_ij_loc)
2246 :
2247 390 : dim2 = SIZE(zij, 2)
2248 :
2249 390 : CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
2250 390 : CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
2251 :
2252 390 : CALL cp_fm_get_info(rmat, nrow_global=nstate)
2253 :
2254 390 : IF (restricted > 0) THEN
2255 0 : IF (output_unit > 0) THEN
2256 0 : WRITE (output_unit, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
2257 : END IF
2258 0 : nstate = nstate - restricted
2259 : END IF
2260 :
2261 : ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2262 390 : xstate = REAL(nstate, dp)/REAL(para_env%num_pe, dp)
2263 1560 : ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2264 1170 : DO ip = 1, para_env%num_pe
2265 780 : ns_bound(ip - 1, 1) = MIN(nstate, NINT(xstate*(ip - 1))) + 1
2266 1170 : ns_bound(ip - 1, 2) = MIN(nstate, NINT(xstate*ip))
2267 : END DO
2268 390 : nblock_max = 0
2269 1170 : DO ip = 0, para_env%num_pe - 1
2270 780 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2271 1170 : nblock_max = MAX(nblock_max, nblock)
2272 : END DO
2273 :
2274 : ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2275 1560 : ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2276 1170 : ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2277 2364 : ALLOCATE (cz_ij_loc(dim2))
2278 1584 : DO idim = 1, dim2
2279 3972 : DO ip = 0, para_env%num_pe - 1
2280 2388 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2281 2388 : CALL cp_fm_get_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2282 2388 : CALL cp_fm_get_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2283 3582 : IF (para_env%mepos == ip) THEN
2284 4752 : ALLOCATE (cz_ij_loc(idim)%c_array(nstate, nblock))
2285 5034 : DO i = 1, nblock
2286 37830 : DO j = 1, nstate
2287 36636 : cz_ij_loc(idim)%c_array(j, i) = CMPLX(z_ij_loc_re(j, i), z_ij_loc_im(j, i), dp)
2288 : END DO
2289 : END DO
2290 : END IF
2291 : END DO ! ip
2292 : END DO
2293 390 : DEALLOCATE (z_ij_loc_re)
2294 390 : DEALLOCATE (z_ij_loc_im)
2295 :
2296 1560 : ALLOCATE (rotmat(nstate, 2*nblock_max))
2297 :
2298 : CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, ns_bound, &
2299 : cz_ij_loc, rotmat, output_unit, eps_localization=eps_localization, &
2300 390 : target_time=target_time, start_time=start_time)
2301 :
2302 390 : ilow1 = ns_bound(para_env%mepos, 1)
2303 390 : ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2304 1170 : ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2305 1170 : ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2306 1584 : DO idim = 1, dim2
2307 3972 : DO ip = 0, para_env%num_pe - 1
2308 2388 : z_ij_loc_re = 0.0_dp
2309 2388 : z_ij_loc_im = 0.0_dp
2310 2388 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2311 2388 : IF (ip == para_env%mepos) THEN
2312 5034 : ns_me = nblock
2313 5034 : DO i = 1, ns_me
2314 36636 : ii = ilow1 + i - 1
2315 37830 : DO j = 1, nstate
2316 32796 : z_ij_loc_re(j, i) = REAL(cz_ij_loc(idim)%c_array(j, i), dp)
2317 36636 : z_ij_loc_im(j, i) = AIMAG(cz_ij_loc(idim)%c_array(j, i))
2318 : END DO
2319 : END DO
2320 : END IF
2321 2388 : CALL para_env%bcast(z_ij_loc_re, ip)
2322 2388 : CALL para_env%bcast(z_ij_loc_im, ip)
2323 2388 : CALL cp_fm_set_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2324 3582 : CALL cp_fm_set_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2325 : END DO ! ip
2326 : END DO
2327 :
2328 1170 : DO ip = 0, para_env%num_pe - 1
2329 780 : z_ij_loc_re = 0.0_dp
2330 780 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2331 780 : IF (ip == para_env%mepos) THEN
2332 1633 : ns_me = nblock
2333 1633 : DO i = 1, ns_me
2334 11774 : ii = ilow1 + i - 1
2335 12164 : DO j = 1, nstate
2336 11774 : z_ij_loc_re(j, i) = rotmat(j, i)
2337 : END DO
2338 : END DO
2339 : END IF
2340 780 : CALL para_env%bcast(z_ij_loc_re, ip)
2341 1170 : CALL cp_fm_set_submatrix(rmat, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2342 : END DO
2343 :
2344 390 : DEALLOCATE (z_ij_loc_re)
2345 390 : DEALLOCATE (z_ij_loc_im)
2346 1584 : DO idim = 1, dim2
2347 1584 : DEALLOCATE (cz_ij_loc(idim)%c_array)
2348 : END DO
2349 390 : DEALLOCATE (cz_ij_loc)
2350 :
2351 390 : CALL para_env%sync()
2352 390 : CALL rotate_orbitals(rmat, vectors)
2353 390 : CALL cp_fm_release(rmat)
2354 :
2355 390 : DEALLOCATE (rotmat)
2356 390 : DEALLOCATE (ns_bound)
2357 :
2358 390 : CALL timestop(handle)
2359 :
2360 1170 : END SUBROUTINE jacobi_rot_para
2361 :
2362 : ! **************************************************************************************************
2363 : !> \brief almost identical to 'jacobi_rot_para' but with different inout variables
2364 : !> \param weights ...
2365 : !> \param czij ...
2366 : !> \param para_env ...
2367 : !> \param max_iter ...
2368 : !> \param rmat ...
2369 : !> \param tol_out ...
2370 : !> \author Soumya Ghosh (08/21)
2371 : ! **************************************************************************************************
2372 32 : SUBROUTINE jacobi_rot_para_1(weights, czij, para_env, max_iter, rmat, tol_out)
2373 :
2374 : REAL(KIND=dp), INTENT(IN) :: weights(:)
2375 : TYPE(cp_cfm_type), INTENT(IN) :: czij(:)
2376 : TYPE(mp_para_env_type), POINTER :: para_env
2377 : INTEGER, INTENT(IN) :: max_iter
2378 : TYPE(cp_cfm_type), INTENT(IN) :: rmat
2379 : REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2380 :
2381 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rot_para_1'
2382 :
2383 32 : COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: czij_array
2384 : INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2385 : nblock, nblock_max, ns_me, nstate, &
2386 : sweeps
2387 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2388 32 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_re
2389 : REAL(KIND=dp) :: xstate
2390 : TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2391 :
2392 32 : CALL timeset(routineN, handle)
2393 :
2394 32 : dim2 = SIZE(czij)
2395 :
2396 32 : CALL cp_cfm_set_all(rmat, CMPLX(0._dp, 0._dp, dp), CMPLX(1._dp, 0._dp, dp))
2397 :
2398 32 : CALL cp_cfm_get_info(rmat, nrow_global=nstate)
2399 :
2400 : ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2401 32 : xstate = REAL(nstate, dp)/REAL(para_env%num_pe, dp)
2402 128 : ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2403 96 : DO ip = 1, para_env%num_pe
2404 64 : ns_bound(ip - 1, 1) = MIN(nstate, NINT(xstate*(ip - 1))) + 1
2405 96 : ns_bound(ip - 1, 2) = MIN(nstate, NINT(xstate*ip))
2406 : END DO
2407 32 : nblock_max = 0
2408 96 : DO ip = 0, para_env%num_pe - 1
2409 64 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2410 96 : nblock_max = MAX(nblock_max, nblock)
2411 : END DO
2412 :
2413 : ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2414 128 : ALLOCATE (czij_array(nstate, nblock_max))
2415 192 : ALLOCATE (cz_ij_loc(dim2))
2416 128 : DO idim = 1, dim2
2417 320 : DO ip = 0, para_env%num_pe - 1
2418 192 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2419 : ! cfm --> allocatable
2420 192 : CALL cp_cfm_get_submatrix(czij(idim), czij_array, 1, ns_bound(ip, 1), nstate, nblock)
2421 288 : IF (para_env%mepos == ip) THEN
2422 96 : ns_me = nblock
2423 384 : ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me))
2424 2160 : DO i = 1, ns_me
2425 90912 : DO j = 1, nstate
2426 90816 : cz_ij_loc(idim)%c_array(j, i) = czij_array(j, i)
2427 : END DO
2428 : END DO
2429 : END IF
2430 : END DO ! ip
2431 : END DO
2432 32 : DEALLOCATE (czij_array)
2433 :
2434 128 : ALLOCATE (rotmat(nstate, 2*nblock_max))
2435 :
2436 : CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, 1, dim2, nstate, nblock_max, ns_bound, &
2437 32 : cz_ij_loc, rotmat, 0, tol_out=tol_out)
2438 :
2439 32 : ilow1 = ns_bound(para_env%mepos, 1)
2440 32 : ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2441 128 : ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2442 :
2443 96 : DO ip = 0, para_env%num_pe - 1
2444 64 : z_ij_loc_re = 0.0_dp
2445 64 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2446 64 : IF (ip == para_env%mepos) THEN
2447 720 : ns_me = nblock
2448 720 : DO i = 1, ns_me
2449 30272 : ii = ilow1 + i - 1
2450 30304 : DO j = 1, nstate
2451 30272 : z_ij_loc_re(j, i) = rotmat(j, i)
2452 : END DO
2453 : END DO
2454 : END IF
2455 64 : CALL para_env%bcast(z_ij_loc_re, ip)
2456 62048 : CALL cp_cfm_set_submatrix(rmat, CMPLX(z_ij_loc_re, 0.0_dp, dp), 1, ns_bound(ip, 1), nstate, nblock)
2457 : END DO
2458 :
2459 32 : DEALLOCATE (z_ij_loc_re)
2460 128 : DO idim = 1, dim2
2461 128 : DEALLOCATE (cz_ij_loc(idim)%c_array)
2462 : END DO
2463 32 : DEALLOCATE (cz_ij_loc)
2464 :
2465 32 : CALL para_env%sync()
2466 :
2467 32 : DEALLOCATE (rotmat)
2468 32 : DEALLOCATE (ns_bound)
2469 :
2470 32 : CALL timestop(handle)
2471 :
2472 64 : END SUBROUTINE jacobi_rot_para_1
2473 :
2474 : ! **************************************************************************************************
2475 : !> \brief Parallel algorithm for jacobi rotations
2476 : !> \param weights ...
2477 : !> \param para_env ...
2478 : !> \param max_iter ...
2479 : !> \param sweeps ...
2480 : !> \param out_each ...
2481 : !> \param dim2 ...
2482 : !> \param nstate ...
2483 : !> \param nblock_max ...
2484 : !> \param ns_bound ...
2485 : !> \param cz_ij_loc ...
2486 : !> \param rotmat ...
2487 : !> \param output_unit ...
2488 : !> \param tol_out ...
2489 : !> \param eps_localization ...
2490 : !> \param target_time ...
2491 : !> \param start_time ...
2492 : !> \par History
2493 : !> split out to reuse with different input types
2494 : !> \author HF (05.2022)
2495 : ! **************************************************************************************************
2496 1266 : SUBROUTINE jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, &
2497 422 : ns_bound, cz_ij_loc, rotmat, output_unit, tol_out, eps_localization, target_time, start_time)
2498 :
2499 : REAL(KIND=dp), INTENT(IN) :: weights(:)
2500 : TYPE(mp_para_env_type), POINTER :: para_env
2501 : INTEGER, INTENT(IN) :: max_iter
2502 : INTEGER, INTENT(OUT) :: sweeps
2503 : INTEGER, INTENT(IN) :: out_each, dim2, nstate, nblock_max
2504 : INTEGER, DIMENSION(0:, :), INTENT(IN) :: ns_bound
2505 : TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2506 : REAL(dp), DIMENSION(:, :), INTENT(OUT) :: rotmat
2507 : INTEGER, INTENT(IN) :: output_unit
2508 : REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2509 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: eps_localization
2510 : REAL(dp), OPTIONAL :: target_time, start_time
2511 :
2512 : COMPLEX(KIND=dp) :: zi, zj
2513 422 : COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: c_array_me, c_array_partner
2514 : COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
2515 : INTEGER :: i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, ip, ip_has_i, &
2516 : ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, iu1, iu2, iup1, &
2517 : iup2, j, jj, jstate, k, kk, lsweep, n1, n2, npair, nperm, ns_me, ns_partner, &
2518 : ns_recv_from, ns_recv_partner
2519 422 : INTEGER, ALLOCATABLE, DIMENSION(:) :: rcount, rdispl
2520 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: list_pair
2521 : LOGICAL :: should_stop
2522 422 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: gmat, rmat_loc, rmat_recv, rmat_send
2523 422 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: rmat_recv_all
2524 : REAL(KIND=dp) :: ct, func, gmax, grad, ri, rj, st, t1, &
2525 : t2, theta, tolerance, zc, zr
2526 422 : TYPE(set_c_1d_type), DIMENSION(:), POINTER :: zdiag_all, zdiag_me
2527 422 : TYPE(set_c_2d_type), DIMENSION(:), POINTER :: xyz_mix, xyz_mix_ns
2528 :
2529 422 : NULLIFY (zdiag_all, zdiag_me)
2530 422 : NULLIFY (xyz_mix, xyz_mix_ns)
2531 : NULLIFY (mii, mij, mjj)
2532 :
2533 2110 : ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
2534 :
2535 1266 : ALLOCATE (rcount(para_env%num_pe), STAT=istat)
2536 844 : ALLOCATE (rdispl(para_env%num_pe), STAT=istat)
2537 :
2538 422 : tolerance = 1.0e10_dp
2539 422 : sweeps = 0
2540 :
2541 : ! number of processor pairs and number of permutations
2542 422 : npair = (para_env%num_pe + 1)/2
2543 422 : nperm = para_env%num_pe - MOD(para_env%num_pe + 1, 2)
2544 1266 : ALLOCATE (list_pair(2, npair))
2545 :
2546 : ! initialize rotation matrix
2547 87298 : rotmat = 0.0_dp
2548 2353 : DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2549 1931 : ii = i - ns_bound(para_env%mepos, 1) + 1
2550 2353 : rotmat(i, ii) = 1.0_dp
2551 : END DO
2552 :
2553 2556 : ALLOCATE (xyz_mix(dim2))
2554 2134 : ALLOCATE (xyz_mix_ns(dim2))
2555 2556 : ALLOCATE (zdiag_me(dim2))
2556 2134 : ALLOCATE (zdiag_all(dim2))
2557 :
2558 422 : ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2559 422 : IF (ns_me /= 0) THEN
2560 2070 : ALLOCATE (c_array_me(nstate, ns_me, dim2))
2561 1680 : DO idim = 1, dim2
2562 5478 : ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me))
2563 : END DO
2564 1656 : ALLOCATE (gmat(nstate, ns_me))
2565 : END IF
2566 :
2567 1712 : DO idim = 1, dim2
2568 3870 : ALLOCATE (zdiag_me(idim)%c_array(nblock_max))
2569 7530 : zdiag_me(idim)%c_array = z_zero
2570 3870 : ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max))
2571 14192 : zdiag_all(idim)%c_array = z_zero
2572 : END DO
2573 1688 : ALLOCATE (rmat_recv(nblock_max*2, nblock_max))
2574 1266 : ALLOCATE (rmat_send(nblock_max*2, nblock_max))
2575 :
2576 : ! buffer for message passing
2577 2110 : ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1))
2578 :
2579 422 : IF (output_unit > 0) THEN
2580 195 : WRITE (output_unit, '(T4,A )') " Localization by iterative distributed Jacobi rotation"
2581 195 : WRITE (output_unit, '(T20,A12,T32, A22,T60, A12,A8 )') "Iteration", "Functional", "Tolerance", " Time "
2582 : END IF
2583 :
2584 87304 : DO lsweep = 1, max_iter + 1
2585 87304 : sweeps = lsweep
2586 87304 : IF (sweeps == max_iter + 1) THEN
2587 156 : IF (output_unit > 0) THEN
2588 62 : WRITE (output_unit, *) ' LOCALIZATION! loop did not converge within the maximum number of iterations.'
2589 62 : WRITE (output_unit, *) ' Present Max. gradient = ', tolerance
2590 : END IF
2591 : EXIT
2592 : END IF
2593 87148 : t1 = m_walltime()
2594 :
2595 174296 : DO iperm = 1, nperm
2596 :
2597 : ! fix partners for this permutation, and get the number of states
2598 87148 : CALL eberlein(iperm, para_env, list_pair)
2599 87148 : ip_partner = -1
2600 87148 : ns_partner = 0
2601 87148 : DO ipair = 1, npair
2602 87148 : IF (list_pair(1, ipair) == para_env%mepos) THEN
2603 43574 : ip_partner = list_pair(2, ipair)
2604 43574 : EXIT
2605 43574 : ELSE IF (list_pair(2, ipair) == para_env%mepos) THEN
2606 43574 : ip_partner = list_pair(1, ipair)
2607 43574 : EXIT
2608 : END IF
2609 : END DO
2610 87148 : IF (ip_partner >= 0) THEN
2611 87148 : ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1
2612 : ELSE
2613 : ns_partner = 0
2614 : END IF
2615 :
2616 : ! if there is a non-zero block connecting two partners, jacobi-sweep it.
2617 87148 : IF (ns_partner*ns_me /= 0) THEN
2618 :
2619 348528 : ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner))
2620 87132 : rmat_loc = 0.0_dp
2621 699326 : DO i = 1, ns_me + ns_partner
2622 699326 : rmat_loc(i, i) = 1.0_dp
2623 : END DO
2624 :
2625 435660 : ALLOCATE (c_array_partner(nstate, ns_partner, dim2))
2626 :
2627 350130 : DO idim = 1, dim2
2628 1051992 : ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner))
2629 1277862 : DO i = 1, ns_me
2630 7898778 : c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i)
2631 : END DO
2632 : END DO
2633 :
2634 : CALL para_env%sendrecv(msgin=c_array_me, dest=ip_partner, &
2635 87132 : msgout=c_array_partner, source=ip_partner)
2636 :
2637 87132 : n1 = ns_me
2638 87132 : n2 = ns_partner
2639 87132 : ilow1 = ns_bound(para_env%mepos, 1)
2640 87132 : iup1 = ns_bound(para_env%mepos, 1) + n1 - 1
2641 87132 : ilow2 = ns_bound(ip_partner, 1)
2642 87132 : iup2 = ns_bound(ip_partner, 1) + n2 - 1
2643 87132 : IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1)) THEN
2644 43566 : il1 = 1
2645 : iu1 = n1
2646 43566 : iu1 = n1
2647 43566 : il2 = 1 + n1
2648 43566 : iu2 = n1 + n2
2649 : ELSE
2650 43566 : il1 = 1 + n2
2651 : iu1 = n1 + n2
2652 43566 : iu1 = n1 + n2
2653 43566 : il2 = 1
2654 43566 : iu2 = n2
2655 : END IF
2656 :
2657 350130 : DO idim = 1, dim2
2658 1190730 : DO i = 1, n1
2659 4342242 : xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim)
2660 4484268 : xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim)
2661 : END DO
2662 1277862 : DO i = 1, n2
2663 4342242 : xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim)
2664 4484268 : xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim)
2665 : END DO
2666 : END DO
2667 :
2668 699326 : DO istate = 1, n1 + n2
2669 2590980 : DO jstate = istate + 1, n1 + n2
2670 7671970 : DO idim = 1, dim2
2671 5780316 : mii(idim) = xyz_mix(idim)%c_array(istate, istate)
2672 5780316 : mij(idim) = xyz_mix(idim)%c_array(istate, jstate)
2673 7671970 : mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate)
2674 : END DO
2675 1891654 : CALL get_angle(mii, mjj, mij, weights, theta)
2676 1891654 : st = SIN(theta)
2677 1891654 : ct = COS(theta)
2678 7671970 : DO idim = 1, dim2
2679 51334218 : DO i = 1, n1 + n2
2680 45553902 : zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate)
2681 45553902 : zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate)
2682 45553902 : xyz_mix(idim)%c_array(i, istate) = zi
2683 51334218 : xyz_mix(idim)%c_array(i, jstate) = zj
2684 : END DO
2685 53225872 : DO i = 1, n1 + n2
2686 45553902 : zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i)
2687 45553902 : zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i)
2688 45553902 : xyz_mix(idim)%c_array(istate, i) = zi
2689 51334218 : xyz_mix(idim)%c_array(jstate, i) = zj
2690 : END DO
2691 : END DO
2692 :
2693 17253578 : DO i = 1, n1 + n2
2694 14749730 : ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate)
2695 14749730 : rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate)
2696 14749730 : rmat_loc(i, istate) = ri
2697 16641384 : rmat_loc(i, jstate) = rj
2698 : END DO
2699 : END DO
2700 : END DO
2701 :
2702 87132 : k = nblock_max + 1
2703 : CALL para_env%sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, &
2704 5094828 : rotmat(1:nstate, k:k + n2 - 1), ip_partner)
2705 :
2706 87132 : IF (ilow1 < ilow2) THEN
2707 : ! no longer compiles in official sdgb:
2708 : !CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1, k), nstate, rmat_loc(1 + n1, 1), n1 + n2, 0.0_dp, gmat, nstate)
2709 : ! probably inefficient:
2710 : CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, rmat_loc(1 + n1:, 1:n1), &
2711 1468694 : n2, 0.0_dp, gmat(:, :), nstate)
2712 : CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(1:, 1:), &
2713 43566 : n1 + n2, 1.0_dp, gmat(:, :), nstate)
2714 : ELSE
2715 : CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, &
2716 43566 : rmat_loc(1:, n2 + 1:), n1 + n2, 0.0_dp, gmat(:, :), nstate)
2717 : ! no longer compiles in official sdgb:
2718 : !CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1, 1), nstate, rmat_loc(n2 + 1, n2 + 1), n1 + n2, 1.0_dp, gmat, nstate)
2719 : ! probably inefficient:
2720 : CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(n2 + 1:, n2 + 1:), &
2721 1148318 : n1, 1.0_dp, gmat(:, :), nstate)
2722 : END IF
2723 :
2724 87132 : CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1)
2725 :
2726 350130 : DO idim = 1, dim2
2727 1190730 : DO i = 1, n1
2728 7898778 : xyz_mix_ns(idim)%c_array(1:nstate, i) = z_zero
2729 : END DO
2730 :
2731 1190730 : DO istate = 1, n1
2732 7898778 : DO jstate = 1, nstate
2733 33346560 : DO i = 1, n2
2734 : xyz_mix_ns(idim)%c_array(jstate, istate) = &
2735 : xyz_mix_ns(idim)%c_array(jstate, istate) + &
2736 32418828 : c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1)
2737 : END DO
2738 : END DO
2739 : END DO
2740 1277862 : DO istate = 1, n1
2741 7898778 : DO jstate = 1, nstate
2742 34186950 : DO i = 1, n1
2743 : xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + &
2744 33259218 : c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1)
2745 : END DO
2746 : END DO
2747 : END DO
2748 : END DO ! idim
2749 :
2750 87132 : DEALLOCATE (c_array_partner)
2751 :
2752 : ELSE ! save my data
2753 64 : DO idim = 1, dim2
2754 88 : DO i = 1, ns_me
2755 120 : xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i)
2756 : END DO
2757 : END DO
2758 : END IF
2759 :
2760 350194 : DO idim = 1, dim2
2761 1277950 : DO i = 1, ns_me
2762 7898874 : cz_ij_loc(idim)%c_array(1:nstate, i) = z_zero
2763 : END DO
2764 : END DO
2765 :
2766 87148 : IF (ns_partner*ns_me /= 0) THEN
2767 : ! transpose rotation matrix rmat_loc
2768 699326 : DO i = 1, ns_me + ns_partner
2769 2590980 : DO j = i + 1, ns_me + ns_partner
2770 1891654 : ri = rmat_loc(i, j)
2771 1891654 : rmat_loc(i, j) = rmat_loc(j, i)
2772 2503848 : rmat_loc(j, i) = ri
2773 : END DO
2774 : END DO
2775 :
2776 : ! prepare for distribution
2777 393229 : DO i = 1, n1
2778 1512260 : rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1)
2779 : END DO
2780 87132 : ik = nblock_max
2781 393229 : DO i = 1, n2
2782 1471949 : rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1)
2783 : END DO
2784 : ELSE
2785 16 : rmat_send = 0.0_dp
2786 : END IF
2787 :
2788 : ! collect data from all tasks (this takes some significant time)
2789 87148 : CALL para_env%allgather(rmat_send, rmat_recv_all)
2790 :
2791 : ! update blocks everywhere
2792 261444 : DO ip = 0, para_env%num_pe - 1
2793 :
2794 174296 : ip_recv_from = MOD(para_env%mepos - IP + para_env%num_pe, para_env%num_pe)
2795 6463592 : rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from)
2796 :
2797 174296 : ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1
2798 :
2799 261444 : IF (ns_me /= 0) THEN
2800 174280 : IF (ns_recv_from /= 0) THEN
2801 : !look for the partner of ip_recv_from
2802 174272 : ip_recv_partner = -1
2803 174272 : ns_recv_partner = 0
2804 174272 : DO ipair = 1, npair
2805 174272 : IF (list_pair(1, ipair) == ip_recv_from) THEN
2806 87140 : ip_recv_partner = list_pair(2, ipair)
2807 87140 : EXIT
2808 87132 : ELSE IF (list_pair(2, ipair) == ip_recv_from) THEN
2809 : ip_recv_partner = list_pair(1, ipair)
2810 : EXIT
2811 : END IF
2812 : END DO
2813 :
2814 174272 : IF (ip_recv_partner >= 0) THEN
2815 174272 : ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1
2816 : END IF
2817 174272 : IF (ns_recv_partner > 0) THEN
2818 174264 : il1 = ns_bound(para_env%mepos, 1)
2819 174264 : il_recv = ns_bound(ip_recv_from, 1)
2820 174264 : il_recv_partner = ns_bound(ip_recv_partner, 1)
2821 174264 : ik = nblock_max
2822 :
2823 700260 : DO idim = 1, dim2
2824 2381460 : DO i = 1, ns_recv_from
2825 1855464 : ii = il_recv + i - 1
2826 9089508 : DO j = 1, ns_me
2827 33259218 : jj = j
2828 35114682 : DO k = 1, ns_recv_from
2829 26551170 : kk = il_recv + k - 1
2830 : cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2831 33259218 : rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2832 : END DO
2833 : END DO
2834 : END DO
2835 2555724 : DO i = 1, ns_recv_from
2836 1855464 : ii = il_recv + i - 1
2837 9089508 : DO j = 1, ns_me
2838 32418828 : jj = j
2839 34274292 : DO k = 1, ns_recv_partner
2840 25710780 : kk = il_recv_partner + k - 1
2841 : cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2842 32418828 : rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2843 : END DO
2844 : END DO
2845 : END DO
2846 : END DO ! idim
2847 : ELSE
2848 8 : il1 = ns_bound(para_env%mepos, 1)
2849 8 : il_recv = ns_bound(ip_recv_from, 1)
2850 32 : DO idim = 1, dim2
2851 56 : DO j = 1, ns_me
2852 48 : jj = j
2853 72 : DO i = 1, ns_recv_from
2854 24 : ii = il_recv + i - 1
2855 48 : cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j)
2856 : END DO
2857 : END DO
2858 : END DO ! idim
2859 : END IF
2860 : END IF
2861 : END IF ! ns_me
2862 : END DO ! ip
2863 :
2864 174296 : IF (ns_partner*ns_me /= 0) THEN
2865 87132 : DEALLOCATE (rmat_loc)
2866 350130 : DO idim = 1, dim2
2867 350130 : DEALLOCATE (xyz_mix(idim)%c_array)
2868 : END DO
2869 : END IF
2870 :
2871 : END DO ! iperm
2872 :
2873 : ! calculate the max gradient
2874 350194 : DO idim = 1, dim2
2875 1190802 : DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2876 927756 : ii = i - ns_bound(para_env%mepos, 1) + 1
2877 927756 : zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2878 1190802 : zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2879 : END DO
2880 789138 : rcount(:) = SIZE(zdiag_me(idim)%c_array)
2881 263046 : rdispl(1) = 0
2882 526092 : DO ip = 2, para_env%num_pe
2883 526092 : rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
2884 : END DO
2885 : ! collect all the diagonal elements in a replicated 1d array
2886 3496450 : CALL para_env%allgatherv(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl)
2887 : END DO
2888 :
2889 87148 : gmax = 0.0_dp
2890 393253 : DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2891 306105 : k = j - ns_bound(para_env%mepos, 1) + 1
2892 1339080 : DO i = 1, j - 1
2893 : ! find the location of the diagonal element (i,i)
2894 1089122 : DO ip = 0, para_env%num_pe - 1
2895 1089122 : IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2)) THEN
2896 : ip_has_i = ip
2897 : EXIT
2898 : END IF
2899 : END DO
2900 945827 : ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1
2901 : ! mepos has the diagonal element (j,j), as well as the off diagonal (i,j)
2902 945827 : jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1
2903 945827 : grad = 0.0_dp
2904 3835985 : DO idim = 1, dim2
2905 2890158 : zi = zdiag_all(idim)%c_array(ii)
2906 2890158 : zj = zdiag_all(idim)%c_array(jj)
2907 3835985 : grad = grad + weights(idim)*REAL(4.0_dp*CONJG(cz_ij_loc(idim)%c_array(i, k))*(zj - zi), dp)
2908 : END DO
2909 1251932 : gmax = MAX(gmax, ABS(grad))
2910 : END DO
2911 : END DO
2912 :
2913 87148 : CALL para_env%max(gmax)
2914 87148 : tolerance = gmax
2915 87148 : IF (PRESENT(tol_out)) tol_out = tolerance
2916 :
2917 87148 : func = 0.0_dp
2918 393253 : DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2919 306105 : k = i - ns_bound(para_env%mepos, 1) + 1
2920 1321009 : DO idim = 1, dim2
2921 927756 : zr = REAL(cz_ij_loc(idim)%c_array(i, k), dp)
2922 927756 : zc = AIMAG(cz_ij_loc(idim)%c_array(i, k))
2923 1233861 : func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/twopi/twopi
2924 : END DO
2925 : END DO
2926 87148 : CALL para_env%sum(func)
2927 87148 : t2 = m_walltime()
2928 :
2929 87148 : IF (output_unit > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
2930 440 : WRITE (output_unit, '(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1
2931 440 : CALL m_flush(output_unit)
2932 : END IF
2933 87148 : IF (PRESENT(eps_localization)) THEN
2934 87116 : IF (tolerance < eps_localization) EXIT
2935 : END IF
2936 87148 : IF (PRESENT(target_time) .AND. PRESENT(start_time)) THEN
2937 86850 : CALL external_control(should_stop, "LOC", target_time=target_time, start_time=start_time)
2938 86850 : IF (should_stop) EXIT
2939 : END IF
2940 :
2941 : END DO ! lsweep
2942 :
2943 : ! buffer for message passing
2944 422 : DEALLOCATE (rmat_recv_all)
2945 :
2946 422 : DEALLOCATE (rmat_recv)
2947 422 : DEALLOCATE (rmat_send)
2948 422 : IF (ns_me > 0) THEN
2949 414 : DEALLOCATE (c_array_me)
2950 : END IF
2951 1712 : DO idim = 1, dim2
2952 1290 : DEALLOCATE (zdiag_me(idim)%c_array)
2953 1712 : DEALLOCATE (zdiag_all(idim)%c_array)
2954 : END DO
2955 422 : DEALLOCATE (zdiag_me)
2956 422 : DEALLOCATE (zdiag_all)
2957 422 : DEALLOCATE (xyz_mix)
2958 1712 : DO idim = 1, dim2
2959 1712 : IF (ns_me /= 0) THEN
2960 1266 : DEALLOCATE (xyz_mix_ns(idim)%c_array)
2961 : END IF
2962 : END DO
2963 422 : DEALLOCATE (xyz_mix_ns)
2964 422 : IF (ns_me /= 0) THEN
2965 414 : DEALLOCATE (gmat)
2966 : END IF
2967 422 : DEALLOCATE (mii)
2968 422 : DEALLOCATE (mij)
2969 422 : DEALLOCATE (mjj)
2970 422 : DEALLOCATE (list_pair)
2971 :
2972 844 : END SUBROUTINE jacobi_rot_para_core
2973 :
2974 : ! **************************************************************************************************
2975 : !> \brief ...
2976 : !> \param iperm ...
2977 : !> \param para_env ...
2978 : !> \param list_pair ...
2979 : ! **************************************************************************************************
2980 87148 : SUBROUTINE eberlein(iperm, para_env, list_pair)
2981 : INTEGER, INTENT(IN) :: iperm
2982 : TYPE(mp_para_env_type), POINTER :: para_env
2983 : INTEGER, DIMENSION(:, :) :: list_pair
2984 :
2985 : INTEGER :: i, ii, jj, npair
2986 :
2987 87148 : npair = (para_env%num_pe + 1)/2
2988 87148 : IF (iperm == 1) THEN
2989 : !..set up initial ordering
2990 261444 : DO I = 0, para_env%num_pe - 1
2991 174296 : II = ((i + 1) + 1)/2
2992 174296 : JJ = MOD((i + 1) + 1, 2) + 1
2993 261444 : list_pair(JJ, II) = i
2994 : END DO
2995 87148 : IF (MOD(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1
2996 0 : ELSEIF (MOD(iperm, 2) == 0) THEN
2997 : !..a type shift
2998 0 : jj = list_pair(1, npair)
2999 0 : DO I = npair, 3, -1
3000 0 : list_pair(1, I) = list_pair(1, I - 1)
3001 : END DO
3002 0 : list_pair(1, 2) = list_pair(2, 1)
3003 0 : list_pair(2, 1) = jj
3004 : ELSE
3005 : !..b type shift
3006 0 : jj = list_pair(2, 1)
3007 0 : DO I = 1, npair - 1
3008 0 : list_pair(2, I) = list_pair(2, I + 1)
3009 : END DO
3010 0 : list_pair(2, npair) = jj
3011 : END IF
3012 :
3013 87148 : END SUBROUTINE eberlein
3014 :
3015 : ! **************************************************************************************************
3016 : !> \brief ...
3017 : !> \param vectors ...
3018 : !> \param op_sm_set ...
3019 : !> \param zij_fm_set ...
3020 : ! **************************************************************************************************
3021 532 : SUBROUTINE zij_matrix(vectors, op_sm_set, zij_fm_set)
3022 :
3023 : TYPE(cp_fm_type), INTENT(IN) :: vectors
3024 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: op_sm_set
3025 : TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: zij_fm_set
3026 :
3027 : CHARACTER(len=*), PARAMETER :: routineN = 'zij_matrix'
3028 :
3029 : INTEGER :: handle, i, j, nao, nmoloc
3030 : TYPE(cp_fm_type) :: opvec
3031 :
3032 532 : CALL timeset(routineN, handle)
3033 :
3034 : ! get rows and cols of the input
3035 532 : CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc)
3036 : ! replicate the input kind of matrix
3037 532 : CALL cp_fm_create(opvec, vectors%matrix_struct)
3038 :
3039 : ! Compute zij here
3040 2164 : DO i = 1, SIZE(zij_fm_set, 2)
3041 5428 : DO j = 1, SIZE(zij_fm_set, 1)
3042 3264 : CALL cp_fm_set_all(zij_fm_set(j, i), 0.0_dp)
3043 3264 : CALL cp_dbcsr_sm_fm_multiply(op_sm_set(j, i)%matrix, vectors, opvec, ncol=nmoloc)
3044 : CALL parallel_gemm("T", "N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, &
3045 4896 : zij_fm_set(j, i))
3046 : END DO
3047 : END DO
3048 :
3049 532 : CALL cp_fm_release(opvec)
3050 532 : CALL timestop(handle)
3051 :
3052 532 : END SUBROUTINE zij_matrix
3053 :
3054 : ! **************************************************************************************************
3055 : !> \brief ...
3056 : !> \param vectors ...
3057 : ! **************************************************************************************************
3058 38 : SUBROUTINE scdm_qrfact(vectors)
3059 :
3060 : TYPE(cp_fm_type), INTENT(IN) :: vectors
3061 :
3062 : CHARACTER(len=*), PARAMETER :: routineN = 'scdm_qrfact'
3063 :
3064 : INTEGER :: handle, ncolT, nrowT
3065 38 : REAL(KIND=dp), DIMENSION(:), POINTER :: tau
3066 : TYPE(cp_fm_struct_type), POINTER :: cstruct
3067 : TYPE(cp_fm_type) :: CTp, Qf, tmp
3068 :
3069 38 : CALL timeset(routineN, handle)
3070 :
3071 : ! Create Transpose of Coefficient Matrix vectors
3072 38 : nrowT = vectors%matrix_struct%ncol_global
3073 38 : ncolT = vectors%matrix_struct%nrow_global
3074 :
3075 : CALL cp_fm_struct_create(cstruct, template_fmstruct=vectors%matrix_struct, &
3076 38 : nrow_global=nrowT, ncol_global=ncolT)
3077 38 : CALL cp_fm_create(CTp, cstruct)
3078 38 : CALL cp_fm_struct_release(cstruct)
3079 :
3080 114 : ALLOCATE (tau(nrowT))
3081 :
3082 38 : CALL cp_fm_transpose(vectors, CTp)
3083 :
3084 : ! Get QR decomposition of CTs
3085 38 : CALL cp_fm_pdgeqpf(CTp, tau, nrowT, ncolT, 1, 1)
3086 :
3087 : ! Construction of Q from the scalapack output
3088 : CALL cp_fm_struct_create(cstruct, para_env=CTp%matrix_struct%para_env, &
3089 : context=CTp%matrix_struct%context, nrow_global=CTp%matrix_struct%nrow_global, &
3090 38 : ncol_global=CTp%matrix_struct%nrow_global)
3091 38 : CALL cp_fm_create(Qf, cstruct)
3092 38 : CALL cp_fm_struct_release(cstruct)
3093 38 : CALL cp_fm_to_fm_submat(CTp, Qf, nrowT, nrowT, 1, 1, 1, 1)
3094 :
3095 : ! Get Q
3096 38 : CALL cp_fm_pdorgqr(Qf, tau, nrowT, 1, 1)
3097 :
3098 : ! Transform original coefficient matrix vectors
3099 38 : CALL cp_fm_create(tmp, vectors%matrix_struct)
3100 38 : CALL cp_fm_set_all(tmp, 0.0_dp, 1.0_dp)
3101 38 : CALL cp_fm_to_fm(vectors, tmp)
3102 38 : CALL parallel_gemm('N', 'N', ncolT, nrowT, nrowT, 1.0_dp, tmp, Qf, 0.0_dp, vectors)
3103 :
3104 : ! Cleanup
3105 38 : CALL cp_fm_release(CTp)
3106 38 : CALL cp_fm_release(tmp)
3107 38 : CALL cp_fm_release(Qf)
3108 38 : DEALLOCATE (tau)
3109 :
3110 38 : CALL timestop(handle)
3111 :
3112 152 : END SUBROUTINE scdm_qrfact
3113 :
3114 : ! **************************************************************************************************
3115 : !> \brief Achieves minimisation of the spread functional by simultaneous diagonalisation with Jacobi
3116 : !> rotations as presented in Cardoso & Souloumiac, SIAM J. Matrix Anal. Appl., 17(1), 161.
3117 : !> Generalizes the Jacobi algorithm to complex matrices.
3118 : !> \param weights array of weights for calculating the total spread
3119 : !> \param zij spread operator matrices
3120 : !> \param max_iter maximum number iterations
3121 : !> \param eps_localization numerical tolerance
3122 : !> \param sweeps counts number of sweeps required
3123 : !> \param out_each how often to print info
3124 : !> \param vectors complex vectors to be localized
3125 : !> \par History
3126 : !> 2020-04 created [LS]
3127 : !> \author Lukas Schreder
3128 : ! **************************************************************************************************
3129 6 : SUBROUTINE cardoso_souloumiac(weights, zij, max_iter, eps_localization, sweeps, &
3130 : out_each, vectors)
3131 :
3132 : REAL(KIND=dp), INTENT(IN) :: weights(:)
3133 : TYPE(cp_cfm_type), INTENT(INOUT) :: zij(:, :)
3134 : INTEGER, INTENT(IN) :: max_iter
3135 : REAL(KIND=dp), INTENT(IN) :: eps_localization
3136 : INTEGER :: sweeps
3137 : INTEGER, INTENT(IN) :: out_each
3138 : TYPE(cp_cfm_type), POINTER :: vectors
3139 :
3140 : CHARACTER(len=*), PARAMETER :: routineN = 'cardoso_souloumiac'
3141 :
3142 : COMPLEX(KIND=dp) :: s
3143 6 : COMPLEX(KIND=dp), ALLOCATABLE :: mii(:), mij(:), mji(:), mjj(:)
3144 : INTEGER :: dim1, dim2, handle, idim, istate, jdim, &
3145 : jstate, nstate, unit_nr
3146 : REAL(KIND=dp) :: c, old_spread, spread, t1, t2, tolerance
3147 : TYPE(cp_cfm_type), POINTER :: c_rmat, c_zij(:)
3148 :
3149 6 : CALL timeset(routineN, handle)
3150 :
3151 6 : dim1 = SIZE(zij, 1)
3152 6 : dim2 = SIZE(zij, 2)
3153 :
3154 6 : NULLIFY (c_rmat, c_zij)
3155 84 : ALLOCATE (c_rmat, c_zij(dim1*dim2), mii(dim1*dim2), mij(dim1*dim2), mji(dim1*dim2), mjj(dim1*dim2))
3156 6 : CALL cp_cfm_create(c_rmat, zij(1, 1)%matrix_struct)
3157 6 : CALL cp_cfm_set_all(c_rmat, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp)) ! start with the identity transformation
3158 24 : DO idim = 1, dim2
3159 60 : DO jdim = 1, dim1
3160 36 : CALL cp_cfm_create(c_zij((idim - 1)*dim1 + jdim), zij(jdim, idim)%matrix_struct)
3161 54 : CALL cp_cfm_to_cfm(zij(jdim, idim), c_zij((idim - 1)*dim1 + jdim))
3162 : END DO
3163 : END DO
3164 :
3165 6 : CALL cp_cfm_get_info(c_rmat, nrow_global=nstate)
3166 6 : tolerance = 1.0e10_dp
3167 6 : old_spread = 1.0e10_dp
3168 6 : sweeps = 0
3169 6 : unit_nr = -1
3170 6 : IF (c_rmat%matrix_struct%para_env%is_source()) THEN
3171 3 : unit_nr = cp_logger_get_default_unit_nr()
3172 : WRITE (unit_nr, "(T4,A )") " Localization by iterative Jacobi rotation using "// &
3173 3 : "Cardoso-Souloumiac angles"
3174 :
3175 : END IF
3176 :
3177 : ! Jacobi sweeps until converged
3178 58 : DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
3179 52 : sweeps = sweeps + 1
3180 52 : t1 = m_walltime()
3181 1924 : DO jstate = 1, nstate
3182 34684 : DO istate = jstate + 1, nstate
3183 229320 : DO idim = 1, dim1*dim2
3184 196560 : CALL cp_cfm_get_element(c_zij(idim), istate, istate, mii(idim))
3185 196560 : CALL cp_cfm_get_element(c_zij(idim), istate, jstate, mij(idim))
3186 196560 : CALL cp_cfm_get_element(c_zij(idim), jstate, istate, mji(idim))
3187 229320 : CALL cp_cfm_get_element(c_zij(idim), jstate, jstate, mjj(idim))
3188 : END DO
3189 32760 : CALL get_cardoso_angles(mii, mij, mji, mjj, c, s, vectors%matrix_struct)
3190 229320 : DO idim = 1, dim1*dim2
3191 196560 : CALL cp_cfm_rot_cols(c_zij(idim), istate, jstate, c, REAL(s))
3192 229320 : CALL cp_cfm_rot_rows(c_zij(idim), istate, jstate, c, REAL(s))
3193 : END DO
3194 67392 : CALL cp_cfm_rot_cols(c_rmat, istate, jstate, c, REAL(s))
3195 : END DO
3196 : END DO
3197 52 : CALL check_tolerance(c_zij, weights, spread)
3198 52 : CALL check_tolerance(c_zij, weights, tolerance)
3199 52 : tolerance = ABS(spread - old_spread)
3200 52 : old_spread = spread
3201 :
3202 52 : t2 = m_walltime()
3203 58 : IF (unit_nr > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
3204 : WRITE (unit_nr, "(T4,A,I7,A,E12.4,A,E12.4,A,F8.3)") &
3205 26 : "Iteration:", sweeps, "Functional", spread, "Tolerance:", tolerance, "Time:", t2 - t1
3206 26 : CALL m_flush(unit_nr)
3207 : END IF
3208 : END DO
3209 :
3210 24 : DO idim = 1, dim2
3211 : ! back to an interlaced matrix
3212 60 : DO jdim = 1, dim1
3213 36 : CALL cp_cfm_to_cfm(c_zij((idim - 1)*dim1 + jdim), zij(jdim, idim))
3214 54 : CALL cp_cfm_release(c_zij((idim - 1)*dim1 + jdim))
3215 : END DO
3216 : END DO
3217 :
3218 6 : CALL rotate_orbitals_cfm(c_rmat, vectors)
3219 :
3220 6 : DEALLOCATE (c_zij, mii, mij, mji, mjj)
3221 6 : CALL cp_cfm_release(c_rmat)
3222 6 : DEALLOCATE (c_rmat)
3223 :
3224 6 : CALL timestop(handle)
3225 :
3226 18 : END SUBROUTINE cardoso_souloumiac
3227 :
3228 : ! **************************************************************************************************
3229 : !> \brief Pipek-Mezey version of the Cardoso-Souloumiac PADE algorithm for complex-valued matrices.
3230 : !> \param zij spread operator matrices
3231 : !> \param vec complex vectors to be localised
3232 : !> \param sweeps counts number of sweeps required
3233 : !> \param max_iter maximum number iterations
3234 : !> \param eps numerical tolerance
3235 : !> \param out_each how often to print info
3236 : !> \par History
3237 : !> 2020-04 created [LS]
3238 : !> \author Lukas Schreder
3239 : ! **************************************************************************************************
3240 6 : SUBROUTINE cardoso_souloumiac_pipek(zij, vec, sweeps, max_iter, eps, out_each)
3241 :
3242 : TYPE(cp_cfm_type), POINTER :: zij(:, :), vec
3243 : INTEGER :: sweeps, max_iter
3244 : REAL(dp) :: eps
3245 : INTEGER :: out_each
3246 :
3247 : CHARACTER(*), PARAMETER :: routineN = 'cardoso_souloumiac_pipek'
3248 :
3249 : COMPLEX(dp) :: c_spread, s
3250 : COMPLEX(dp), POINTER :: qii(:), qij(:), qji(:), qjj(:)
3251 : INTEGER :: handle, i, j, k, n_dim, n_states, &
3252 : output_unit
3253 : REAL(dp) :: c, old_spread, spread, t1, t2, tol
3254 6 : TYPE(cp_cfm_type), POINTER :: c_zij(:), rmat
3255 :
3256 6 : CALL timeset(routineN, handle)
3257 :
3258 6 : CALL cite_reference(Schreder2024_2)
3259 :
3260 18 : n_dim = SIZE(zij)
3261 :
3262 6 : ALLOCATE (rmat)
3263 6 : CALL cp_cfm_create(rmat, zij(1, 1)%matrix_struct)
3264 :
3265 6 : CALL cp_cfm_set_all(rmat, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
3266 120 : ALLOCATE (c_zij(n_dim), qii(n_dim), qij(n_dim), qji(n_dim), qjj(n_dim))
3267 :
3268 : ! build Qij matrix
3269 78 : DO k = 1, n_dim
3270 78 : c_zij(k) = zij(k, 1)
3271 : END DO
3272 :
3273 6 : CALL cp_cfm_get_info(c_zij(1), ncol_global=n_states)
3274 :
3275 6 : tol = 1.0e10_dp
3276 6 : c_spread = (0.0_dp, 0.0_dp)
3277 78 : DO k = 1, n_dim
3278 2382 : DO i = 1, n_states
3279 2304 : CALL cp_cfm_get_element(c_zij(k), i, i, s)
3280 2376 : c_spread = c_spread + s*s
3281 : END DO
3282 : END DO
3283 6 : spread = REAL(c_spread)
3284 6 : old_spread = spread
3285 :
3286 6 : sweeps = 0
3287 6 : output_unit = cp_logger_get_default_unit_nr()
3288 : WRITE (output_unit, "(T4,A )") " Localization by iterative Jacobi rotation using "// &
3289 6 : "Cardoso-Souloumiac angles"
3290 6 : WRITE (output_unit, "(T4,A )") " and Pipek-Mezey spread functional"
3291 :
3292 6 : IF (output_unit > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
3293 : WRITE (output_unit, "(T4,A,I7,A,E12.4,A,E12.4,A,F8.3)") &
3294 6 : "Iteration:", sweeps, " Functional", spread, " Tolerance:", tol, " Time:", 0.0_dp
3295 : END IF
3296 :
3297 36 : DO WHILE (tol >= eps .AND. sweeps < max_iter)
3298 30 : t1 = m_walltime()
3299 30 : sweeps = sweeps + 1
3300 :
3301 990 : DO i = 1, n_states
3302 15870 : DO j = i + 1, n_states
3303 193440 : DO k = 1, n_dim
3304 178560 : CALL cp_cfm_get_element(c_zij(k), i, i, qii(k))
3305 178560 : CALL cp_cfm_get_element(c_zij(k), i, j, qij(k))
3306 178560 : CALL cp_cfm_get_element(c_zij(k), j, i, qji(k))
3307 193440 : CALL cp_cfm_get_element(c_zij(k), j, j, qjj(k))
3308 : END DO
3309 14880 : CALL get_cardoso_angles(qii, qij, qji, qjj, c, s, vec%matrix_struct)
3310 193440 : DO k = 1, n_dim
3311 178560 : CALL cp_cfm_rot_cols(c_zij(k), i, j, c, REAL(s))
3312 193440 : CALL cp_cfm_rot_rows(c_zij(k), i, j, c, REAL(s))
3313 : END DO
3314 30720 : CALL cp_cfm_rot_cols(rmat, i, j, c, REAL(s))
3315 : END DO
3316 : END DO
3317 :
3318 30 : c_spread = (0.0_dp, 0.0_dp)
3319 990 : DO i = 1, n_states
3320 12510 : DO k = 1, n_dim
3321 11520 : CALL cp_cfm_get_element(c_zij(k), i, i, s)
3322 12480 : c_spread = c_spread + s*s
3323 : END DO
3324 : END DO
3325 30 : spread = REAL(c_spread)
3326 :
3327 30 : tol = ABS(spread - old_spread)
3328 30 : old_spread = spread
3329 30 : t2 = m_walltime()
3330 36 : IF (output_unit > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
3331 : WRITE (output_unit, "(T4,A,I7,A,E12.4,A,E12.4,A,F8.3)") &
3332 30 : "Iteration:", sweeps, " Functional", spread, " Tolerance:", tol, " Time:", t2 - t1
3333 : END IF
3334 : END DO
3335 :
3336 6 : CALL rotate_orbitals_cfm(rmat, vec)
3337 :
3338 : ! c_zij(k) was assigned (shallow copy of zij(k,1)); do not release contents
3339 6 : DEALLOCATE (c_zij, qii, qij, qji, qjj)
3340 6 : CALL cp_cfm_release(rmat)
3341 6 : DEALLOCATE (rmat)
3342 :
3343 6 : CALL timestop(handle)
3344 :
3345 18 : END SUBROUTINE cardoso_souloumiac_pipek
3346 :
3347 : ! **************************************************************************************************
3348 : !> \brief calculates the Jacobi angles needed in serial Cardoso-Souloumiac diagonalisation
3349 : !> \param mii ...
3350 : !> \param mij ...
3351 : !> \param mji ...
3352 : !> \param mjj ...
3353 : !> \param c ...
3354 : !> \param s ...
3355 : !> \param tmp_fm_struct ...
3356 : !> \par History
3357 : !> 2020-04 created [LS]
3358 : !> \author Lukas Schreder
3359 : ! **************************************************************************************************
3360 47640 : SUBROUTINE get_cardoso_angles(mii, mij, mji, mjj, c, s, tmp_fm_struct)
3361 :
3362 : COMPLEX(KIND=dp), DIMENSION(:) :: mii, mij, mji, mjj
3363 : REAL(KIND=dp), INTENT(out) :: c
3364 : COMPLEX(KIND=dp), INTENT(out) :: s
3365 : TYPE(cp_fm_struct_type), POINTER :: tmp_fm_struct
3366 :
3367 : INTEGER :: dim_m, i, i_max
3368 : REAL(KIND=dp) :: r, x, y, z
3369 : REAL(KIND=dp), DIMENSION(3) :: evals
3370 : TYPE(cp_cfm_type), POINTER :: c_Gmat, hmat
3371 : TYPE(cp_fm_struct_type), POINTER :: G_fm_struct, h_fm_struct
3372 : TYPE(cp_fm_type), POINTER :: evects, Gmat
3373 :
3374 47640 : dim_m = SIZE(mii)
3375 :
3376 : CALL cp_fm_struct_create(h_fm_struct, nrow_global=1, ncol_global=3, &
3377 47640 : template_fmstruct=tmp_fm_struct)
3378 : CALL cp_fm_struct_create(G_fm_struct, nrow_global=3, ncol_global=3, &
3379 47640 : template_fmstruct=tmp_fm_struct)
3380 47640 : ALLOCATE (hmat, c_Gmat, Gmat, evects)
3381 47640 : CALL cp_cfm_create(hmat, h_fm_struct)
3382 47640 : CALL cp_cfm_create(c_Gmat, G_fm_struct)
3383 47640 : CALL cp_cfm_set_all(c_Gmat, (0.0_dp, 0.0_dp), (0.0_dp, 0.0_dp))
3384 47640 : CALL cp_fm_create(Gmat, G_fm_struct)
3385 47640 : CALL cp_fm_create(evects, G_fm_struct)
3386 :
3387 422760 : DO i = 1, dim_m
3388 375120 : CALL cp_cfm_set_element(hmat, 1, 1, (mii(i) - mjj(i)))
3389 375120 : CALL cp_cfm_set_element(hmat, 1, 2, (mij(i) + mji(i)))
3390 375120 : CALL cp_cfm_set_element(hmat, 1, 3, (0.0_dp, 1.0_dp)*(mji(i) - mij(i)))
3391 422760 : CALL cp_cfm_gemm("C", "N", 3, 3, 1, (1.0_dp, 0.0_dp), hmat, hmat, (1.0_dp, 0.0_dp), c_Gmat)
3392 : END DO
3393 47640 : CALL cp_cfm_to_fm(c_Gmat, Gmat)
3394 :
3395 : ! find eigenvector with highest eigenvalue
3396 47640 : CALL choose_eigv_solver(Gmat, evects, evals)
3397 190560 : i_max = MAXLOC(evals, 1)
3398 47640 : CALL cp_fm_get_element(evects, 1, i_max, x)
3399 47640 : CALL cp_fm_get_element(evects, 2, i_max, y)
3400 47640 : CALL cp_fm_get_element(evects, 3, i_max, z)
3401 47640 : IF (x < 0) THEN
3402 47288 : x = -x
3403 47288 : y = -y
3404 47288 : z = -z
3405 : END IF
3406 :
3407 : ! calculate the angles
3408 47640 : r = SQRT(x**2 + y**2 + z**2) ! always 1
3409 47640 : c = SQRT((x + r)/(2*r)) ! always real
3410 47640 : s = (y - gaussi*z)/SQRT(2*r*(x + r)) ! always complex
3411 :
3412 47640 : CALL cp_fm_struct_release(h_fm_struct)
3413 47640 : CALL cp_fm_struct_release(G_fm_struct)
3414 47640 : CALL cp_cfm_release(hmat)
3415 47640 : CALL cp_cfm_release(c_Gmat)
3416 47640 : CALL cp_fm_release(Gmat)
3417 47640 : CALL cp_fm_release(evects)
3418 47640 : DEALLOCATE (hmat, c_Gmat, Gmat, evects)
3419 :
3420 47640 : END SUBROUTINE get_cardoso_angles
3421 :
3422 0 : END MODULE qs_localization_methods
|