Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Localization methods such as 2x2 Jacobi rotations
10 : !> Steepest Decents
11 : !> Conjugate Gradient
12 : !> \par History
13 : !> Initial parallellization of jacobi (JVDV 07.2003)
14 : !> direct minimization using exponential parametrization (JVDV 09.2003)
15 : !> crazy rotations go fast (JVDV 10.2003)
16 : !> \author CJM (04.2003)
17 : ! **************************************************************************************************
18 : MODULE qs_localization_methods
19 : USE cell_types, ONLY: cell_type
20 : USE cp_blacs_env, ONLY: cp_blacs_env_type
21 : USE cp_cfm_basic_linalg, ONLY: cp_cfm_column_scale,&
22 : cp_cfm_rot_cols,&
23 : cp_cfm_rot_rows,&
24 : cp_cfm_scale,&
25 : cp_cfm_scale_and_add,&
26 : cp_cfm_schur_product,&
27 : cp_cfm_trace
28 : USE cp_cfm_diag, ONLY: cp_cfm_heevd
29 : USE cp_cfm_types, ONLY: &
30 : cp_cfm_create, cp_cfm_get_element, cp_cfm_get_info, cp_cfm_get_submatrix, cp_cfm_release, &
31 : cp_cfm_set_all, cp_cfm_set_submatrix, cp_cfm_to_cfm, cp_cfm_to_fm, cp_cfm_type, &
32 : cp_fm_to_cfm
33 : USE cp_dbcsr_operations, ONLY: cp_dbcsr_sm_fm_multiply
34 : USE cp_external_control, ONLY: external_control
35 : USE cp_fm_basic_linalg, ONLY: cp_fm_frobenius_norm,&
36 : cp_fm_pdgeqpf,&
37 : cp_fm_pdorgqr,&
38 : cp_fm_scale,&
39 : cp_fm_scale_and_add,&
40 : cp_fm_trace,&
41 : cp_fm_transpose,&
42 : cp_fm_triangular_multiply
43 : USE cp_fm_cholesky, ONLY: cp_fm_cholesky_decompose
44 : USE cp_fm_diag, ONLY: cp_fm_syevd
45 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
46 : cp_fm_struct_get,&
47 : cp_fm_struct_release,&
48 : cp_fm_struct_type
49 : USE cp_fm_types, ONLY: &
50 : cp_fm_create, cp_fm_get_element, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_init_random, &
51 : cp_fm_maxabsrownorm, cp_fm_maxabsval, cp_fm_release, cp_fm_set_all, cp_fm_set_submatrix, &
52 : cp_fm_to_fm, cp_fm_to_fm_submat, cp_fm_type
53 : USE cp_log_handling, ONLY: cp_logger_get_default_io_unit,&
54 : cp_logger_get_default_unit_nr
55 : USE dbcsr_api, ONLY: dbcsr_p_type
56 : USE kahan_sum, ONLY: accurate_sum
57 : USE kinds, ONLY: dp
58 : USE machine, ONLY: m_flush,&
59 : m_walltime
60 : USE mathconstants, ONLY: pi,&
61 : twopi
62 : USE matrix_exp, ONLY: exp_pade_real,&
63 : get_nsquare_norder
64 : USE message_passing, ONLY: mp_para_env_type
65 : USE parallel_gemm_api, ONLY: parallel_gemm
66 : #include "./base/base_uses.f90"
67 :
68 : IMPLICIT NONE
69 : PUBLIC :: initialize_weights, crazy_rotations, &
70 : direct_mini, rotate_orbitals, approx_l1_norm_sd, jacobi_rotations, scdm_qrfact, zij_matrix, &
71 : jacobi_cg_edf_ls
72 :
73 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_localization_methods'
74 :
75 : PRIVATE
76 :
77 : TYPE set_c_1d_type
78 : COMPLEX(KIND=dp), POINTER, DIMENSION(:) :: c_array
79 : END TYPE
80 : TYPE set_c_2d_type
81 : COMPLEX(KIND=dp), POINTER, DIMENSION(:, :) :: c_array
82 : END TYPE
83 :
84 : CONTAINS
85 : ! **************************************************************************************************
86 : !> \brief ...
87 : !> \param C ...
88 : !> \param iterations ...
89 : !> \param eps ...
90 : !> \param converged ...
91 : !> \param sweeps ...
92 : ! **************************************************************************************************
93 210 : SUBROUTINE approx_l1_norm_sd(C, iterations, eps, converged, sweeps)
94 : TYPE(cp_fm_type), INTENT(IN) :: C
95 : INTEGER, INTENT(IN) :: iterations
96 : REAL(KIND=dp), INTENT(IN) :: eps
97 : LOGICAL, INTENT(INOUT) :: converged
98 : INTEGER, INTENT(INOUT) :: sweeps
99 :
100 : CHARACTER(len=*), PARAMETER :: routineN = 'approx_l1_norm_sd'
101 : INTEGER, PARAMETER :: taylor_order = 100
102 : REAL(KIND=dp), PARAMETER :: alpha = 0.1_dp, f2_eps = 0.01_dp
103 :
104 : INTEGER :: handle, i, istep, k, n, ncol_local, &
105 : nrow_local, output_unit, p
106 : REAL(KIND=dp) :: expfactor, f2, f2old, gnorm, tnorm
107 : TYPE(cp_blacs_env_type), POINTER :: context
108 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_k_k
109 : TYPE(cp_fm_type) :: CTmp, G, Gp1, Gp2, U
110 : TYPE(mp_para_env_type), POINTER :: para_env
111 :
112 30 : CALL timeset(routineN, handle)
113 :
114 30 : NULLIFY (context, para_env, fm_struct_k_k)
115 :
116 30 : output_unit = cp_logger_get_default_io_unit()
117 :
118 : CALL cp_fm_struct_get(C%matrix_struct, nrow_global=n, ncol_global=k, &
119 : nrow_local=nrow_local, ncol_local=ncol_local, &
120 30 : para_env=para_env, context=context)
121 : CALL cp_fm_struct_create(fm_struct_k_k, para_env=para_env, context=context, &
122 30 : nrow_global=k, ncol_global=k)
123 30 : CALL cp_fm_create(CTmp, C%matrix_struct)
124 30 : CALL cp_fm_create(U, fm_struct_k_k)
125 30 : CALL cp_fm_create(G, fm_struct_k_k)
126 30 : CALL cp_fm_create(Gp1, fm_struct_k_k)
127 30 : CALL cp_fm_create(Gp2, fm_struct_k_k)
128 : !
129 : ! printing
130 30 : IF (output_unit > 0) THEN
131 15 : WRITE (output_unit, '(1X)')
132 15 : WRITE (output_unit, '(2X,A)') '-----------------------------------------------------------------------------'
133 15 : WRITE (output_unit, '(A,I5)') ' Nbr iterations =', iterations
134 15 : WRITE (output_unit, '(A,E10.2)') ' eps convergence =', eps
135 15 : WRITE (output_unit, '(A,I5)') ' Max Taylor order =', taylor_order
136 15 : WRITE (output_unit, '(A,E10.2)') ' f2 eps =', f2_eps
137 15 : WRITE (output_unit, '(A,E10.2)') ' alpha =', alpha
138 15 : WRITE (output_unit, '(A)') ' iteration approx_l1_norm g_norm rel_err'
139 : END IF
140 : !
141 30 : f2old = 0.0_dp
142 30 : converged = .FALSE.
143 : !
144 : ! Start the steepest descent
145 1496 : DO istep = 1, iterations
146 : !
147 : !-------------------------------------------------------------------
148 : ! compute f_2
149 : ! f_2(x)=(x^2+eps)^1/2
150 1486 : f2 = 0.0_dp
151 23004 : DO p = 1, ncol_local ! p
152 1113692 : DO i = 1, nrow_local ! i
153 1112206 : f2 = f2 + SQRT(C%local_data(i, p)**2 + f2_eps)
154 : END DO
155 : END DO
156 1486 : CALL C%matrix_struct%para_env%sum(f2)
157 : !write(*,*) 'qs_localize: f_2=',f2
158 : !-------------------------------------------------------------------
159 : ! compute the derivative of f_2
160 : ! f_2(x)=(x^2+eps)^1/2
161 23004 : DO p = 1, ncol_local ! p
162 1113692 : DO i = 1, nrow_local ! i
163 1112206 : CTmp%local_data(i, p) = C%local_data(i, p)/SQRT(C%local_data(i, p)**2 + f2_eps)
164 : END DO
165 : END DO
166 1486 : CALL parallel_gemm('T', 'N', k, k, n, 1.0_dp, CTmp, C, 0.0_dp, G)
167 : ! antisymmetrize
168 1486 : CALL cp_fm_transpose(G, U)
169 1486 : CALL cp_fm_scale_and_add(-0.5_dp, G, 0.5_dp, U)
170 : !
171 : !-------------------------------------------------------------------
172 : !
173 1486 : gnorm = cp_fm_frobenius_norm(G)
174 : !write(*,*) 'qs_localize: norm(G)=',gnorm
175 : !
176 : ! rescale for steepest descent
177 1486 : CALL cp_fm_scale(-alpha, G)
178 : !
179 : ! compute unitary transform
180 : ! zeroth order
181 1486 : CALL cp_fm_set_all(U, 0.0_dp, 1.0_dp)
182 : ! first order
183 1486 : expfactor = 1.0_dp
184 1486 : CALL cp_fm_scale_and_add(1.0_dp, U, expfactor, G)
185 1486 : tnorm = cp_fm_frobenius_norm(G)
186 : !write(*,*) 'Taylor expansion i=',1,' norm(X^i)/i!=',tnorm
187 1486 : IF (tnorm .GT. 1.0E-10_dp) THEN
188 : ! other orders
189 1486 : CALL cp_fm_to_fm(G, Gp1)
190 4938 : DO i = 2, taylor_order
191 : ! new power of G
192 4938 : CALL parallel_gemm('N', 'N', k, k, k, 1.0_dp, G, Gp1, 0.0_dp, Gp2)
193 4938 : CALL cp_fm_to_fm(Gp2, Gp1)
194 : ! add to the taylor expansion so far
195 4938 : expfactor = expfactor/REAL(i, KIND=dp)
196 4938 : CALL cp_fm_scale_and_add(1.0_dp, U, expfactor, Gp1)
197 4938 : tnorm = cp_fm_frobenius_norm(Gp1)
198 : !write(*,*) 'Taylor expansion i=',i,' norm(X^i)/i!=',tnorm*expfactor
199 4938 : IF (tnorm*expfactor .LT. 1.0E-10_dp) EXIT
200 : END DO
201 : END IF
202 : !
203 : ! incrementaly rotate the MOs
204 1486 : CALL parallel_gemm('N', 'N', n, k, k, 1.0_dp, C, U, 0.0_dp, CTmp)
205 1486 : CALL cp_fm_to_fm(CTmp, C)
206 : !
207 : ! printing
208 1486 : IF (output_unit .GT. 0) THEN
209 743 : WRITE (output_unit, '(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, ABS((f2 - f2old)/f2)
210 : END IF
211 : !
212 : ! Are we done?
213 1486 : sweeps = istep
214 : !IF(gnorm.LE.grad_thresh.AND.ABS((f2-f2old)/f2).LE.f2_thresh.AND.istep.GT.1) THEN
215 1486 : IF (ABS((f2 - f2old)/f2) .LE. eps .AND. istep .GT. 1) THEN
216 20 : converged = .TRUE.
217 20 : EXIT
218 : END IF
219 1476 : f2old = f2
220 : END DO
221 : !
222 : ! here we should do one refine step to enforce C'*S*C=1 for any case
223 : !
224 : ! Print the final result
225 30 : IF (output_unit .GT. 0) WRITE (output_unit, '(A,E16.10)') ' sparseness function f2 = ', f2
226 : !
227 : ! sparsity
228 : !DO i=1,size(thresh,1)
229 : ! gnorm = 0.0_dp
230 : ! DO o=1,ncol_local
231 : ! DO p=1,nrow_local
232 : ! IF(ABS(C%local_data(p,o)).GT.thresh(i)) THEN
233 : ! gnorm = gnorm + 1.0_dp
234 : ! ENDIF
235 : ! ENDDO
236 : ! ENDDO
237 : ! CALL C%matrix_struct%para_env%sum(gnorm)
238 : ! IF(output_unit.GT.0) THEN
239 : ! WRITE(output_unit,*) 'qs_localize: ratio2=',gnorm / ( REAL(k,KIND=dp)*REAL(n,KIND=dp) ),thresh(i)
240 : ! ENDIF
241 : !ENDDO
242 : !
243 : ! deallocate
244 30 : CALL cp_fm_struct_release(fm_struct_k_k)
245 30 : CALL cp_fm_release(CTmp)
246 30 : CALL cp_fm_release(U)
247 30 : CALL cp_fm_release(G)
248 30 : CALL cp_fm_release(Gp1)
249 30 : CALL cp_fm_release(Gp2)
250 :
251 30 : CALL timestop(handle)
252 :
253 30 : END SUBROUTINE approx_l1_norm_sd
254 : ! **************************************************************************************************
255 : !> \brief ...
256 : !> \param cell ...
257 : !> \param weights ...
258 : ! **************************************************************************************************
259 458 : SUBROUTINE initialize_weights(cell, weights)
260 :
261 : TYPE(cell_type), POINTER :: cell
262 : REAL(KIND=dp), DIMENSION(:) :: weights
263 :
264 : REAL(KIND=dp), DIMENSION(3, 3) :: metric
265 :
266 458 : CPASSERT(ASSOCIATED(cell))
267 :
268 458 : metric = 0.0_dp
269 458 : CALL dgemm('T', 'N', 3, 3, 3, 1._dp, cell%hmat(:, :), 3, cell%hmat(:, :), 3, 0.0_dp, metric(:, :), 3)
270 :
271 458 : weights(1) = METRIC(1, 1) - METRIC(1, 2) - METRIC(1, 3)
272 458 : weights(2) = METRIC(2, 2) - METRIC(1, 2) - METRIC(2, 3)
273 458 : weights(3) = METRIC(3, 3) - METRIC(1, 3) - METRIC(2, 3)
274 458 : weights(4) = METRIC(1, 2)
275 458 : weights(5) = METRIC(1, 3)
276 458 : weights(6) = METRIC(2, 3)
277 :
278 458 : END SUBROUTINE initialize_weights
279 :
280 : ! **************************************************************************************************
281 : !> \brief wrapper for the jacobi routines, should be removed if jacobi_rot_para
282 : !> can deal with serial para_envs.
283 : !> \param weights ...
284 : !> \param zij ...
285 : !> \param vectors ...
286 : !> \param para_env ...
287 : !> \param max_iter ...
288 : !> \param eps_localization ...
289 : !> \param sweeps ...
290 : !> \param out_each ...
291 : !> \param target_time ...
292 : !> \param start_time ...
293 : !> \param restricted ...
294 : !> \par History
295 : !> \author Joost VandeVondele (02.2010)
296 : ! **************************************************************************************************
297 388 : SUBROUTINE jacobi_rotations(weights, zij, vectors, para_env, max_iter, &
298 : eps_localization, sweeps, out_each, target_time, start_time, restricted)
299 :
300 : REAL(KIND=dp), INTENT(IN) :: weights(:)
301 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
302 : TYPE(mp_para_env_type), POINTER :: para_env
303 : INTEGER, INTENT(IN) :: max_iter
304 : REAL(KIND=dp), INTENT(IN) :: eps_localization
305 : INTEGER :: sweeps
306 : INTEGER, INTENT(IN) :: out_each
307 : REAL(dp) :: target_time, start_time
308 : INTEGER :: restricted
309 :
310 388 : IF (para_env%num_pe == 1) THEN
311 : CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, &
312 0 : sweeps, out_each, restricted=restricted)
313 : ELSE
314 : CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
315 388 : sweeps, out_each, target_time, start_time, restricted=restricted)
316 : END IF
317 :
318 388 : END SUBROUTINE jacobi_rotations
319 :
320 : ! **************************************************************************************************
321 : !> \brief this routine, private to the module is a serial backup, till we have jacobi_rot_para to work in serial
322 : !> while the routine below works in parallel, it is too slow to be useful
323 : !> \param weights ...
324 : !> \param zij ...
325 : !> \param vectors ...
326 : !> \param max_iter ...
327 : !> \param eps_localization ...
328 : !> \param sweeps ...
329 : !> \param out_each ...
330 : !> \param restricted ...
331 : ! **************************************************************************************************
332 0 : SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, &
333 : out_each, restricted)
334 : REAL(KIND=dp), INTENT(IN) :: weights(:)
335 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
336 : INTEGER, INTENT(IN) :: max_iter
337 : REAL(KIND=dp), INTENT(IN) :: eps_localization
338 : INTEGER :: sweeps
339 : INTEGER, INTENT(IN) :: out_each
340 : INTEGER :: restricted
341 :
342 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rotations_serial'
343 :
344 0 : COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
345 : INTEGER :: dim2, handle, idim, istate, jstate, &
346 : nstate, unit_nr
347 : REAL(KIND=dp) :: ct, st, t1, t2, theta, tolerance
348 : TYPE(cp_cfm_type) :: c_rmat
349 : TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
350 : TYPE(cp_fm_type) :: rmat
351 :
352 0 : CALL timeset(routineN, handle)
353 :
354 0 : dim2 = SIZE(zij, 2)
355 0 : ALLOCATE (c_zij(dim2))
356 : NULLIFY (mii, mij, mjj)
357 0 : ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
358 :
359 0 : CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
360 0 : CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
361 :
362 0 : CALL cp_cfm_create(c_rmat, zij(1, 1)%matrix_struct)
363 0 : CALL cp_cfm_set_all(c_rmat, (0._dp, 0._dp), (1._dp, 0._dp))
364 0 : DO idim = 1, dim2
365 0 : CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
366 : c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
367 0 : zij(2, idim)%local_data, dp)
368 : END DO
369 :
370 0 : CALL cp_fm_get_info(rmat, nrow_global=nstate)
371 0 : tolerance = 1.0e10_dp
372 :
373 0 : sweeps = 0
374 0 : unit_nr = -1
375 0 : IF (rmat%matrix_struct%para_env%is_source()) THEN
376 0 : unit_nr = cp_logger_get_default_unit_nr()
377 0 : WRITE (unit_nr, '(T4,A )') " Localization by iterative Jacobi rotation"
378 : END IF
379 :
380 0 : IF (restricted > 0) THEN
381 0 : unit_nr = cp_logger_get_default_unit_nr()
382 0 : WRITE (unit_nr, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
383 0 : nstate = nstate - restricted
384 : END IF
385 :
386 : ! do jacobi sweeps until converged
387 0 : DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
388 0 : sweeps = sweeps + 1
389 0 : t1 = m_walltime()
390 :
391 0 : DO istate = 1, nstate
392 0 : DO jstate = istate + 1, nstate
393 0 : DO idim = 1, dim2
394 0 : CALL cp_cfm_get_element(c_zij(idim), istate, istate, mii(idim))
395 0 : CALL cp_cfm_get_element(c_zij(idim), istate, jstate, mij(idim))
396 0 : CALL cp_cfm_get_element(c_zij(idim), jstate, jstate, mjj(idim))
397 : END DO
398 0 : CALL get_angle(mii, mjj, mij, weights, theta)
399 0 : st = SIN(theta)
400 0 : ct = COS(theta)
401 0 : CALL rotate_zij(istate, jstate, st, ct, c_zij)
402 :
403 0 : CALL rotate_rmat(istate, jstate, st, ct, c_rmat)
404 : END DO
405 : END DO
406 :
407 0 : CALL check_tolerance(c_zij, weights, tolerance)
408 :
409 0 : t2 = m_walltime()
410 0 : IF (unit_nr > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
411 : WRITE (unit_nr, '(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') &
412 0 : "Iteration:", sweeps, "Tolerance:", tolerance, "Time:", t2 - t1
413 0 : CALL m_flush(unit_nr)
414 : END IF
415 :
416 : END DO
417 :
418 0 : DO idim = 1, dim2
419 0 : zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
420 0 : zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
421 0 : CALL cp_cfm_release(c_zij(idim))
422 : END DO
423 0 : DEALLOCATE (c_zij)
424 0 : DEALLOCATE (mii, mij, mjj)
425 0 : rmat%local_data = REAL(c_rmat%local_data, dp)
426 0 : CALL cp_cfm_release(c_rmat)
427 0 : CALL rotate_orbitals(rmat, vectors)
428 0 : CALL cp_fm_release(rmat)
429 :
430 0 : CALL timestop(handle)
431 :
432 0 : END SUBROUTINE jacobi_rotations_serial
433 : ! **************************************************************************************************
434 : !> \brief very similar to jacobi_rotations_serial with some extra output options
435 : !> \param weights ...
436 : !> \param c_zij ...
437 : !> \param max_iter ...
438 : !> \param c_rmat ...
439 : !> \param eps_localization ...
440 : !> \param tol_out ...
441 : !> \param jsweeps ...
442 : !> \param out_each ...
443 : !> \param c_zij_out ...
444 : !> \param grad_final ...
445 : ! **************************************************************************************************
446 0 : SUBROUTINE jacobi_rotations_serial_1(weights, c_zij, max_iter, c_rmat, eps_localization, &
447 0 : tol_out, jsweeps, out_each, c_zij_out, grad_final)
448 : REAL(KIND=dp), INTENT(IN) :: weights(:)
449 : TYPE(cp_cfm_type), INTENT(IN) :: c_zij(:)
450 : INTEGER, INTENT(IN) :: max_iter
451 : TYPE(cp_cfm_type), INTENT(IN) :: c_rmat
452 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: eps_localization
453 : REAL(KIND=dp), INTENT(OUT), OPTIONAL :: tol_out
454 : INTEGER, INTENT(OUT), OPTIONAL :: jsweeps
455 : INTEGER, INTENT(IN), OPTIONAL :: out_each
456 : TYPE(cp_cfm_type), INTENT(IN), OPTIONAL :: c_zij_out(:)
457 : TYPE(cp_fm_type), INTENT(OUT), OPTIONAL, POINTER :: grad_final
458 :
459 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rotations_serial_1'
460 :
461 : COMPLEX(KIND=dp) :: mzii
462 : COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
463 : INTEGER :: dim2, handle, idim, istate, jstate, &
464 : nstate, sweeps, unit_nr
465 : REAL(KIND=dp) :: alpha, avg_spread_ii, ct, spread_ii, st, &
466 : sum_spread_ii, t1, t2, theta, tolerance
467 : TYPE(cp_cfm_type) :: c_rmat_local
468 : TYPE(cp_cfm_type), ALLOCATABLE :: c_zij_local(:)
469 :
470 0 : CALL timeset(routineN, handle)
471 :
472 0 : dim2 = SIZE(c_zij)
473 : NULLIFY (mii, mij, mjj)
474 0 : ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
475 :
476 0 : ALLOCATE (c_zij_local(dim2))
477 0 : CALL cp_cfm_create(c_rmat_local, c_rmat%matrix_struct)
478 0 : CALL cp_cfm_set_all(c_rmat_local, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
479 0 : DO idim = 1, dim2
480 0 : CALL cp_cfm_create(c_zij_local(idim), c_zij(idim)%matrix_struct)
481 0 : c_zij_local(idim)%local_data = c_zij(idim)%local_data
482 : END DO
483 :
484 0 : CALL cp_cfm_get_info(c_rmat_local, nrow_global=nstate)
485 0 : tolerance = 1.0e10_dp
486 :
487 0 : IF (PRESENT(grad_final)) CALL cp_fm_set_all(grad_final, 0.0_dp)
488 :
489 0 : sweeps = 0
490 0 : IF (PRESENT(out_each)) THEN
491 0 : unit_nr = -1
492 0 : IF (c_rmat_local%matrix_struct%para_env%is_source()) THEN
493 0 : unit_nr = cp_logger_get_default_unit_nr()
494 : END IF
495 0 : alpha = 0.0_dp
496 0 : DO idim = 1, dim2
497 0 : alpha = alpha + weights(idim)
498 : END DO
499 : END IF
500 :
501 : ! do jacobi sweeps until converged
502 0 : DO WHILE (sweeps < max_iter)
503 0 : sweeps = sweeps + 1
504 0 : IF (PRESENT(eps_localization)) THEN
505 0 : IF (tolerance < eps_localization) EXIT
506 : END IF
507 0 : IF (PRESENT(out_each)) t1 = m_walltime()
508 :
509 0 : DO istate = 1, nstate
510 0 : DO jstate = istate + 1, nstate
511 0 : DO idim = 1, dim2
512 0 : CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mii(idim))
513 0 : CALL cp_cfm_get_element(c_zij_local(idim), istate, jstate, mij(idim))
514 0 : CALL cp_cfm_get_element(c_zij_local(idim), jstate, jstate, mjj(idim))
515 : END DO
516 0 : CALL get_angle(mii, mjj, mij, weights, theta)
517 0 : st = SIN(theta)
518 0 : ct = COS(theta)
519 0 : CALL rotate_zij(istate, jstate, st, ct, c_zij_local)
520 :
521 0 : CALL rotate_rmat(istate, jstate, st, ct, c_rmat_local)
522 : END DO
523 : END DO
524 :
525 0 : IF (PRESENT(grad_final)) THEN
526 0 : CALL check_tolerance(c_zij_local, weights, tolerance, grad=grad_final)
527 : ELSE
528 0 : CALL check_tolerance(c_zij_local, weights, tolerance)
529 : END IF
530 0 : IF (PRESENT(tol_out)) tol_out = tolerance
531 :
532 0 : IF (PRESENT(out_each)) THEN
533 0 : t2 = m_walltime()
534 0 : IF (unit_nr > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
535 0 : sum_spread_ii = 0.0_dp
536 0 : DO istate = 1, nstate
537 : spread_ii = 0.0_dp
538 0 : DO idim = 1, dim2
539 0 : CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mzii)
540 : spread_ii = spread_ii + weights(idim)* &
541 0 : ABS(mzii)**2/twopi/twopi
542 : END DO
543 0 : sum_spread_ii = sum_spread_ii + spread_ii
544 : END DO
545 0 : sum_spread_ii = alpha*nstate/twopi/twopi - sum_spread_ii
546 0 : avg_spread_ii = sum_spread_ii/nstate
547 : WRITE (unit_nr, '(T4,A,T26,A,T48,A,T64,A)') &
548 0 : "Iteration", "Avg. Spread_ii", "Tolerance", "Time"
549 : WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4,T60,F8.3)') &
550 0 : sweeps, avg_spread_ii, tolerance, t2 - t1
551 0 : CALL m_flush(unit_nr)
552 : END IF
553 0 : IF (PRESENT(jsweeps)) jsweeps = sweeps
554 : END IF
555 :
556 : END DO
557 :
558 0 : IF (PRESENT(c_zij_out)) THEN
559 0 : DO idim = 1, dim2
560 0 : CALL cp_cfm_to_cfm(c_zij_local(idim), c_zij_out(idim))
561 : END DO
562 : END IF
563 0 : CALL cp_cfm_to_cfm(c_rmat_local, c_rmat)
564 :
565 0 : DEALLOCATE (mii, mij, mjj)
566 0 : DO idim = 1, dim2
567 0 : CALL cp_cfm_release(c_zij_local(idim))
568 : END DO
569 0 : DEALLOCATE (c_zij_local)
570 0 : CALL cp_cfm_release(c_rmat_local)
571 :
572 0 : CALL timestop(handle)
573 :
574 0 : END SUBROUTINE jacobi_rotations_serial_1
575 : ! **************************************************************************************************
576 : !> \brief combine jacobi rotations (serial) and conjugate gradient with golden section line search
577 : !> for partially occupied wannier functions
578 : !> \param para_env ...
579 : !> \param weights ...
580 : !> \param zij ...
581 : !> \param vectors ...
582 : !> \param max_iter ...
583 : !> \param eps_localization ...
584 : !> \param iter ...
585 : !> \param out_each ...
586 : !> \param nextra ...
587 : !> \param do_cg ...
588 : !> \param nmo ...
589 : !> \param vectors_2 ...
590 : !> \param mos_guess ...
591 : ! **************************************************************************************************
592 2 : SUBROUTINE jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, &
593 : iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
594 : TYPE(mp_para_env_type), POINTER :: para_env
595 : REAL(KIND=dp), INTENT(IN) :: weights(:)
596 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
597 : INTEGER, INTENT(IN) :: max_iter
598 : REAL(KIND=dp), INTENT(IN) :: eps_localization
599 : INTEGER :: iter
600 : INTEGER, INTENT(IN) :: out_each, nextra
601 : LOGICAL, INTENT(IN) :: do_cg
602 : INTEGER, INTENT(IN), OPTIONAL :: nmo
603 : TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: vectors_2, mos_guess
604 :
605 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_cg_edf_ls'
606 : COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
607 : czero = (0.0_dp, 0.0_dp)
608 : REAL(KIND=dp), PARAMETER :: gold_sec = 0.3819_dp
609 :
610 : COMPLEX(KIND=dp) :: cnorm2_Gct, cnorm2_Gct_cross, mzii
611 2 : COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_cmat
612 2 : COMPLEX(KIND=dp), DIMENSION(:), POINTER :: arr_zii
613 2 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: matrix_zii
614 : INTEGER :: dim2, handle, icinit, idim, istate, line_search_count, line_searches, lsl, lsm, &
615 : lsr, miniter, nao, ndummy, nocc, norextra, northo, nstate, unit_nr
616 : INTEGER, DIMENSION(1) :: iloc
617 : LOGICAL :: do_cinit_mo, do_cinit_random, &
618 : do_U_guess_mo, new_direction
619 : REAL(KIND=dp) :: alpha, avg_spread_ii, beta, beta_pr, ds, ds_min, mintol, norm, norm2_Gct, &
620 : norm2_Gct_cross, norm2_old, spread_ii, spread_sum, sum_spread_ii, t1, tol, tolc, weight
621 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: sum_spread
622 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_mat, tmp_mat_1
623 : REAL(KIND=dp), DIMENSION(50) :: energy, pos
624 2 : REAL(KIND=dp), DIMENSION(:), POINTER :: tmp_arr
625 : TYPE(cp_blacs_env_type), POINTER :: context
626 : TYPE(cp_cfm_type) :: c_tilde, ctrans_lambda, Gct_old, &
627 : grad_ctilde, skc, tmp_cfm, tmp_cfm_1, &
628 : tmp_cfm_2, U, UL, V, VL, zdiag
629 2 : TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij, zij_0
630 : TYPE(cp_fm_struct_type), POINTER :: tmp_fm_struct
631 : TYPE(cp_fm_type) :: id_nextra, matrix_U, matrix_V, &
632 : matrix_V_all, rmat, tmp_fm, vectors_all
633 :
634 2 : CALL timeset(routineN, handle)
635 :
636 2 : dim2 = SIZE(zij, 2)
637 2 : NULLIFY (context)
638 2 : NULLIFY (matrix_zii, arr_zii)
639 2 : NULLIFY (tmp_fm_struct)
640 2 : NULLIFY (tmp_arr)
641 :
642 12 : ALLOCATE (c_zij(dim2))
643 :
644 2 : CALL cp_fm_get_info(zij(1, 1), nrow_global=nstate)
645 :
646 6 : ALLOCATE (sum_spread(nstate))
647 8 : ALLOCATE (matrix_zii(nstate, dim2))
648 266 : matrix_zii = czero
649 88 : sum_spread = 0.0_dp
650 :
651 : alpha = 0.0_dp
652 8 : DO idim = 1, dim2
653 6 : alpha = alpha + weights(idim)
654 6 : CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
655 : c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
656 5813 : zij(2, idim)%local_data, dp)
657 : END DO
658 :
659 10 : ALLOCATE (zij_0(dim2))
660 :
661 2 : CALL cp_cfm_create(U, zij(1, 1)%matrix_struct)
662 2 : CALL cp_fm_create(matrix_U, zij(1, 1)%matrix_struct)
663 :
664 2 : CALL cp_cfm_set_all(U, czero, cone)
665 2 : CALL cp_fm_set_all(matrix_U, 0.0_dp, 1.0_dp)
666 :
667 2 : CALL cp_fm_get_info(vectors, nrow_global=nao)
668 2 : IF (nextra > 0) THEN
669 2 : IF (PRESENT(mos_guess)) THEN
670 2 : do_cinit_random = .FALSE.
671 2 : do_cinit_mo = .TRUE.
672 2 : CALL cp_fm_get_info(mos_guess, ncol_global=ndummy)
673 : ELSE
674 0 : do_cinit_random = .TRUE.
675 0 : do_cinit_mo = .FALSE.
676 0 : ndummy = nstate
677 : END IF
678 :
679 : IF (do_cinit_random) THEN
680 : icinit = 1
681 : do_U_guess_mo = .FALSE.
682 : ELSEIF (do_cinit_mo) THEN
683 2 : icinit = 2
684 2 : do_U_guess_mo = .TRUE.
685 : END IF
686 :
687 2 : nocc = nstate - nextra
688 2 : northo = nmo - nocc
689 2 : norextra = nmo - nstate
690 2 : CALL cp_fm_struct_get(zij(1, 1)%matrix_struct, context=context)
691 :
692 8 : ALLOCATE (tmp_cmat(nstate, nstate))
693 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nmo, &
694 2 : para_env=para_env, context=context)
695 8 : DO idim = 1, dim2
696 6 : CALL cp_cfm_create(zij_0(idim), tmp_fm_struct)
697 6 : CALL cp_cfm_set_all(zij_0(idim), czero, cone)
698 6 : CALL cp_cfm_get_submatrix(c_zij(idim), tmp_cmat)
699 8 : CALL cp_cfm_set_submatrix(zij_0(idim), tmp_cmat)
700 : END DO
701 2 : CALL cp_fm_struct_release(tmp_fm_struct)
702 2 : DEALLOCATE (tmp_cmat)
703 :
704 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nstate, &
705 2 : para_env=para_env, context=context)
706 2 : CALL cp_cfm_create(V, tmp_fm_struct)
707 2 : CALL cp_fm_create(matrix_V, tmp_fm_struct)
708 2 : CALL cp_cfm_create(zdiag, tmp_fm_struct)
709 2 : CALL cp_fm_create(rmat, tmp_fm_struct)
710 2 : CALL cp_fm_struct_release(tmp_fm_struct)
711 2 : CALL cp_cfm_set_all(V, czero, cone)
712 2 : CALL cp_fm_set_all(matrix_V, 0.0_dp, 1.0_dp)
713 :
714 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=ndummy, &
715 2 : para_env=para_env, context=context)
716 2 : CALL cp_fm_create(matrix_V_all, tmp_fm_struct)
717 2 : CALL cp_fm_struct_release(tmp_fm_struct)
718 2 : CALL cp_fm_set_all(matrix_V_all, 0._dp, 1._dp)
719 :
720 6 : ALLOCATE (arr_zii(nstate))
721 :
722 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
723 2 : para_env=para_env, context=context)
724 2 : CALL cp_cfm_create(c_tilde, tmp_fm_struct)
725 2 : CALL cp_cfm_create(grad_ctilde, tmp_fm_struct)
726 2 : CALL cp_cfm_create(Gct_old, tmp_fm_struct)
727 2 : CALL cp_cfm_create(skc, tmp_fm_struct)
728 2 : CALL cp_fm_struct_release(tmp_fm_struct)
729 2 : CALL cp_cfm_set_all(c_tilde, czero)
730 2 : CALL cp_cfm_set_all(Gct_old, czero)
731 2 : CALL cp_cfm_set_all(skc, czero)
732 :
733 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nstate, &
734 2 : para_env=para_env, context=context)
735 2 : CALL cp_cfm_create(VL, tmp_fm_struct)
736 2 : CALL cp_cfm_set_all(VL, czero)
737 2 : CALL cp_fm_struct_release(tmp_fm_struct)
738 :
739 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nextra, &
740 2 : para_env=para_env, context=context)
741 2 : CALL cp_fm_create(id_nextra, tmp_fm_struct)
742 2 : CALL cp_cfm_create(ctrans_lambda, tmp_fm_struct)
743 2 : CALL cp_fm_struct_release(tmp_fm_struct)
744 2 : CALL cp_cfm_set_all(ctrans_lambda, czero)
745 2 : CALL cp_fm_set_all(id_nextra, 0.0_dp, 1.0_dp)
746 :
747 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nstate, &
748 2 : para_env=para_env, context=context)
749 2 : CALL cp_cfm_create(UL, tmp_fm_struct)
750 2 : CALL cp_fm_struct_release(tmp_fm_struct)
751 2 : CALL cp_cfm_set_all(UL, czero)
752 :
753 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nao, ncol_global=nmo, &
754 2 : para_env=para_env, context=context)
755 2 : CALL cp_fm_create(vectors_all, tmp_fm_struct)
756 2 : CALL cp_fm_struct_release(tmp_fm_struct)
757 8 : ALLOCATE (tmp_mat(nao, nstate))
758 2 : CALL cp_fm_get_submatrix(vectors, tmp_mat)
759 2 : CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, 1, nao, nstate)
760 2 : DEALLOCATE (tmp_mat)
761 8 : ALLOCATE (tmp_mat(nao, norextra))
762 2 : CALL cp_fm_get_submatrix(vectors_2, tmp_mat)
763 2 : CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, nstate + 1, nao, norextra)
764 2 : DEALLOCATE (tmp_mat)
765 :
766 : ! initialize c_tilde
767 14 : SELECT CASE (icinit)
768 : CASE (1) ! random coefficients
769 : !WRITE (*, *) "RANDOM INITIAL GUESS FOR C"
770 0 : CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
771 0 : CALL cp_fm_init_random(tmp_fm, nextra)
772 0 : CALL ortho_vectors(tmp_fm)
773 0 : c_tilde%local_data = tmp_fm%local_data
774 0 : CALL cp_fm_release(tmp_fm)
775 0 : ALLOCATE (tmp_cmat(northo, nextra))
776 0 : CALL cp_cfm_get_submatrix(c_tilde, tmp_cmat)
777 0 : CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, nocc + 1, northo, nextra)
778 0 : DEALLOCATE (tmp_cmat)
779 : CASE (2) ! MO based coeffs
780 2 : CALL parallel_gemm("T", "N", nmo, ndummy, nao, 1.0_dp, vectors_all, mos_guess, 0.0_dp, matrix_V_all)
781 6 : ALLOCATE (tmp_arr(nmo))
782 8 : ALLOCATE (tmp_mat(nmo, ndummy))
783 8 : ALLOCATE (tmp_mat_1(nmo, nstate))
784 : ! normalize matrix_V_all
785 2 : CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat)
786 88 : DO istate = 1, ndummy
787 4558 : tmp_arr(:) = tmp_mat(:, istate)
788 4558 : norm = norm2(tmp_arr)
789 4558 : tmp_arr(:) = tmp_arr(:)/norm
790 4560 : tmp_mat(:, istate) = tmp_arr(:)
791 : END DO
792 2 : CALL cp_fm_set_submatrix(matrix_V_all, tmp_mat)
793 2 : CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat_1, 1, 1, nmo, nstate)
794 2 : CALL cp_fm_set_submatrix(matrix_V, tmp_mat_1)
795 2 : DEALLOCATE (tmp_arr, tmp_mat, tmp_mat_1)
796 2 : CALL cp_fm_to_cfm(msourcer=matrix_V, mtarget=V)
797 8 : ALLOCATE (tmp_mat(northo, ndummy))
798 8 : ALLOCATE (tmp_mat_1(northo, nextra))
799 2 : CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat, nocc + 1, 1, northo, ndummy)
800 6 : ALLOCATE (tmp_arr(ndummy))
801 88 : tmp_arr = 0.0_dp
802 88 : DO istate = 1, ndummy
803 1034 : tmp_arr(istate) = norm2(tmp_mat(:, istate))
804 : END DO
805 : ! find edfs
806 6 : DO istate = 1, nextra
807 180 : iloc = MAXLOC(tmp_arr)
808 48 : tmp_mat_1(:, istate) = tmp_mat(:, iloc(1))
809 6 : tmp_arr(iloc(1)) = 0.0_dp
810 : END DO
811 :
812 2 : DEALLOCATE (tmp_arr, tmp_mat)
813 :
814 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
815 2 : para_env=para_env, context=context)
816 2 : CALL cp_fm_create(tmp_fm, tmp_fm_struct)
817 2 : CALL cp_fm_struct_release(tmp_fm_struct)
818 2 : CALL cp_fm_set_submatrix(tmp_fm, tmp_mat_1)
819 2 : DEALLOCATE (tmp_mat_1)
820 2 : CALL ortho_vectors(tmp_fm)
821 2 : CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
822 2 : CALL cp_fm_release(tmp_fm)
823 : ! initialize U
824 2 : IF (do_U_guess_mo) THEN
825 8 : ALLOCATE (tmp_cmat(nocc, nstate))
826 2 : CALL cp_cfm_get_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
827 2 : CALL cp_cfm_set_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
828 2 : DEALLOCATE (tmp_cmat)
829 8 : ALLOCATE (tmp_cmat(northo, nstate))
830 2 : CALL cp_cfm_get_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
831 2 : CALL cp_cfm_set_submatrix(VL, tmp_cmat, 1, 1, northo, nstate)
832 2 : DEALLOCATE (tmp_cmat)
833 2 : CALL parallel_gemm("C", "N", nextra, nstate, northo, cone, c_tilde, VL, czero, UL)
834 8 : ALLOCATE (tmp_cmat(nextra, nstate))
835 2 : CALL cp_cfm_get_submatrix(UL, tmp_cmat, 1, 1, nextra, nstate)
836 2 : CALL cp_cfm_set_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
837 2 : DEALLOCATE (tmp_cmat)
838 2 : CALL cp_fm_create(tmp_fm, U%matrix_struct)
839 1937 : tmp_fm%local_data = REAL(U%local_data, KIND=dp)
840 2 : CALL ortho_vectors(tmp_fm)
841 2 : CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=U)
842 2 : CALL cp_fm_release(tmp_fm)
843 4 : CALL cp_cfm_to_fm(U, matrix_U)
844 : END IF
845 : ! reevaluate V
846 8 : ALLOCATE (tmp_cmat(nocc, nstate))
847 2 : CALL cp_cfm_get_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
848 2 : CALL cp_cfm_set_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
849 2 : DEALLOCATE (tmp_cmat)
850 8 : ALLOCATE (tmp_cmat(nextra, nstate))
851 2 : CALL cp_cfm_get_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
852 2 : CALL cp_cfm_set_submatrix(UL, tmp_cmat, 1, 1, nextra, nstate)
853 2 : DEALLOCATE (tmp_cmat)
854 2 : CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, UL, czero, VL)
855 8 : ALLOCATE (tmp_cmat(northo, nstate))
856 2 : CALL cp_cfm_get_submatrix(VL, tmp_cmat)
857 2 : CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
858 6 : DEALLOCATE (tmp_cmat)
859 : END SELECT
860 : ELSE
861 0 : DO idim = 1, dim2
862 0 : CALL cp_cfm_create(zij_0(idim), zij(1, 1)%matrix_struct)
863 0 : CALL cp_cfm_to_cfm(c_zij(idim), zij_0(idim))
864 : END DO
865 0 : CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
866 0 : CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
867 : END IF
868 :
869 2 : unit_nr = -1
870 2 : IF (rmat%matrix_struct%para_env%is_source()) THEN
871 1 : unit_nr = cp_logger_get_default_unit_nr()
872 1 : WRITE (unit_nr, '(T4,A )') " Localization by combined Jacobi rotations and Non-Linear Conjugate Gradient"
873 : END IF
874 :
875 2 : norm2_old = 1.0E30_dp
876 2 : ds_min = 1.0_dp
877 2 : new_direction = .TRUE.
878 2 : iter = 0
879 2 : line_searches = 0
880 2 : line_search_count = 0
881 2 : tol = 1.0E+20_dp
882 2 : mintol = 1.0E+10_dp
883 2 : miniter = 0
884 :
885 : !IF (nextra > 0) WRITE(*,*) 'random_guess, MO_guess, U_guess, conjugate_gradient: ', &
886 : ! do_cinit_random, do_cinit_mo, do_U_guess_mo, do_cg
887 :
888 : ! do conjugate gradient until converged
889 34 : DO WHILE (iter < max_iter)
890 34 : iter = iter + 1
891 : !WRITE(*,*) 'iter = ', iter
892 34 : t1 = m_walltime()
893 :
894 34 : IF (iter > 1) THEN
895 : ! comput U
896 32 : CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
897 32 : CALL cp_cfm_create(tmp_cfm_2, zij(1, 1)%matrix_struct)
898 32 : IF (para_env%num_pe == 1) THEN
899 0 : CALL jacobi_rotations_serial_1(weights, c_zij, 1, tmp_cfm_2, tol_out=tol)
900 : ELSE
901 32 : CALL jacobi_rot_para_1(weights, c_zij, para_env, 1, tmp_cfm_2, tol_out=tol)
902 : END IF
903 32 : CALL parallel_gemm('N', 'N', nstate, nstate, nstate, cone, U, tmp_cfm_2, czero, tmp_cfm)
904 32 : CALL cp_cfm_to_cfm(tmp_cfm, U)
905 32 : CALL cp_cfm_release(tmp_cfm)
906 32 : CALL cp_cfm_release(tmp_cfm_2)
907 : END IF
908 :
909 34 : IF (nextra > 0) THEN
910 136 : ALLOCATE (tmp_cmat(nextra, nstate))
911 34 : CALL cp_cfm_get_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
912 34 : CALL cp_cfm_set_submatrix(UL, tmp_cmat)
913 34 : DEALLOCATE (tmp_cmat)
914 34 : IF (iter > 1) THEN
915 : ! orthonormalize c_tilde
916 32 : CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
917 448 : tmp_fm%local_data = REAL(c_tilde%local_data, KIND=dp)
918 32 : CALL ortho_vectors(tmp_fm)
919 32 : CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
920 32 : CALL cp_fm_release(tmp_fm)
921 :
922 128 : ALLOCATE (tmp_cmat(nocc, nstate))
923 32 : CALL cp_cfm_get_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
924 32 : CALL cp_cfm_set_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
925 32 : DEALLOCATE (tmp_cmat)
926 32 : CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, UL, czero, VL)
927 128 : ALLOCATE (tmp_cmat(northo, nstate))
928 32 : CALL cp_cfm_get_submatrix(VL, tmp_cmat)
929 32 : CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
930 64 : DEALLOCATE (tmp_cmat)
931 : END IF
932 :
933 : ! reset if new_direction
934 34 : IF (new_direction .AND. MOD(line_searches, 20) .EQ. 5) THEN
935 0 : CALL cp_cfm_set_all(skc, czero)
936 0 : CALL cp_cfm_set_all(Gct_old, czero)
937 0 : norm2_old = 1.0E30_dp
938 : END IF
939 :
940 34 : CALL cp_cfm_create(tmp_cfm, V%matrix_struct)
941 34 : CALL cp_cfm_to_cfm(V, tmp_cfm)
942 34 : CALL cp_cfm_create(tmp_cfm_1, V%matrix_struct)
943 68 : ndummy = nmo
944 : ELSE
945 0 : CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
946 0 : CALL cp_cfm_to_cfm(U, tmp_cfm)
947 0 : CALL cp_cfm_create(tmp_cfm_1, zij(1, 1)%matrix_struct)
948 0 : ndummy = nstate
949 : END IF
950 : ! update z_ij
951 136 : DO idim = 1, dim2
952 : ! 'tmp_cfm_1 = zij_0*tmp_cfm'
953 : CALL parallel_gemm("N", "N", ndummy, nstate, ndummy, cone, zij_0(idim), &
954 102 : tmp_cfm, czero, tmp_cfm_1)
955 : ! 'c_zij = tmp_cfm_dagg*tmp_cfm_1'
956 : CALL parallel_gemm("C", "N", nstate, nstate, ndummy, cone, tmp_cfm, tmp_cfm_1, &
957 136 : czero, c_zij(idim))
958 : END DO
959 34 : CALL cp_cfm_release(tmp_cfm)
960 34 : CALL cp_cfm_release(tmp_cfm_1)
961 : ! compute spread
962 1496 : DO istate = 1, nstate
963 1462 : spread_ii = 0.0_dp
964 5848 : DO idim = 1, dim2
965 4386 : CALL cp_cfm_get_element(c_zij(idim), istate, istate, mzii)
966 : spread_ii = spread_ii + weights(idim)* &
967 4386 : ABS(mzii)**2/twopi/twopi
968 5848 : matrix_zii(istate, idim) = mzii
969 : END DO
970 : !WRITE(*,*) 'spread_ii', spread_ii
971 1496 : sum_spread(istate) = spread_ii
972 : END DO
973 34 : CALL c_zij(1)%matrix_struct%para_env%sum(spread_ii)
974 34 : spread_sum = accurate_sum(sum_spread)
975 :
976 34 : IF (nextra > 0) THEN
977 : ! update c_tilde
978 34 : CALL cp_cfm_set_all(zdiag, czero)
979 34 : CALL cp_cfm_set_all(grad_ctilde, czero)
980 34 : CALL cp_cfm_create(tmp_cfm, V%matrix_struct)
981 34 : CALL cp_cfm_set_all(tmp_cfm, czero)
982 34 : CALL cp_cfm_create(tmp_cfm_1, V%matrix_struct)
983 34 : CALL cp_cfm_set_all(tmp_cfm_1, czero)
984 136 : ALLOCATE (tmp_cmat(northo, nstate))
985 136 : DO idim = 1, dim2
986 102 : weight = weights(idim)
987 8874 : arr_zii = matrix_zii(:, idim)
988 : ! tmp_cfm = zij_0*V
989 : CALL parallel_gemm("N", "N", nmo, nstate, nmo, cone, &
990 102 : zij_0(idim), V, czero, tmp_cfm)
991 : ! tmp_cfm = tmp_cfm*diag_zij_dagg
992 4488 : CALL cp_cfm_column_scale(tmp_cfm, CONJG(arr_zii))
993 : ! tmp_cfm_1 = tmp_cfm*U_dagg
994 : CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
995 102 : U, czero, tmp_cfm_1)
996 102 : CALL cp_cfm_scale(weight, tmp_cfm_1)
997 : ! zdiag = zdiag + tmp_cfm_1'
998 102 : CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
999 :
1000 : ! tmp_cfm = zij_0_dagg*V
1001 : CALL parallel_gemm("C", "N", nmo, nstate, nmo, cone, &
1002 102 : zij_0(idim), V, czero, tmp_cfm)
1003 :
1004 : ! tmp_cfm = tmp_cfm*diag_zij
1005 102 : CALL cp_cfm_column_scale(tmp_cfm, arr_zii)
1006 : ! tmp_cfm_1 = tmp_cfm*U_dagg
1007 : CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
1008 102 : U, czero, tmp_cfm_1)
1009 102 : CALL cp_cfm_scale(weight, tmp_cfm_1)
1010 : ! zdiag = zdiag + tmp_cfm_1'
1011 136 : CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
1012 : END DO ! idim
1013 34 : CALL cp_cfm_release(tmp_cfm)
1014 34 : CALL cp_cfm_release(tmp_cfm_1)
1015 34 : DEALLOCATE (tmp_cmat)
1016 136 : ALLOCATE (tmp_cmat(northo, nextra))
1017 : CALL cp_cfm_get_submatrix(zdiag, tmp_cmat, nocc + 1, nocc + 1, &
1018 34 : northo, nextra, .FALSE.)
1019 : ! 'grad_ctilde'
1020 34 : CALL cp_cfm_set_submatrix(grad_ctilde, tmp_cmat)
1021 34 : DEALLOCATE (tmp_cmat)
1022 : ! ctrans_lambda = c_tilde_dagg*grad_ctilde
1023 34 : CALL parallel_gemm("C", "N", nextra, nextra, northo, cone, c_tilde, grad_ctilde, czero, ctrans_lambda)
1024 : !WRITE(*,*) "norm(ctrans_lambda) = ", cp_cfm_norm(ctrans_lambda, "F")
1025 : ! 'grad_ctilde = - c_tilde*ctrans_lambda + grad_ctilde'
1026 102 : CALL parallel_gemm("N", "N", northo, nextra, nextra, -cone, c_tilde, ctrans_lambda, cone, grad_ctilde)
1027 : END IF ! nextra > 0
1028 :
1029 : ! tolerance
1030 34 : IF (nextra > 0) THEN
1031 : tolc = 0.0_dp
1032 34 : CALL cp_fm_create(tmp_fm, grad_ctilde%matrix_struct)
1033 34 : CALL cp_cfm_to_fm(grad_ctilde, tmp_fm)
1034 34 : CALL cp_fm_maxabsval(tmp_fm, tolc)
1035 34 : CALL cp_fm_release(tmp_fm)
1036 : !WRITE(*,*) 'tolc = ', tolc
1037 34 : tol = tol + tolc
1038 : END IF
1039 : !WRITE(*,*) 'tol = ', tol
1040 :
1041 36 : IF (nextra > 0) THEN
1042 : !WRITE(*,*) 'new_direction: ', new_direction
1043 34 : IF (new_direction) THEN
1044 6 : line_searches = line_searches + 1
1045 6 : IF (mintol > tol) THEN
1046 4 : mintol = tol
1047 4 : miniter = iter
1048 : END IF
1049 :
1050 6 : IF (unit_nr > 0 .AND. MODULO(iter, out_each) == 0) THEN
1051 0 : sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1052 0 : avg_spread_ii = sum_spread_ii/nstate
1053 : WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1054 0 : "Iteration", "Avg. Spread_ii", "Tolerance"
1055 : WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1056 0 : iter, avg_spread_ii, tol
1057 0 : CALL m_flush(unit_nr)
1058 : END IF
1059 6 : IF (tol < eps_localization) EXIT
1060 :
1061 4 : IF (do_cg) THEN
1062 : cnorm2_Gct = czero
1063 : cnorm2_Gct_cross = czero
1064 4 : CALL cp_cfm_trace(grad_ctilde, Gct_old, cnorm2_Gct_cross)
1065 4 : norm2_Gct_cross = REAL(cnorm2_Gct_cross, KIND=dp)
1066 56 : Gct_old%local_data = grad_ctilde%local_data
1067 4 : CALL cp_cfm_trace(grad_ctilde, Gct_old, cnorm2_Gct)
1068 4 : norm2_Gct = REAL(cnorm2_Gct, KIND=dp)
1069 : ! compute beta_pr
1070 4 : beta_pr = (norm2_Gct - norm2_Gct_cross)/norm2_old
1071 4 : norm2_old = norm2_Gct
1072 4 : beta = MAX(0.0_dp, beta_pr)
1073 : !WRITE(*,*) 'beta = ', beta
1074 : ! compute skc / ska = beta * skc / ska + grad_ctilde / G
1075 4 : CALL cp_cfm_scale(beta, skc)
1076 4 : CALL cp_cfm_scale_and_add(cone, skc, cone, Gct_old)
1077 4 : CALL cp_cfm_trace(skc, Gct_old, cnorm2_Gct_cross)
1078 4 : norm2_Gct_cross = REAL(cnorm2_Gct_cross, KIND=dp)
1079 4 : IF (norm2_Gct_cross .LE. 0.0_dp) THEN ! back to steepest ascent
1080 0 : CALL cp_cfm_scale_and_add(czero, skc, cone, Gct_old)
1081 : END IF
1082 : ELSE
1083 0 : CALL cp_cfm_scale_and_add(czero, skc, cone, grad_ctilde)
1084 : END IF
1085 : line_search_count = 0
1086 : END IF
1087 :
1088 32 : line_search_count = line_search_count + 1
1089 : !WRITE(*,*) 'line_search_count = ', line_search_count
1090 32 : energy(line_search_count) = spread_sum
1091 :
1092 : ! gold line search
1093 32 : new_direction = .FALSE.
1094 32 : IF (line_search_count .EQ. 1) THEN
1095 4 : lsl = 1
1096 4 : lsr = 0
1097 4 : lsm = 1
1098 4 : pos(1) = 0.0_dp
1099 4 : pos(2) = ds_min/gold_sec
1100 4 : ds = pos(2)
1101 : ELSE
1102 28 : IF (line_search_count .EQ. 50) THEN
1103 0 : IF (ABS(energy(line_search_count) - energy(line_search_count - 1)) < 1.0E-4_dp) THEN
1104 0 : CPWARN("Line search failed to converge properly")
1105 0 : ds_min = 0.1_dp
1106 0 : new_direction = .TRUE.
1107 : ds = pos(line_search_count)
1108 0 : line_search_count = 0
1109 : ELSE
1110 0 : CPABORT("No. of line searches exceeds 50")
1111 : END IF
1112 : ELSE
1113 28 : IF (lsr .EQ. 0) THEN
1114 28 : IF (energy(line_search_count - 1) .GT. energy(line_search_count)) THEN
1115 0 : lsr = line_search_count
1116 0 : pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
1117 : ELSE
1118 28 : lsl = lsm
1119 28 : lsm = line_search_count
1120 28 : pos(line_search_count + 1) = pos(line_search_count)/gold_sec
1121 : END IF
1122 : ELSE
1123 0 : IF (pos(line_search_count) .LT. pos(lsm)) THEN
1124 0 : IF (energy(line_search_count) .GT. energy(lsm)) THEN
1125 : lsr = lsm
1126 : lsm = line_search_count
1127 : ELSE
1128 0 : lsl = line_search_count
1129 : END IF
1130 : ELSE
1131 0 : IF (energy(line_search_count) .GT. energy(lsm)) THEN
1132 : lsl = lsm
1133 : lsm = line_search_count
1134 : ELSE
1135 0 : lsr = line_search_count
1136 : END IF
1137 : END IF
1138 0 : IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl)) THEN
1139 0 : pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
1140 : ELSE
1141 0 : pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
1142 : END IF
1143 0 : IF ((pos(lsr) - pos(lsl)) .LT. 1.0E-3_dp*pos(lsr)) THEN
1144 0 : new_direction = .TRUE.
1145 : END IF
1146 : END IF ! lsr .eq. 0
1147 : END IF ! line_search_count .eq. 50
1148 : ! now go to the suggested point
1149 28 : ds = pos(line_search_count + 1) - pos(line_search_count)
1150 : !WRITE(*,*) 'lsl, lsr, lsm, ds = ', lsl, lsr, lsm, ds
1151 28 : IF ((ABS(ds) < 1.0E-10_dp) .AND. (lsl == 1)) THEN
1152 0 : new_direction = .TRUE.
1153 0 : ds_min = 0.5_dp/alpha
1154 28 : ELSEIF (ABS(ds) > 10.0_dp) THEN
1155 4 : new_direction = .TRUE.
1156 4 : ds_min = 0.5_dp/alpha
1157 : ELSE
1158 : ds_min = pos(line_search_count + 1)
1159 : END IF
1160 : END IF ! first step
1161 : ! 'c_tilde = c_tilde + d*skc'
1162 32 : CALL cp_cfm_scale(ds, skc)
1163 32 : CALL cp_cfm_scale_and_add(cone, c_tilde, cone, skc)
1164 : ELSE
1165 0 : IF (mintol > tol) THEN
1166 0 : mintol = tol
1167 0 : miniter = iter
1168 : END IF
1169 0 : IF (unit_nr > 0 .AND. MODULO(iter, out_each) == 0) THEN
1170 0 : sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1171 0 : avg_spread_ii = sum_spread_ii/nstate
1172 : WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1173 0 : "Iteration", "Avg. Spread_ii", "Tolerance"
1174 : WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1175 0 : iter, avg_spread_ii, tol
1176 0 : CALL m_flush(unit_nr)
1177 : END IF
1178 0 : IF (tol < eps_localization) EXIT
1179 : END IF ! nextra > 0
1180 :
1181 : END DO ! iteration
1182 :
1183 2 : IF ((unit_nr > 0) .AND. (iter == max_iter)) THEN
1184 0 : WRITE (unit_nr, '(T4,A,T4,A)') "Min. Itr.", "Min. Tol."
1185 0 : WRITE (unit_nr, '(T4,I7,T4,E12.4)') miniter, mintol
1186 0 : CALL m_flush(unit_nr)
1187 : END IF
1188 :
1189 2 : CALL cp_cfm_to_fm(U, matrix_U)
1190 :
1191 2 : IF (nextra > 0) THEN
1192 2324 : rmat%local_data = REAL(V%local_data, KIND=dp)
1193 2 : CALL rotate_orbitals_edf(rmat, vectors_all, vectors)
1194 :
1195 2 : CALL cp_cfm_release(c_tilde)
1196 2 : CALL cp_cfm_release(grad_ctilde)
1197 2 : CALL cp_cfm_release(Gct_old)
1198 2 : CALL cp_cfm_release(skc)
1199 2 : CALL cp_cfm_release(UL)
1200 2 : CALL cp_cfm_release(zdiag)
1201 2 : CALL cp_cfm_release(ctrans_lambda)
1202 2 : CALL cp_fm_release(id_nextra)
1203 2 : CALL cp_fm_release(vectors_all)
1204 2 : CALL cp_cfm_release(V)
1205 2 : CALL cp_fm_release(matrix_V)
1206 2 : CALL cp_fm_release(matrix_V_all)
1207 2 : CALL cp_cfm_release(VL)
1208 2 : DEALLOCATE (arr_zii)
1209 : ELSE
1210 0 : rmat%local_data = matrix_U%local_data
1211 0 : CALL rotate_orbitals(rmat, vectors)
1212 : END IF
1213 8 : DO idim = 1, dim2
1214 8 : CALL cp_cfm_release(zij_0(idim))
1215 : END DO
1216 2 : DEALLOCATE (zij_0)
1217 :
1218 8 : DO idim = 1, dim2
1219 5811 : zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
1220 5811 : zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
1221 8 : CALL cp_cfm_release(c_zij(idim))
1222 : END DO
1223 2 : DEALLOCATE (c_zij)
1224 2 : CALL cp_fm_release(rmat)
1225 2 : CALL cp_cfm_release(U)
1226 2 : CALL cp_fm_release(matrix_U)
1227 2 : DEALLOCATE (matrix_zii, sum_spread)
1228 :
1229 2 : CALL timestop(handle)
1230 :
1231 10 : END SUBROUTINE jacobi_cg_edf_ls
1232 :
1233 : ! **************************************************************************************************
1234 : !> \brief ...
1235 : !> \param vmatrix ...
1236 : ! **************************************************************************************************
1237 108 : SUBROUTINE ortho_vectors(vmatrix)
1238 :
1239 : TYPE(cp_fm_type), INTENT(IN) :: vmatrix
1240 :
1241 : CHARACTER(LEN=*), PARAMETER :: routineN = 'ortho_vectors'
1242 :
1243 : INTEGER :: handle, n, ncol
1244 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
1245 : TYPE(cp_fm_type) :: overlap_vv
1246 :
1247 36 : CALL timeset(routineN, handle)
1248 :
1249 36 : NULLIFY (fm_struct_tmp)
1250 :
1251 36 : CALL cp_fm_get_info(matrix=vmatrix, nrow_global=n, ncol_global=ncol)
1252 :
1253 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ncol, ncol_global=ncol, &
1254 : para_env=vmatrix%matrix_struct%para_env, &
1255 36 : context=vmatrix%matrix_struct%context)
1256 36 : CALL cp_fm_create(overlap_vv, fm_struct_tmp, "overlap_vv")
1257 36 : CALL cp_fm_struct_release(fm_struct_tmp)
1258 :
1259 36 : CALL parallel_gemm('T', 'N', ncol, ncol, n, 1.0_dp, vmatrix, vmatrix, 0.0_dp, overlap_vv)
1260 36 : CALL cp_fm_cholesky_decompose(overlap_vv)
1261 36 : CALL cp_fm_triangular_multiply(overlap_vv, vmatrix, n_cols=ncol, side='R', invert_tr=.TRUE.)
1262 :
1263 36 : CALL cp_fm_release(overlap_vv)
1264 :
1265 36 : CALL timestop(handle)
1266 :
1267 36 : END SUBROUTINE ortho_vectors
1268 :
1269 : ! **************************************************************************************************
1270 : !> \brief ...
1271 : !> \param istate ...
1272 : !> \param jstate ...
1273 : !> \param st ...
1274 : !> \param ct ...
1275 : !> \param zij ...
1276 : ! **************************************************************************************************
1277 0 : SUBROUTINE rotate_zij(istate, jstate, st, ct, zij)
1278 : INTEGER, INTENT(IN) :: istate, jstate
1279 : REAL(KIND=dp), INTENT(IN) :: st, ct
1280 : TYPE(cp_cfm_type) :: zij(:)
1281 :
1282 : INTEGER :: id
1283 :
1284 : ! Locals
1285 :
1286 0 : DO id = 1, SIZE(zij, 1)
1287 0 : CALL cp_cfm_rot_cols(zij(id), istate, jstate, ct, st)
1288 0 : CALL cp_cfm_rot_rows(zij(id), istate, jstate, ct, st)
1289 : END DO
1290 :
1291 0 : END SUBROUTINE rotate_zij
1292 : ! **************************************************************************************************
1293 : !> \brief ...
1294 : !> \param istate ...
1295 : !> \param jstate ...
1296 : !> \param st ...
1297 : !> \param ct ...
1298 : !> \param rmat ...
1299 : ! **************************************************************************************************
1300 0 : SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat)
1301 : INTEGER, INTENT(IN) :: istate, jstate
1302 : REAL(KIND=dp), INTENT(IN) :: st, ct
1303 : TYPE(cp_cfm_type), INTENT(IN) :: rmat
1304 :
1305 0 : CALL cp_cfm_rot_cols(rmat, istate, jstate, ct, st)
1306 :
1307 0 : END SUBROUTINE rotate_rmat
1308 : ! **************************************************************************************************
1309 : !> \brief ...
1310 : !> \param mii ...
1311 : !> \param mjj ...
1312 : !> \param mij ...
1313 : !> \param weights ...
1314 : !> \param theta ...
1315 : !> \param grad_ij ...
1316 : !> \param step ...
1317 : ! **************************************************************************************************
1318 2328754 : SUBROUTINE get_angle(mii, mjj, mij, weights, theta, grad_ij, step)
1319 : COMPLEX(KIND=dp), POINTER :: mii(:), mjj(:), mij(:)
1320 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1321 : REAL(KIND=dp), INTENT(OUT) :: theta
1322 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: grad_ij, step
1323 :
1324 : COMPLEX(KIND=dp) :: z11, z12, z22
1325 : INTEGER :: dim_m, idim
1326 : REAL(KIND=dp) :: a12, b12, d2, ratio
1327 :
1328 2328754 : a12 = 0.0_dp
1329 2328754 : b12 = 0.0_dp
1330 2328754 : dim_m = SIZE(mii)
1331 9421510 : DO idim = 1, dim_m
1332 7092756 : z11 = mii(idim)
1333 7092756 : z22 = mjj(idim)
1334 7092756 : z12 = mij(idim)
1335 7092756 : a12 = a12 + weights(idim)*REAL(CONJG(z12)*(z11 - z22), KIND=dp)
1336 : b12 = b12 + weights(idim)*REAL((z12*CONJG(z12) - &
1337 9421510 : 0.25_dp*(z11 - z22)*(CONJG(z11) - CONJG(z22))), KIND=dp)
1338 : END DO
1339 2328754 : IF (ABS(b12) > 1.e-10_dp) THEN
1340 2328754 : ratio = -a12/b12
1341 2328754 : theta = 0.25_dp*ATAN(ratio)
1342 0 : ELSEIF (ABS(b12) < 1.e-10_dp) THEN
1343 0 : b12 = 0.0_dp
1344 0 : theta = 0.0_dp
1345 : ELSE
1346 0 : theta = 0.25_dp*pi
1347 : END IF
1348 2328754 : IF (PRESENT(grad_ij)) theta = theta + step*grad_ij
1349 : ! Check second derivative info
1350 2328754 : d2 = a12*SIN(4._dp*theta) - b12*COS(4._dp*theta)
1351 2328754 : IF (d2 <= 0._dp) THEN ! go to the maximum, not the minimum
1352 3226 : IF (theta > 0.0_dp) THEN ! make theta as small as possible
1353 1666 : theta = theta - 0.25_dp*pi
1354 : ELSE
1355 1560 : theta = theta + 0.25_dp*pi
1356 : END IF
1357 : END IF
1358 2328754 : END SUBROUTINE get_angle
1359 : ! **************************************************************************************************
1360 : !> \brief ...
1361 : !> \param zij ...
1362 : !> \param weights ...
1363 : !> \param tolerance ...
1364 : !> \param grad ...
1365 : ! **************************************************************************************************
1366 0 : SUBROUTINE check_tolerance(zij, weights, tolerance, grad)
1367 : TYPE(cp_cfm_type) :: zij(:)
1368 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1369 : REAL(KIND=dp), INTENT(OUT) :: tolerance
1370 : TYPE(cp_fm_type), INTENT(OUT), OPTIONAL :: grad
1371 :
1372 : CHARACTER(len=*), PARAMETER :: routineN = 'check_tolerance'
1373 :
1374 : INTEGER :: handle
1375 : TYPE(cp_fm_type) :: force
1376 :
1377 0 : CALL timeset(routineN, handle)
1378 :
1379 : ! compute gradient at t=0
1380 :
1381 0 : CALL cp_fm_create(force, zij(1)%matrix_struct)
1382 0 : CALL cp_fm_set_all(force, 0._dp)
1383 0 : CALL grad_at_0(zij, weights, force)
1384 0 : CALL cp_fm_maxabsval(force, tolerance)
1385 0 : IF (PRESENT(grad)) CALL cp_fm_to_fm(force, grad)
1386 0 : CALL cp_fm_release(force)
1387 :
1388 0 : CALL timestop(handle)
1389 :
1390 0 : END SUBROUTINE check_tolerance
1391 : ! **************************************************************************************************
1392 : !> \brief ...
1393 : !> \param rmat ...
1394 : !> \param vectors ...
1395 : ! **************************************************************************************************
1396 1048 : SUBROUTINE rotate_orbitals(rmat, vectors)
1397 : TYPE(cp_fm_type), INTENT(IN) :: rmat, vectors
1398 :
1399 : INTEGER :: k, n
1400 : TYPE(cp_fm_type) :: wf
1401 :
1402 524 : CALL cp_fm_create(wf, vectors%matrix_struct)
1403 524 : CALL cp_fm_get_info(vectors, nrow_global=n, ncol_global=k)
1404 524 : CALL parallel_gemm("N", "N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf)
1405 524 : CALL cp_fm_to_fm(wf, vectors)
1406 524 : CALL cp_fm_release(wf)
1407 524 : END SUBROUTINE rotate_orbitals
1408 : ! **************************************************************************************************
1409 : !> \brief ...
1410 : !> \param rmat ...
1411 : !> \param vec_all ...
1412 : !> \param vectors ...
1413 : ! **************************************************************************************************
1414 6 : SUBROUTINE rotate_orbitals_edf(rmat, vec_all, vectors)
1415 : TYPE(cp_fm_type), INTENT(IN) :: rmat, vec_all, vectors
1416 :
1417 : INTEGER :: k, l, n
1418 : TYPE(cp_fm_type) :: wf
1419 :
1420 2 : CALL cp_fm_create(wf, vectors%matrix_struct)
1421 2 : CALL cp_fm_get_info(vec_all, nrow_global=n, ncol_global=k)
1422 2 : CALL cp_fm_get_info(rmat, ncol_global=l)
1423 :
1424 2 : CALL parallel_gemm("N", "N", n, l, k, 1.0_dp, vec_all, rmat, 0.0_dp, wf)
1425 2 : CALL cp_fm_to_fm(wf, vectors)
1426 2 : CALL cp_fm_release(wf)
1427 2 : END SUBROUTINE rotate_orbitals_edf
1428 : ! **************************************************************************************************
1429 : !> \brief ...
1430 : !> \param diag ...
1431 : !> \param weights ...
1432 : !> \param matrix ...
1433 : !> \param ndim ...
1434 : ! **************************************************************************************************
1435 208 : SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim)
1436 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1437 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1438 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1439 : INTEGER, INTENT(IN) :: ndim
1440 :
1441 : COMPLEX(KIND=dp) :: zii, zjj
1442 : INTEGER :: idim, istate, jstate, ncol_local, &
1443 : nrow_global, nrow_local
1444 208 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1445 : REAL(KIND=dp) :: gradsq_ij
1446 :
1447 : CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1448 : ncol_local=ncol_local, nrow_global=nrow_global, &
1449 208 : row_indices=row_indices, col_indices=col_indices)
1450 :
1451 624 : DO istate = 1, nrow_local
1452 2288 : DO jstate = 1, ncol_local
1453 : ! get real and imaginary parts
1454 1664 : gradsq_ij = 0.0_dp
1455 11648 : DO idim = 1, ndim
1456 9984 : zii = diag(row_indices(istate), idim)
1457 9984 : zjj = diag(col_indices(jstate), idim)
1458 : gradsq_ij = gradsq_ij + weights(idim)* &
1459 11648 : 4.0_dp*REAL((CONJG(zii)*zii + CONJG(zjj)*zjj), KIND=dp)
1460 : END DO
1461 2080 : matrix%local_data(istate, jstate) = gradsq_ij
1462 : END DO
1463 : END DO
1464 208 : END SUBROUTINE gradsq_at_0
1465 : ! **************************************************************************************************
1466 : !> \brief ...
1467 : !> \param matrix_p ...
1468 : !> \param weights ...
1469 : !> \param matrix ...
1470 : ! **************************************************************************************************
1471 0 : SUBROUTINE grad_at_0(matrix_p, weights, matrix)
1472 : TYPE(cp_cfm_type) :: matrix_p(:)
1473 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1474 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1475 :
1476 : COMPLEX(KIND=dp) :: zii, zij, zjj
1477 0 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1478 : INTEGER :: dim_m, idim, istate, jstate, ncol_local, &
1479 : nrow_global, nrow_local
1480 0 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1481 : REAL(KIND=dp) :: grad_ij
1482 :
1483 0 : NULLIFY (diag)
1484 : CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1485 : ncol_local=ncol_local, nrow_global=nrow_global, &
1486 0 : row_indices=row_indices, col_indices=col_indices)
1487 0 : dim_m = SIZE(matrix_p, 1)
1488 0 : ALLOCATE (diag(nrow_global, dim_m))
1489 :
1490 0 : DO idim = 1, dim_m
1491 0 : DO istate = 1, nrow_global
1492 0 : CALL cp_cfm_get_element(matrix_p(idim), istate, istate, diag(istate, idim))
1493 : END DO
1494 : END DO
1495 :
1496 0 : DO istate = 1, nrow_local
1497 0 : DO jstate = 1, ncol_local
1498 : ! get real and imaginary parts
1499 : grad_ij = 0.0_dp
1500 0 : DO idim = 1, dim_m
1501 0 : zii = diag(row_indices(istate), idim)
1502 0 : zjj = diag(col_indices(jstate), idim)
1503 0 : zij = matrix_p(idim)%local_data(istate, jstate)
1504 : grad_ij = grad_ij + weights(idim)* &
1505 0 : REAL(4.0_dp*CONJG(zij)*(zjj - zii), dp)
1506 : END DO
1507 0 : matrix%local_data(istate, jstate) = grad_ij
1508 : END DO
1509 : END DO
1510 0 : DEALLOCATE (diag)
1511 0 : END SUBROUTINE grad_at_0
1512 :
1513 : ! return energy and maximum gradient in the current point
1514 : ! **************************************************************************************************
1515 : !> \brief ...
1516 : !> \param weights ...
1517 : !> \param zij ...
1518 : !> \param tolerance ...
1519 : !> \param value ...
1520 : ! **************************************************************************************************
1521 4296 : SUBROUTINE check_tolerance_new(weights, zij, tolerance, value)
1522 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1523 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :)
1524 : REAL(KIND=dp) :: tolerance, value
1525 :
1526 : COMPLEX(KIND=dp) :: kii, kij, kjj
1527 4296 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1528 : INTEGER :: idim, istate, jstate, ncol_local, &
1529 : nrow_global, nrow_local
1530 4296 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1531 : REAL(KIND=dp) :: grad_ij, ra, rb
1532 :
1533 4296 : NULLIFY (diag)
1534 : CALL cp_fm_get_info(zij(1, 1), nrow_local=nrow_local, &
1535 : ncol_local=ncol_local, nrow_global=nrow_global, &
1536 4296 : row_indices=row_indices, col_indices=col_indices)
1537 17184 : ALLOCATE (diag(nrow_global, SIZE(zij, 2)))
1538 4296 : value = 0.0_dp
1539 17298 : DO idim = 1, SIZE(zij, 2)
1540 193614 : DO istate = 1, nrow_global
1541 176316 : CALL cp_fm_get_element(zij(1, idim), istate, istate, ra)
1542 176316 : CALL cp_fm_get_element(zij(2, idim), istate, istate, rb)
1543 176316 : diag(istate, idim) = CMPLX(ra, rb, dp)
1544 189318 : value = value + weights(idim) - weights(idim)*ABS(diag(istate, idim))**2
1545 : END DO
1546 : END DO
1547 4296 : tolerance = 0.0_dp
1548 33587 : DO istate = 1, nrow_local
1549 501896 : DO jstate = 1, ncol_local
1550 1874661 : grad_ij = 0.0_dp
1551 1874661 : DO idim = 1, SIZE(zij, 2)
1552 1406352 : kii = diag(row_indices(istate), idim)
1553 1406352 : kjj = diag(col_indices(jstate), idim)
1554 1406352 : ra = zij(1, idim)%local_data(istate, jstate)
1555 1406352 : rb = zij(2, idim)%local_data(istate, jstate)
1556 1406352 : kij = CMPLX(ra, rb, dp)
1557 : grad_ij = grad_ij + weights(idim)* &
1558 1874661 : REAL(4.0_dp*CONJG(kij)*(kjj - kii), dp)
1559 : END DO
1560 497600 : tolerance = MAX(ABS(grad_ij), tolerance)
1561 : END DO
1562 : END DO
1563 4296 : CALL zij(1, 1)%matrix_struct%para_env%max(tolerance)
1564 :
1565 4296 : DEALLOCATE (diag)
1566 :
1567 4296 : END SUBROUTINE check_tolerance_new
1568 :
1569 : ! **************************************************************************************************
1570 : !> \brief yet another crazy try, computes the angles needed to rotate the orbitals first
1571 : !> and rotates them all at the same time (hoping for the best of course)
1572 : !> \param weights ...
1573 : !> \param zij ...
1574 : !> \param vectors ...
1575 : !> \param max_iter ...
1576 : !> \param max_crazy_angle ...
1577 : !> \param crazy_scale ...
1578 : !> \param crazy_use_diag ...
1579 : !> \param eps_localization ...
1580 : !> \param iterations ...
1581 : !> \param converged ...
1582 : ! **************************************************************************************************
1583 100 : SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, &
1584 : eps_localization, iterations, converged)
1585 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1586 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1587 : INTEGER, INTENT(IN) :: max_iter
1588 : REAL(KIND=dp), INTENT(IN) :: max_crazy_angle
1589 : REAL(KIND=dp) :: crazy_scale
1590 : LOGICAL :: crazy_use_diag
1591 : REAL(KIND=dp), INTENT(IN) :: eps_localization
1592 : INTEGER :: iterations
1593 : LOGICAL, INTENT(out), OPTIONAL :: converged
1594 :
1595 : CHARACTER(len=*), PARAMETER :: routineN = 'crazy_rotations'
1596 : COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1597 : czero = (0.0_dp, 0.0_dp)
1598 :
1599 : COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1600 100 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1601 : COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
1602 : INTEGER :: dim2, handle, i, icol, idim, irow, &
1603 : method, ncol_global, ncol_local, &
1604 : norder, nrow_global, nrow_local, &
1605 : nsquare, unit_nr
1606 100 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1607 : LOGICAL :: do_emd
1608 : REAL(KIND=dp) :: eps_exp, limit_crazy_angle, maxeval, &
1609 : norm, ra, rb, theta, tolerance, value
1610 : REAL(KIND=dp), DIMENSION(:), POINTER :: evals
1611 : TYPE(cp_cfm_type) :: cmat_A, cmat_R, cmat_t1
1612 : TYPE(cp_fm_type) :: mat_R, mat_t, mat_theta, mat_U
1613 :
1614 100 : CALL timeset(routineN, handle)
1615 100 : NULLIFY (row_indices, col_indices)
1616 : CALL cp_fm_get_info(zij(1, 1), nrow_global=nrow_global, &
1617 : ncol_global=ncol_global, &
1618 : row_indices=row_indices, col_indices=col_indices, &
1619 100 : nrow_local=nrow_local, ncol_local=ncol_local)
1620 :
1621 100 : limit_crazy_angle = max_crazy_angle
1622 :
1623 100 : NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj)
1624 100 : dim2 = SIZE(zij, 2)
1625 400 : ALLOCATE (diag_z(nrow_global, dim2))
1626 300 : ALLOCATE (evals(nrow_global))
1627 300 : ALLOCATE (evals_exp(nrow_global))
1628 :
1629 100 : CALL cp_cfm_create(cmat_A, zij(1, 1)%matrix_struct)
1630 100 : CALL cp_cfm_create(cmat_R, zij(1, 1)%matrix_struct)
1631 100 : CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1632 :
1633 100 : CALL cp_fm_create(mat_U, zij(1, 1)%matrix_struct)
1634 100 : CALL cp_fm_create(mat_t, zij(1, 1)%matrix_struct)
1635 100 : CALL cp_fm_create(mat_R, zij(1, 1)%matrix_struct)
1636 :
1637 100 : CALL cp_fm_create(mat_theta, zij(1, 1)%matrix_struct)
1638 :
1639 100 : CALL cp_fm_set_all(mat_R, 0.0_dp, 1.0_dp)
1640 100 : CALL cp_fm_set_all(mat_t, 0.0_dp)
1641 500 : ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
1642 406 : DO idim = 1, dim2
1643 306 : CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(1, idim))
1644 406 : CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(2, idim))
1645 : END DO
1646 100 : CALL cp_fm_syevd(mat_t, mat_U, evals)
1647 406 : DO idim = 1, dim2
1648 : ! rotate z's
1649 306 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_U, 0.0_dp, mat_t)
1650 306 : CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(1, idim))
1651 306 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_U, 0.0_dp, mat_t)
1652 406 : CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(2, idim))
1653 : END DO
1654 : ! collect rotation matrix
1655 100 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_R, mat_U, 0.0_dp, mat_t)
1656 100 : CALL cp_fm_to_fm(mat_t, mat_R)
1657 :
1658 100 : unit_nr = -1
1659 100 : IF (cmat_A%matrix_struct%para_env%is_source()) THEN
1660 50 : unit_nr = cp_logger_get_default_unit_nr()
1661 : WRITE (unit_nr, '(T2,A7,A6,1X,A20,A12,A12,A12)') &
1662 50 : "CRAZY| ", "Iter", "value ", "gradient", "Max. eval", "limit"
1663 : END IF
1664 :
1665 100 : iterations = 0
1666 100 : tolerance = 1.0_dp
1667 :
1668 : DO
1669 4296 : iterations = iterations + 1
1670 17298 : DO idim = 1, dim2
1671 193614 : DO i = 1, nrow_global
1672 176316 : CALL cp_fm_get_element(zij(1, idim), i, i, ra)
1673 176316 : CALL cp_fm_get_element(zij(2, idim), i, i, rb)
1674 189318 : diag_z(i, idim) = CMPLX(ra, rb, dp)
1675 : END DO
1676 : END DO
1677 33587 : DO irow = 1, nrow_local
1678 501896 : DO icol = 1, ncol_local
1679 1874661 : DO idim = 1, dim2
1680 1406352 : ra = zij(1, idim)%local_data(irow, icol)
1681 1406352 : rb = zij(2, idim)%local_data(irow, icol)
1682 1406352 : mij(idim) = CMPLX(ra, rb, dp)
1683 1406352 : mii(idim) = diag_z(row_indices(irow), idim)
1684 1874661 : mjj(idim) = diag_z(col_indices(icol), idim)
1685 : END DO
1686 497600 : IF (row_indices(irow) .NE. col_indices(icol)) THEN
1687 439018 : CALL get_angle(mii, mjj, mij, weights, theta)
1688 439018 : theta = crazy_scale*theta
1689 439018 : IF (theta .GT. limit_crazy_angle) theta = limit_crazy_angle
1690 439018 : IF (theta .LT. -limit_crazy_angle) theta = -limit_crazy_angle
1691 439018 : IF (crazy_use_diag) THEN
1692 0 : cmat_A%local_data(irow, icol) = -CMPLX(0.0_dp, theta, dp)
1693 : ELSE
1694 439018 : mat_theta%local_data(irow, icol) = -theta
1695 : END IF
1696 : ELSE
1697 29291 : IF (crazy_use_diag) THEN
1698 0 : cmat_A%local_data(irow, icol) = czero
1699 : ELSE
1700 29291 : mat_theta%local_data(irow, icol) = 0.0_dp
1701 : END IF
1702 : END IF
1703 : END DO
1704 : END DO
1705 :
1706 : ! construct rotation matrix U based on A using diagonalization
1707 : ! alternatively, exp based on repeated squaring could be faster
1708 4296 : IF (crazy_use_diag) THEN
1709 0 : CALL cp_cfm_heevd(cmat_A, cmat_R, evals)
1710 0 : maxeval = MAXVAL(ABS(evals))
1711 0 : evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
1712 0 : CALL cp_cfm_to_cfm(cmat_R, cmat_t1)
1713 0 : CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1714 : CALL parallel_gemm('N', 'C', nrow_global, nrow_global, nrow_global, cone, &
1715 0 : cmat_t1, cmat_R, czero, cmat_A)
1716 0 : mat_U%local_data = REAL(cmat_A%local_data, KIND=dp) ! U is a real matrix
1717 : ELSE
1718 4296 : do_emd = .FALSE.
1719 4296 : method = 2
1720 4296 : eps_exp = 1.0_dp*EPSILON(eps_exp)
1721 4296 : CALL cp_fm_maxabsrownorm(mat_theta, norm)
1722 4296 : maxeval = norm ! an upper bound
1723 4296 : CALL get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
1724 4296 : CALL exp_pade_real(mat_U, mat_theta, nsquare, norder)
1725 : END IF
1726 :
1727 17298 : DO idim = 1, dim2
1728 : ! rotate z's
1729 13002 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_U, 0.0_dp, mat_t)
1730 13002 : CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(1, idim))
1731 13002 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_U, 0.0_dp, mat_t)
1732 17298 : CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(2, idim))
1733 : END DO
1734 : ! collect rotation matrix
1735 4296 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_R, mat_U, 0.0_dp, mat_t)
1736 4296 : CALL cp_fm_to_fm(mat_t, mat_R)
1737 :
1738 4296 : CALL check_tolerance_new(weights, zij, tolerance, value)
1739 :
1740 4296 : IF (unit_nr > 0) THEN
1741 : WRITE (unit_nr, '(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') &
1742 2148 : "CRAZY| ", iterations, value, tolerance, maxeval, limit_crazy_angle
1743 2148 : CALL m_flush(unit_nr)
1744 : END IF
1745 4296 : IF (tolerance .LT. eps_localization .OR. iterations .GE. max_iter) EXIT
1746 : END DO
1747 :
1748 100 : IF (PRESENT(converged)) converged = (tolerance .LT. eps_localization)
1749 :
1750 100 : CALL cp_cfm_release(cmat_A)
1751 100 : CALL cp_cfm_release(cmat_R)
1752 100 : CALL cp_cfm_release(cmat_T1)
1753 :
1754 100 : CALL cp_fm_release(mat_U)
1755 100 : CALL cp_fm_release(mat_T)
1756 100 : CALL cp_fm_release(mat_theta)
1757 :
1758 100 : CALL rotate_orbitals(mat_R, vectors)
1759 :
1760 100 : CALL cp_fm_release(mat_R)
1761 100 : DEALLOCATE (evals_exp, evals, diag_z)
1762 100 : DEALLOCATE (mii, mij, mjj)
1763 :
1764 100 : CALL timestop(handle)
1765 :
1766 200 : END SUBROUTINE crazy_rotations
1767 :
1768 : ! **************************************************************************************************
1769 : !> \brief use the exponential parametrization as described in to perform a direct mini
1770 : !> Gerd Berghold et al. PRB 61 (15), pag. 10040 (2000)
1771 : !> none of the input is modified for the time being, just finds the rotations
1772 : !> that minimizes, and throws it away afterwards :-)
1773 : !> apart from being expensive and not cleaned, this works fine
1774 : !> useful to try different spread functionals
1775 : !> \param weights ...
1776 : !> \param zij ...
1777 : !> \param vectors ...
1778 : !> \param max_iter ...
1779 : !> \param eps_localization ...
1780 : !> \param iterations ...
1781 : ! **************************************************************************************************
1782 2 : SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
1783 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1784 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1785 : INTEGER, INTENT(IN) :: max_iter
1786 : REAL(KIND=dp), INTENT(IN) :: eps_localization
1787 : INTEGER :: iterations
1788 :
1789 : CHARACTER(len=*), PARAMETER :: routineN = 'direct_mini'
1790 : COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1791 : czero = (0.0_dp, 0.0_dp)
1792 : REAL(KIND=dp), PARAMETER :: gold_sec = 0.3819_dp
1793 :
1794 : COMPLEX(KIND=dp) :: lk, ll, tmp
1795 2 : COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1796 2 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1797 : INTEGER :: handle, i, icol, idim, irow, &
1798 : line_search_count, line_searches, lsl, &
1799 : lsm, lsr, n, ncol_local, ndim, &
1800 : nrow_local, output_unit
1801 2 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1802 : LOGICAL :: new_direction
1803 : REAL(KIND=dp) :: a, b, beta_pr, c, denom, ds, ds_min, fa, &
1804 : fb, fc, nom, normg, normg_cross, &
1805 : normg_old, npos, omega, tol, val, x0, &
1806 : x1, xa, xb, xc
1807 : REAL(KIND=dp), DIMENSION(150) :: energy, grad, pos
1808 2 : REAL(KIND=dp), DIMENSION(:), POINTER :: evals, fval, fvald
1809 : TYPE(cp_cfm_type) :: cmat_A, cmat_B, cmat_M, cmat_R, cmat_t1, &
1810 : cmat_t2, cmat_U
1811 2 : TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
1812 : TYPE(cp_fm_type) :: matrix_A, matrix_G, matrix_G_old, &
1813 : matrix_G_search, matrix_H, matrix_R, &
1814 : matrix_T
1815 :
1816 2 : NULLIFY (evals, evals_exp, diag_z, fval, fvald)
1817 :
1818 2 : CALL timeset(routineN, handle)
1819 2 : output_unit = cp_logger_get_default_io_unit()
1820 :
1821 2 : n = zij(1, 1)%matrix_struct%nrow_global
1822 2 : ndim = (SIZE(zij, 2))
1823 :
1824 2 : IF (output_unit > 0) THEN
1825 1 : WRITE (output_unit, '(T4,A )') "Localization by direct minimization of the functional; "
1826 1 : WRITE (output_unit, '(T5,2A13,A20,A20,A10 )') " Line search ", " Iteration ", " Functional ", " Tolerance ", " ds Min "
1827 : END IF
1828 :
1829 20 : ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n))
1830 18 : ALLOCATE (c_zij(ndim))
1831 :
1832 : ! create the three complex matrices Z
1833 14 : DO idim = 1, ndim
1834 12 : CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
1835 : c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
1836 158 : zij(2, idim)%local_data, dp)
1837 : END DO
1838 :
1839 2 : CALL cp_fm_create(matrix_A, zij(1, 1)%matrix_struct)
1840 2 : CALL cp_fm_create(matrix_G, zij(1, 1)%matrix_struct)
1841 2 : CALL cp_fm_create(matrix_T, zij(1, 1)%matrix_struct)
1842 2 : CALL cp_fm_create(matrix_H, zij(1, 1)%matrix_struct)
1843 2 : CALL cp_fm_create(matrix_G_search, zij(1, 1)%matrix_struct)
1844 2 : CALL cp_fm_create(matrix_G_old, zij(1, 1)%matrix_struct)
1845 2 : CALL cp_fm_create(matrix_R, zij(1, 1)%matrix_struct)
1846 2 : CALL cp_fm_set_all(matrix_R, 0.0_dp, 1.0_dp)
1847 :
1848 2 : CALL cp_fm_set_all(matrix_A, 0.0_dp)
1849 : ! CALL cp_fm_init_random ( matrix_A )
1850 :
1851 2 : CALL cp_cfm_create(cmat_A, zij(1, 1)%matrix_struct)
1852 2 : CALL cp_cfm_create(cmat_U, zij(1, 1)%matrix_struct)
1853 2 : CALL cp_cfm_create(cmat_R, zij(1, 1)%matrix_struct)
1854 2 : CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1855 2 : CALL cp_cfm_create(cmat_t2, zij(1, 1)%matrix_struct)
1856 2 : CALL cp_cfm_create(cmat_B, zij(1, 1)%matrix_struct)
1857 2 : CALL cp_cfm_create(cmat_M, zij(1, 1)%matrix_struct)
1858 :
1859 : CALL cp_cfm_get_info(cmat_B, nrow_local=nrow_local, ncol_local=ncol_local, &
1860 2 : row_indices=row_indices, col_indices=col_indices)
1861 :
1862 2 : CALL cp_fm_set_all(matrix_G_old, 0.0_dp)
1863 2 : CALL cp_fm_set_all(matrix_G_search, 0.0_dp)
1864 2 : normg_old = 1.0E30_dp
1865 2 : ds_min = 1.0_dp
1866 2 : new_direction = .TRUE.
1867 2 : Iterations = 0
1868 2 : line_searches = 0
1869 2 : line_search_count = 0
1870 208 : DO
1871 208 : iterations = iterations + 1
1872 : ! compute U,R,evals given A
1873 2704 : cmat_A%local_data = CMPLX(0.0_dp, matrix_A%local_data, dp) ! cmat_A is hermitian, evals are reals
1874 208 : CALL cp_cfm_heevd(cmat_A, cmat_R, evals)
1875 1040 : evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
1876 208 : CALL cp_cfm_to_cfm(cmat_R, cmat_t1)
1877 208 : CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1878 208 : CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_R, czero, cmat_U)
1879 2704 : cmat_U%local_data = REAL(cmat_U%local_data, KIND=dp) ! enforce numerics, U is a real matrix
1880 :
1881 208 : IF (new_direction .AND. MOD(line_searches, 20) .EQ. 5) THEN ! reset with A .eq. 0
1882 0 : DO idim = 1, ndim
1883 0 : CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_U, czero, cmat_t1)
1884 0 : CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_U, cmat_t1, czero, c_zij(idim))
1885 : END DO
1886 : ! collect rotation matrix
1887 0 : matrix_H%local_data = REAL(cmat_U%local_data, KIND=dp)
1888 0 : CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_R, matrix_H, 0.0_dp, matrix_T)
1889 0 : CALL cp_fm_to_fm(matrix_T, matrix_R)
1890 :
1891 0 : CALL cp_cfm_set_all(cmat_U, czero, cone)
1892 0 : CALL cp_cfm_set_all(cmat_R, czero, cone)
1893 0 : CALL cp_cfm_set_all(cmat_A, czero)
1894 0 : CALL cp_fm_set_all(matrix_A, 0.0_dp)
1895 0 : evals(:) = 0.0_dp
1896 0 : evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
1897 0 : CALL cp_fm_set_all(matrix_G_old, 0.0_dp)
1898 0 : CALL cp_fm_set_all(matrix_G_search, 0.0_dp)
1899 0 : normg_old = 1.0E30_dp
1900 : END IF
1901 :
1902 : ! compute Omega and M
1903 208 : CALL cp_cfm_set_all(cmat_M, czero)
1904 208 : omega = 0.0_dp
1905 1456 : DO idim = 1, ndim
1906 1248 : CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_U, czero, cmat_t1) ! t1=ZU
1907 1248 : CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_U, cmat_t1, czero, cmat_t2) ! t2=(U^T)ZU
1908 6240 : DO i = 1, n
1909 4992 : CALL cp_cfm_get_element(cmat_t2, i, i, diag_z(i, idim))
1910 : SELECT CASE (2) ! allows for selection of different spread functionals
1911 : CASE (1)
1912 : fval(i) = -weights(idim)*LOG(ABS(diag_z(i, idim))**2)
1913 : fvald(i) = -weights(idim)/(ABS(diag_z(i, idim))**2)
1914 : CASE (2) ! corresponds to the jacobi setup
1915 4992 : fval(i) = weights(idim) - weights(idim)*ABS(diag_z(i, idim))**2
1916 4992 : fvald(i) = -weights(idim)
1917 : END SELECT
1918 6240 : omega = omega + fval(i)
1919 : END DO
1920 6448 : DO icol = 1, ncol_local
1921 16224 : DO irow = 1, nrow_local
1922 9984 : tmp = cmat_t1%local_data(irow, icol)*CONJG(diag_z(col_indices(icol), idim))
1923 : cmat_M%local_data(irow, icol) = cmat_M%local_data(irow, icol) &
1924 14976 : + 4.0_dp*fvald(col_indices(icol))*REAL(tmp, KIND=dp)
1925 : END DO
1926 : END DO
1927 : END DO
1928 :
1929 : ! compute Hessian diagonal approximation for the preconditioner
1930 : IF (.TRUE.) THEN
1931 208 : CALL gradsq_at_0(diag_z, weights, matrix_H, ndim)
1932 : ELSE
1933 : CALL cp_fm_set_all(matrix_H, 1.0_dp)
1934 : END IF
1935 :
1936 : ! compute B
1937 1040 : DO icol = 1, ncol_local
1938 2704 : DO irow = 1, nrow_local
1939 1664 : ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow))
1940 1664 : lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol))
1941 2496 : IF (ABS(ll - lk) .LT. 0.5_dp) THEN ! use a series expansion to avoid loss of precision
1942 634 : tmp = 1.0_dp
1943 634 : cmat_B%local_data(irow, icol) = 0.0_dp
1944 10778 : DO i = 1, 16
1945 10144 : cmat_B%local_data(irow, icol) = cmat_B%local_data(irow, icol) + tmp
1946 10778 : tmp = tmp*(ll - lk)/(i + 1)
1947 : END DO
1948 634 : cmat_B%local_data(irow, icol) = cmat_B%local_data(irow, icol)*EXP(lk)
1949 : ELSE
1950 1030 : cmat_B%local_data(irow, icol) = (EXP(lk) - EXP(ll))/(lk - ll)
1951 : END IF
1952 : END DO
1953 : END DO
1954 : ! compute gradient matrix_G
1955 :
1956 208 : CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_M, cmat_R, czero, cmat_t1) ! t1=(M^T)(R^T)
1957 208 : CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_R, cmat_t1, czero, cmat_t2) ! t2=(R)t1
1958 208 : CALL cp_cfm_schur_product(cmat_t2, cmat_B, cmat_t1)
1959 208 : CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_R, czero, cmat_t2)
1960 208 : CALL parallel_gemm('N', 'N', n, n, n, cone, cmat_R, cmat_t2, czero, cmat_t1)
1961 2704 : matrix_G%local_data = REAL(cmat_t1%local_data, KIND=dp)
1962 208 : CALL cp_fm_transpose(matrix_G, matrix_T)
1963 208 : CALL cp_fm_scale_and_add(-1.0_dp, matrix_G, 1.0_dp, matrix_T)
1964 208 : CALL cp_fm_maxabsval(matrix_G, tol)
1965 :
1966 : ! from here on, minimizing technology
1967 208 : IF (new_direction) THEN
1968 : ! energy converged up to machine precision ?
1969 10 : line_searches = line_searches + 1
1970 : ! DO i=1,line_search_count
1971 : ! write(15,*) pos(i),energy(i)
1972 : ! ENDDO
1973 : ! write(15,*) ""
1974 : ! CALL m_flush(15)
1975 : !write(16,*) evals(:)
1976 : !write(17,*) matrix_A%local_data(:,:)
1977 : !write(18,*) matrix_G%local_data(:,:)
1978 10 : IF (output_unit > 0) THEN
1979 5 : WRITE (output_unit, '(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, Iterations, Omega, tol, ds_min
1980 5 : CALL m_flush(output_unit)
1981 : END IF
1982 10 : IF (tol < eps_localization .OR. iterations > max_iter) EXIT
1983 :
1984 : IF (.TRUE.) THEN ! do conjugate gradient CG
1985 8 : CALL cp_fm_trace(matrix_G, matrix_G_old, normg_cross)
1986 8 : normg_cross = normg_cross*0.5_dp ! takes into account the fact that A is antisymmetric
1987 : ! apply the preconditioner
1988 40 : DO icol = 1, ncol_local
1989 104 : DO irow = 1, nrow_local
1990 96 : matrix_G_old%local_data(irow, icol) = matrix_G%local_data(irow, icol)/matrix_H%local_data(irow, icol)
1991 : END DO
1992 : END DO
1993 8 : CALL cp_fm_trace(matrix_G, matrix_G_old, normg)
1994 8 : normg = normg*0.5_dp
1995 8 : beta_pr = (normg - normg_cross)/normg_old
1996 8 : normg_old = normg
1997 8 : beta_pr = MAX(beta_pr, 0.0_dp)
1998 8 : CALL cp_fm_scale_and_add(beta_pr, matrix_G_search, -1.0_dp, matrix_G_old)
1999 8 : CALL cp_fm_trace(matrix_G_search, matrix_G_old, normg_cross)
2000 8 : IF (normg_cross .GE. 0) THEN ! back to SD
2001 0 : IF (matrix_A%matrix_struct%para_env%is_source()) THEN
2002 0 : WRITE (cp_logger_get_default_unit_nr(), *) "!"
2003 : END IF
2004 0 : beta_pr = 0.0_dp
2005 0 : CALL cp_fm_scale_and_add(beta_pr, matrix_G_search, -1.0_dp, matrix_G_old)
2006 : END IF
2007 : ELSE ! SD
2008 : CALL cp_fm_scale_and_add(0.0_dp, matrix_G_search, -1.0_dp, matrix_G)
2009 : END IF
2010 : ! ds_min=1.0E-4_dp
2011 : line_search_count = 0
2012 : END IF
2013 206 : line_search_count = line_search_count + 1
2014 206 : energy(line_search_count) = Omega
2015 :
2016 : ! line search section
2017 : SELECT CASE (3)
2018 : CASE (1) ! two point line search
2019 : SELECT CASE (line_search_count)
2020 : CASE (1)
2021 : pos(1) = 0.0_dp
2022 : pos(2) = ds_min
2023 : CALL cp_fm_trace(matrix_G, matrix_G_search, grad(1))
2024 : grad(1) = grad(1)/2.0_dp
2025 : new_direction = .FALSE.
2026 : CASE (2)
2027 : new_direction = .TRUE.
2028 : x0 = pos(1) ! 0.0_dp
2029 : c = energy(1)
2030 : b = grad(1)
2031 : x1 = pos(2)
2032 : a = (energy(2) - b*x1 - c)/(x1**2)
2033 : IF (a .LE. 0.0_dp) a = 1.0E-15_dp
2034 : npos = -b/(2.0_dp*a)
2035 : val = a*npos**2 + b*npos + c
2036 : IF (val .LT. energy(1) .AND. val .LE. energy(2)) THEN
2037 : ! we go to a minimum, but ...
2038 : ! we take a guard against too large steps
2039 : pos(3) = MIN(npos, MAXVAL(pos(1:2))*4.0_dp)
2040 : ELSE ! just take an extended step
2041 : pos(3) = MAXVAL(pos(1:2))*2.0_dp
2042 : END IF
2043 : END SELECT
2044 : CASE (2) ! 3 point line search
2045 : SELECT CASE (line_search_count)
2046 : CASE (1)
2047 : new_direction = .FALSE.
2048 : pos(1) = 0.0_dp
2049 : pos(2) = ds_min*0.8_dp
2050 : CASE (2)
2051 : new_direction = .FALSE.
2052 : IF (energy(2) .GT. energy(1)) THEN
2053 : pos(3) = ds_min*0.7_dp
2054 : ELSE
2055 : pos(3) = ds_min*1.4_dp
2056 : END IF
2057 : CASE (3)
2058 : new_direction = .TRUE.
2059 : xa = pos(1)
2060 : xb = pos(2)
2061 : xc = pos(3)
2062 : fa = energy(1)
2063 : fb = energy(2)
2064 : fc = energy(3)
2065 : nom = (xb - xa)**2*(fb - fc) - (xb - xc)**2*(fb - fa)
2066 : denom = (xb - xa)*(fb - fc) - (xb - xc)*(fb - fa)
2067 : IF (ABS(denom) .LE. 1.0E-18_dp*MAX(ABS(fb - fc), ABS(fb - fa))) THEN
2068 : npos = xb
2069 : ELSE
2070 : npos = xb - 0.5_dp*nom/denom ! position of the stationary point
2071 : END IF
2072 : val = (npos - xa)*(npos - xb)*fc/((xc - xa)*(xc - xb)) + &
2073 : (npos - xb)*(npos - xc)*fa/((xa - xb)*(xa - xc)) + &
2074 : (npos - xc)*(npos - xa)*fb/((xb - xc)*(xb - xa))
2075 : IF (val .LT. fa .AND. val .LE. fb .AND. val .LE. fc) THEN ! OK, we go to a minimum
2076 : ! we take a guard against too large steps
2077 : pos(4) = MAX(MAXVAL(pos(1:3))*0.01_dp, &
2078 : MIN(npos, MAXVAL(pos(1:3))*4.0_dp))
2079 : ELSE ! just take an extended step
2080 : pos(4) = MAXVAL(pos(1:3))*2.0_dp
2081 : END IF
2082 : END SELECT
2083 : CASE (3) ! golden section hunt
2084 206 : new_direction = .FALSE.
2085 206 : IF (line_search_count .EQ. 1) THEN
2086 8 : lsl = 1
2087 8 : lsr = 0
2088 8 : lsm = 1
2089 8 : pos(1) = 0.0_dp
2090 8 : pos(2) = ds_min/gold_sec
2091 : ELSE
2092 198 : IF (line_search_count .EQ. 150) CPABORT("Too many")
2093 198 : IF (lsr .EQ. 0) THEN
2094 14 : IF (energy(line_search_count - 1) .LT. energy(line_search_count)) THEN
2095 8 : lsr = line_search_count
2096 8 : pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
2097 : ELSE
2098 6 : lsl = lsm
2099 6 : lsm = line_search_count
2100 6 : pos(line_search_count + 1) = pos(line_search_count)/gold_sec
2101 : END IF
2102 : ELSE
2103 184 : IF (pos(line_search_count) .LT. pos(lsm)) THEN
2104 94 : IF (energy(line_search_count) .LT. energy(lsm)) THEN
2105 : lsr = lsm
2106 : lsm = line_search_count
2107 : ELSE
2108 80 : lsl = line_search_count
2109 : END IF
2110 : ELSE
2111 90 : IF (energy(line_search_count) .LT. energy(lsm)) THEN
2112 : lsl = lsm
2113 : lsm = line_search_count
2114 : ELSE
2115 62 : lsr = line_search_count
2116 : END IF
2117 : END IF
2118 184 : IF (pos(lsr) - pos(lsm) .GT. pos(lsm) - pos(lsl)) THEN
2119 86 : pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
2120 : ELSE
2121 98 : pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
2122 : END IF
2123 184 : IF ((pos(lsr) - pos(lsl)) .LT. 1.0E-3_dp*pos(lsr)) THEN
2124 8 : new_direction = .TRUE.
2125 : END IF
2126 : END IF ! lsr .eq. 0
2127 : END IF ! first step
2128 : END SELECT
2129 : ! now go to the suggested point
2130 206 : ds_min = pos(line_search_count + 1)
2131 206 : ds = pos(line_search_count + 1) - pos(line_search_count)
2132 208 : CALL cp_fm_scale_and_add(1.0_dp, matrix_A, ds, matrix_G_search)
2133 : END DO
2134 :
2135 : ! collect rotation matrix
2136 26 : matrix_H%local_data = REAL(cmat_U%local_data, KIND=dp)
2137 2 : CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_R, matrix_H, 0.0_dp, matrix_T)
2138 2 : CALL cp_fm_to_fm(matrix_T, matrix_R)
2139 2 : CALL rotate_orbitals(matrix_R, vectors)
2140 2 : CALL cp_fm_release(matrix_R)
2141 :
2142 2 : CALL cp_fm_release(matrix_A)
2143 2 : CALL cp_fm_release(matrix_G)
2144 2 : CALL cp_fm_release(matrix_H)
2145 2 : CALL cp_fm_release(matrix_T)
2146 2 : CALL cp_fm_release(matrix_G_search)
2147 2 : CALL cp_fm_release(matrix_G_old)
2148 2 : CALL cp_cfm_release(cmat_A)
2149 2 : CALL cp_cfm_release(cmat_U)
2150 2 : CALL cp_cfm_release(cmat_R)
2151 2 : CALL cp_cfm_release(cmat_t1)
2152 2 : CALL cp_cfm_release(cmat_t2)
2153 2 : CALL cp_cfm_release(cmat_B)
2154 2 : CALL cp_cfm_release(cmat_M)
2155 :
2156 2 : DEALLOCATE (evals, evals_exp, fval, fvald)
2157 :
2158 14 : DO idim = 1, SIZE(c_zij)
2159 156 : zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
2160 156 : zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
2161 14 : CALL cp_cfm_release(c_zij(idim))
2162 : END DO
2163 2 : DEALLOCATE (c_zij)
2164 2 : DEALLOCATE (diag_z)
2165 :
2166 2 : CALL timestop(handle)
2167 :
2168 6 : END SUBROUTINE
2169 :
2170 : ! **************************************************************************************************
2171 : !> \brief Parallel algorithm for jacobi rotations
2172 : !> \param weights ...
2173 : !> \param zij ...
2174 : !> \param vectors ...
2175 : !> \param para_env ...
2176 : !> \param max_iter ...
2177 : !> \param eps_localization ...
2178 : !> \param sweeps ...
2179 : !> \param out_each ...
2180 : !> \param target_time ...
2181 : !> \param start_time ...
2182 : !> \param restricted ...
2183 : !> \par History
2184 : !> use allgather for improved performance
2185 : !> \author MI (11.2009)
2186 : ! **************************************************************************************************
2187 388 : SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
2188 : sweeps, out_each, target_time, start_time, restricted)
2189 :
2190 : REAL(KIND=dp), INTENT(IN) :: weights(:)
2191 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
2192 : TYPE(mp_para_env_type), POINTER :: para_env
2193 : INTEGER, INTENT(IN) :: max_iter
2194 : REAL(KIND=dp), INTENT(IN) :: eps_localization
2195 : INTEGER :: sweeps
2196 : INTEGER, INTENT(IN) :: out_each
2197 : REAL(dp) :: target_time, start_time
2198 : INTEGER :: restricted
2199 :
2200 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rot_para'
2201 :
2202 : INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2203 : nblock, nblock_max, ns_me, nstate, &
2204 : output_unit
2205 388 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2206 388 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_im, z_ij_loc_re
2207 : REAL(KIND=dp) :: xstate
2208 : TYPE(cp_fm_type) :: rmat
2209 388 : TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2210 :
2211 388 : CALL timeset(routineN, handle)
2212 :
2213 388 : output_unit = cp_logger_get_default_io_unit()
2214 :
2215 388 : NULLIFY (cz_ij_loc)
2216 :
2217 388 : dim2 = SIZE(zij, 2)
2218 :
2219 388 : CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
2220 388 : CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
2221 :
2222 388 : CALL cp_fm_get_info(rmat, nrow_global=nstate)
2223 :
2224 388 : IF (restricted > 0) THEN
2225 0 : IF (output_unit > 0) THEN
2226 0 : WRITE (output_unit, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
2227 : END IF
2228 0 : nstate = nstate - restricted
2229 : END IF
2230 :
2231 : ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2232 388 : xstate = REAL(nstate, dp)/REAL(para_env%num_pe, dp)
2233 1552 : ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2234 1164 : DO ip = 1, para_env%num_pe
2235 776 : ns_bound(ip - 1, 1) = MIN(nstate, NINT(xstate*(ip - 1))) + 1
2236 1164 : ns_bound(ip - 1, 2) = MIN(nstate, NINT(xstate*ip))
2237 : END DO
2238 388 : nblock_max = 0
2239 1164 : DO ip = 0, para_env%num_pe - 1
2240 776 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2241 1164 : nblock_max = MAX(nblock_max, nblock)
2242 : END DO
2243 :
2244 : ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2245 1552 : ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2246 1164 : ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2247 1164 : ALLOCATE (cz_ij_loc(dim2))
2248 1576 : DO idim = 1, dim2
2249 3952 : DO ip = 0, para_env%num_pe - 1
2250 2376 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2251 2376 : CALL cp_fm_get_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2252 2376 : CALL cp_fm_get_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2253 3564 : IF (para_env%mepos == ip) THEN
2254 4731 : ALLOCATE (cz_ij_loc(idim)%c_array(nstate, nblock))
2255 5025 : DO i = 1, nblock
2256 37818 : DO j = 1, nstate
2257 36630 : cz_ij_loc(idim)%c_array(j, i) = CMPLX(z_ij_loc_re(j, i), z_ij_loc_im(j, i), dp)
2258 : END DO
2259 : END DO
2260 : END IF
2261 : END DO ! ip
2262 : END DO
2263 388 : DEALLOCATE (z_ij_loc_re)
2264 388 : DEALLOCATE (z_ij_loc_im)
2265 :
2266 1552 : ALLOCATE (rotmat(nstate, 2*nblock_max))
2267 :
2268 : CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, ns_bound, &
2269 : cz_ij_loc, rotmat, output_unit, eps_localization=eps_localization, &
2270 388 : target_time=target_time, start_time=start_time)
2271 :
2272 388 : ilow1 = ns_bound(para_env%mepos, 1)
2273 388 : ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2274 1164 : ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2275 1164 : ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2276 1576 : DO idim = 1, dim2
2277 3952 : DO ip = 0, para_env%num_pe - 1
2278 79788 : z_ij_loc_re = 0.0_dp
2279 79788 : z_ij_loc_im = 0.0_dp
2280 2376 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2281 2376 : IF (ip == para_env%mepos) THEN
2282 5025 : ns_me = nblock
2283 5025 : DO i = 1, ns_me
2284 36630 : ii = ilow1 + i - 1
2285 37818 : DO j = 1, nstate
2286 32793 : z_ij_loc_re(j, i) = REAL(cz_ij_loc(idim)%c_array(j, i), dp)
2287 36630 : z_ij_loc_im(j, i) = AIMAG(cz_ij_loc(idim)%c_array(j, i))
2288 : END DO
2289 : END DO
2290 : END IF
2291 2376 : CALL para_env%bcast(z_ij_loc_re, ip)
2292 2376 : CALL para_env%bcast(z_ij_loc_im, ip)
2293 2376 : CALL cp_fm_set_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2294 3564 : CALL cp_fm_set_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2295 : END DO ! ip
2296 : END DO
2297 :
2298 1164 : DO ip = 0, para_env%num_pe - 1
2299 25692 : z_ij_loc_re = 0.0_dp
2300 776 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2301 776 : IF (ip == para_env%mepos) THEN
2302 1630 : ns_me = nblock
2303 1630 : DO i = 1, ns_me
2304 11772 : ii = ilow1 + i - 1
2305 12160 : DO j = 1, nstate
2306 11772 : z_ij_loc_re(j, i) = rotmat(j, i)
2307 : END DO
2308 : END DO
2309 : END IF
2310 776 : CALL para_env%bcast(z_ij_loc_re, ip)
2311 1164 : CALL cp_fm_set_submatrix(rmat, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2312 : END DO
2313 :
2314 388 : DEALLOCATE (z_ij_loc_re)
2315 388 : DEALLOCATE (z_ij_loc_im)
2316 1576 : DO idim = 1, dim2
2317 1576 : DEALLOCATE (cz_ij_loc(idim)%c_array)
2318 : END DO
2319 388 : DEALLOCATE (cz_ij_loc)
2320 :
2321 388 : CALL para_env%sync()
2322 388 : CALL rotate_orbitals(rmat, vectors)
2323 388 : CALL cp_fm_release(rmat)
2324 :
2325 388 : DEALLOCATE (rotmat)
2326 388 : DEALLOCATE (ns_bound)
2327 :
2328 388 : CALL timestop(handle)
2329 :
2330 1164 : END SUBROUTINE jacobi_rot_para
2331 :
2332 : ! **************************************************************************************************
2333 : !> \brief almost identical to 'jacobi_rot_para' but with different inout variables
2334 : !> \param weights ...
2335 : !> \param czij ...
2336 : !> \param para_env ...
2337 : !> \param max_iter ...
2338 : !> \param rmat ...
2339 : !> \param tol_out ...
2340 : !> \author Soumya Ghosh (08/21)
2341 : ! **************************************************************************************************
2342 32 : SUBROUTINE jacobi_rot_para_1(weights, czij, para_env, max_iter, rmat, tol_out)
2343 :
2344 : REAL(KIND=dp), INTENT(IN) :: weights(:)
2345 : TYPE(cp_cfm_type), INTENT(IN) :: czij(:)
2346 : TYPE(mp_para_env_type), POINTER :: para_env
2347 : INTEGER, INTENT(IN) :: max_iter
2348 : TYPE(cp_cfm_type), INTENT(IN) :: rmat
2349 : REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2350 :
2351 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rot_para_1'
2352 :
2353 32 : COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: czij_array
2354 : INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2355 : nblock, nblock_max, ns_me, nstate, &
2356 : sweeps
2357 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2358 32 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_re
2359 : REAL(KIND=dp) :: xstate
2360 32 : TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2361 :
2362 32 : CALL timeset(routineN, handle)
2363 :
2364 32 : dim2 = SIZE(czij)
2365 :
2366 32 : CALL cp_cfm_set_all(rmat, CMPLX(0._dp, 0._dp, dp), CMPLX(1._dp, 0._dp, dp))
2367 :
2368 32 : CALL cp_cfm_get_info(rmat, nrow_global=nstate)
2369 :
2370 : ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2371 32 : xstate = REAL(nstate, dp)/REAL(para_env%num_pe, dp)
2372 128 : ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2373 96 : DO ip = 1, para_env%num_pe
2374 64 : ns_bound(ip - 1, 1) = MIN(nstate, NINT(xstate*(ip - 1))) + 1
2375 96 : ns_bound(ip - 1, 2) = MIN(nstate, NINT(xstate*ip))
2376 : END DO
2377 32 : nblock_max = 0
2378 96 : DO ip = 0, para_env%num_pe - 1
2379 64 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2380 96 : nblock_max = MAX(nblock_max, nblock)
2381 : END DO
2382 :
2383 : ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2384 128 : ALLOCATE (czij_array(nstate, nblock_max))
2385 96 : ALLOCATE (cz_ij_loc(dim2))
2386 128 : DO idim = 1, dim2
2387 320 : DO ip = 0, para_env%num_pe - 1
2388 192 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2389 : ! cfm --> allocatable
2390 192 : CALL cp_cfm_get_submatrix(czij(idim), czij_array, 1, ns_bound(ip, 1), nstate, nblock)
2391 288 : IF (para_env%mepos == ip) THEN
2392 96 : ns_me = nblock
2393 384 : ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me))
2394 2160 : DO i = 1, ns_me
2395 90912 : DO j = 1, nstate
2396 90816 : cz_ij_loc(idim)%c_array(j, i) = czij_array(j, i)
2397 : END DO
2398 : END DO
2399 : END IF
2400 : END DO ! ip
2401 : END DO
2402 32 : DEALLOCATE (czij_array)
2403 :
2404 128 : ALLOCATE (rotmat(nstate, 2*nblock_max))
2405 :
2406 : CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, 1, dim2, nstate, nblock_max, ns_bound, &
2407 32 : cz_ij_loc, rotmat, 0, tol_out=tol_out)
2408 :
2409 32 : ilow1 = ns_bound(para_env%mepos, 1)
2410 32 : ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2411 128 : ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2412 :
2413 96 : DO ip = 0, para_env%num_pe - 1
2414 62016 : z_ij_loc_re = 0.0_dp
2415 64 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2416 64 : IF (ip == para_env%mepos) THEN
2417 720 : ns_me = nblock
2418 720 : DO i = 1, ns_me
2419 30272 : ii = ilow1 + i - 1
2420 30304 : DO j = 1, nstate
2421 30272 : z_ij_loc_re(j, i) = rotmat(j, i)
2422 : END DO
2423 : END DO
2424 : END IF
2425 64 : CALL para_env%bcast(z_ij_loc_re, ip)
2426 62048 : CALL cp_cfm_set_submatrix(rmat, CMPLX(z_ij_loc_re, 0.0_dp, dp), 1, ns_bound(ip, 1), nstate, nblock)
2427 : END DO
2428 :
2429 32 : DEALLOCATE (z_ij_loc_re)
2430 128 : DO idim = 1, dim2
2431 128 : DEALLOCATE (cz_ij_loc(idim)%c_array)
2432 : END DO
2433 32 : DEALLOCATE (cz_ij_loc)
2434 :
2435 32 : CALL para_env%sync()
2436 :
2437 32 : DEALLOCATE (rotmat)
2438 32 : DEALLOCATE (ns_bound)
2439 :
2440 32 : CALL timestop(handle)
2441 :
2442 64 : END SUBROUTINE jacobi_rot_para_1
2443 :
2444 : ! **************************************************************************************************
2445 : !> \brief Parallel algorithm for jacobi rotations
2446 : !> \param weights ...
2447 : !> \param para_env ...
2448 : !> \param max_iter ...
2449 : !> \param sweeps ...
2450 : !> \param out_each ...
2451 : !> \param dim2 ...
2452 : !> \param nstate ...
2453 : !> \param nblock_max ...
2454 : !> \param ns_bound ...
2455 : !> \param cz_ij_loc ...
2456 : !> \param rotmat ...
2457 : !> \param output_unit ...
2458 : !> \param tol_out ...
2459 : !> \param eps_localization ...
2460 : !> \param target_time ...
2461 : !> \param start_time ...
2462 : !> \par History
2463 : !> split out to reuse with different input types
2464 : !> \author HF (05.2022)
2465 : ! **************************************************************************************************
2466 1260 : SUBROUTINE jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, &
2467 420 : ns_bound, cz_ij_loc, rotmat, output_unit, tol_out, eps_localization, target_time, start_time)
2468 :
2469 : REAL(KIND=dp), INTENT(IN) :: weights(:)
2470 : TYPE(mp_para_env_type), POINTER :: para_env
2471 : INTEGER, INTENT(IN) :: max_iter
2472 : INTEGER, INTENT(OUT) :: sweeps
2473 : INTEGER, INTENT(IN) :: out_each, dim2, nstate, nblock_max
2474 : INTEGER, DIMENSION(0:, :), INTENT(IN) :: ns_bound
2475 : TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2476 : REAL(dp), DIMENSION(:, :), INTENT(OUT) :: rotmat
2477 : INTEGER, INTENT(IN) :: output_unit
2478 : REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2479 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: eps_localization
2480 : REAL(dp), OPTIONAL :: target_time, start_time
2481 :
2482 : COMPLEX(KIND=dp) :: zi, zj
2483 420 : COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: c_array_me, c_array_partner
2484 : COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
2485 : INTEGER :: i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, ip, ip_has_i, &
2486 : ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, iu1, iu2, iup1, &
2487 : iup2, j, jj, jstate, k, kk, lsweep, n1, n2, npair, nperm, ns_me, ns_partner, &
2488 : ns_recv_from, ns_recv_partner
2489 420 : INTEGER, ALLOCATABLE, DIMENSION(:) :: rcount, rdispl
2490 420 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: list_pair
2491 : LOGICAL :: should_stop
2492 420 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: gmat, rmat_loc, rmat_recv, rmat_send
2493 420 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: rmat_recv_all
2494 : REAL(KIND=dp) :: ct, func, gmax, grad, ri, rj, st, t1, &
2495 : t2, theta, tolerance, zc, zr
2496 420 : TYPE(set_c_1d_type), DIMENSION(:), POINTER :: zdiag_all, zdiag_me
2497 420 : TYPE(set_c_2d_type), DIMENSION(:), POINTER :: xyz_mix, xyz_mix_ns
2498 :
2499 420 : NULLIFY (zdiag_all, zdiag_me)
2500 420 : NULLIFY (xyz_mix, xyz_mix_ns)
2501 : NULLIFY (mii, mij, mjj)
2502 :
2503 2100 : ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
2504 :
2505 1260 : ALLOCATE (rcount(para_env%num_pe), STAT=istat)
2506 840 : ALLOCATE (rdispl(para_env%num_pe), STAT=istat)
2507 :
2508 420 : tolerance = 1.0e10_dp
2509 420 : sweeps = 0
2510 :
2511 : ! number of processor pairs and number of permutations
2512 420 : npair = (para_env%num_pe + 1)/2
2513 420 : nperm = para_env%num_pe - MOD(para_env%num_pe + 1, 2)
2514 1260 : ALLOCATE (list_pair(2, npair))
2515 :
2516 : ! initialize rotation matrix
2517 87288 : rotmat = 0.0_dp
2518 2350 : DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2519 1930 : ii = i - ns_bound(para_env%mepos, 1) + 1
2520 2350 : rotmat(i, ii) = 1.0_dp
2521 : END DO
2522 :
2523 1260 : ALLOCATE (xyz_mix(dim2))
2524 840 : ALLOCATE (xyz_mix_ns(dim2))
2525 1260 : ALLOCATE (zdiag_me(dim2))
2526 840 : ALLOCATE (zdiag_all(dim2))
2527 :
2528 420 : ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2529 420 : IF (ns_me /= 0) THEN
2530 2065 : ALLOCATE (c_array_me(nstate, ns_me, dim2))
2531 1676 : DO idim = 1, dim2
2532 5465 : ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me))
2533 : END DO
2534 1652 : ALLOCATE (gmat(nstate, ns_me))
2535 : END IF
2536 :
2537 1704 : DO idim = 1, dim2
2538 3852 : ALLOCATE (zdiag_me(idim)%c_array(nblock_max))
2539 7518 : zdiag_me(idim)%c_array = CMPLX(0.0_dp, 0.0_dp, dp)
2540 3852 : ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max))
2541 14172 : zdiag_all(idim)%c_array = CMPLX(0.0_dp, 0.0_dp, dp)
2542 : END DO
2543 1680 : ALLOCATE (rmat_recv(nblock_max*2, nblock_max))
2544 1260 : ALLOCATE (rmat_send(nblock_max*2, nblock_max))
2545 :
2546 : ! buffer for message passing
2547 2100 : ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1))
2548 :
2549 420 : IF (output_unit > 0) THEN
2550 194 : WRITE (output_unit, '(T4,A )') " Localization by iterative distributed Jacobi rotation"
2551 194 : WRITE (output_unit, '(T20,A12,T32, A22,T60, A12,A8 )') "Iteration", "Functional", "Tolerance", " Time "
2552 : END IF
2553 :
2554 87208 : DO lsweep = 1, max_iter + 1
2555 87208 : sweeps = lsweep
2556 87208 : IF (sweeps == max_iter + 1) THEN
2557 156 : IF (output_unit > 0) THEN
2558 62 : WRITE (output_unit, *) ' LOCALIZATION! loop did not converge within the maximum number of iterations.'
2559 62 : WRITE (output_unit, *) ' Present Max. gradient = ', tolerance
2560 : END IF
2561 : EXIT
2562 : END IF
2563 87052 : t1 = m_walltime()
2564 :
2565 174104 : DO iperm = 1, nperm
2566 :
2567 : ! fix partners for this permutation, and get the number of states
2568 87052 : CALL eberlein(iperm, para_env, list_pair)
2569 87052 : ip_partner = -1
2570 87052 : ns_partner = 0
2571 87052 : DO ipair = 1, npair
2572 87052 : IF (list_pair(1, ipair) == para_env%mepos) THEN
2573 43526 : ip_partner = list_pair(2, ipair)
2574 43526 : EXIT
2575 43526 : ELSE IF (list_pair(2, ipair) == para_env%mepos) THEN
2576 43526 : ip_partner = list_pair(1, ipair)
2577 43526 : EXIT
2578 : END IF
2579 : END DO
2580 87052 : IF (ip_partner >= 0) THEN
2581 87052 : ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1
2582 : ELSE
2583 : ns_partner = 0
2584 : END IF
2585 :
2586 : ! if there is a non-zero block connecting two partners, jacobi-sweep it.
2587 87052 : IF (ns_partner*ns_me /= 0) THEN
2588 :
2589 348152 : ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner))
2590 5089598 : rmat_loc = 0.0_dp
2591 698582 : DO i = 1, ns_me + ns_partner
2592 698582 : rmat_loc(i, i) = 1.0_dp
2593 : END DO
2594 :
2595 435190 : ALLOCATE (c_array_partner(nstate, ns_partner, dim2))
2596 :
2597 349754 : DO idim = 1, dim2
2598 1050864 : ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner))
2599 1276511 : DO i = 1, ns_me
2600 7890792 : c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i)
2601 : END DO
2602 : END DO
2603 :
2604 : CALL para_env%sendrecv(msgin=c_array_me, dest=ip_partner, &
2605 87038 : msgout=c_array_partner, source=ip_partner)
2606 :
2607 87038 : n1 = ns_me
2608 87038 : n2 = ns_partner
2609 87038 : ilow1 = ns_bound(para_env%mepos, 1)
2610 87038 : iup1 = ns_bound(para_env%mepos, 1) + n1 - 1
2611 87038 : ilow2 = ns_bound(ip_partner, 1)
2612 87038 : iup2 = ns_bound(ip_partner, 1) + n2 - 1
2613 87038 : IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1)) THEN
2614 43519 : il1 = 1
2615 : iu1 = n1
2616 43519 : iu1 = n1
2617 43519 : il2 = 1 + n1
2618 43519 : iu2 = n1 + n2
2619 : ELSE
2620 43519 : il1 = 1 + n2
2621 : iu1 = n1 + n2
2622 43519 : iu1 = n1 + n2
2623 43519 : il2 = 1
2624 43519 : iu2 = n2
2625 : END IF
2626 :
2627 349754 : DO idim = 1, dim2
2628 1189473 : DO i = 1, n1
2629 4337826 : xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim)
2630 4479723 : xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim)
2631 : END DO
2632 1276511 : DO i = 1, n2
2633 4337826 : xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim)
2634 4479723 : xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim)
2635 : END DO
2636 : END DO
2637 :
2638 698582 : DO istate = 1, n1 + n2
2639 2588318 : DO jstate = istate + 1, n1 + n2
2640 7664298 : DO idim = 1, dim2
2641 5774562 : mii(idim) = xyz_mix(idim)%c_array(istate, istate)
2642 5774562 : mij(idim) = xyz_mix(idim)%c_array(istate, jstate)
2643 7664298 : mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate)
2644 : END DO
2645 1889736 : CALL get_angle(mii, mjj, mij, weights, theta)
2646 1889736 : st = SIN(theta)
2647 1889736 : ct = COS(theta)
2648 7664298 : DO idim = 1, dim2
2649 51288858 : DO i = 1, n1 + n2
2650 45514296 : zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate)
2651 45514296 : zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate)
2652 45514296 : xyz_mix(idim)%c_array(i, istate) = zi
2653 51288858 : xyz_mix(idim)%c_array(i, jstate) = zj
2654 : END DO
2655 53178594 : DO i = 1, n1 + n2
2656 45514296 : zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i)
2657 45514296 : zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i)
2658 45514296 : xyz_mix(idim)%c_array(istate, i) = zi
2659 51288858 : xyz_mix(idim)%c_array(jstate, i) = zj
2660 : END DO
2661 : END DO
2662 :
2663 17237808 : DO i = 1, n1 + n2
2664 14736528 : ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate)
2665 14736528 : rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate)
2666 14736528 : rmat_loc(i, istate) = ri
2667 16626264 : rmat_loc(i, jstate) = rj
2668 : END DO
2669 : END DO
2670 : END DO
2671 :
2672 87038 : k = nblock_max + 1
2673 : CALL para_env%sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, &
2674 5089598 : rotmat(1:nstate, k:k + n2 - 1), ip_partner)
2675 :
2676 87038 : IF (ilow1 < ilow2) THEN
2677 : ! no longer compiles in official sdgb:
2678 : !CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1, k), nstate, rmat_loc(1 + n1, 1), n1 + n2, 0.0_dp, gmat, nstate)
2679 : ! probably inefficient:
2680 : CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, rmat_loc(1 + n1:, 1:n1), &
2681 1467175 : n2, 0.0_dp, gmat(:, :), nstate)
2682 : CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(1:, 1:), &
2683 43519 : n1 + n2, 1.0_dp, gmat(:, :), nstate)
2684 : ELSE
2685 : CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, &
2686 43519 : rmat_loc(1:, n2 + 1:), n1 + n2, 0.0_dp, gmat(:, :), nstate)
2687 : ! no longer compiles in official sdgb:
2688 : !CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1, 1), nstate, rmat_loc(n2 + 1, n2 + 1), n1 + n2, 1.0_dp, gmat, nstate)
2689 : ! probably inefficient:
2690 : CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(n2 + 1:, n2 + 1:), &
2691 1147207 : n1, 1.0_dp, gmat(:, :), nstate)
2692 : END IF
2693 :
2694 87038 : CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1)
2695 :
2696 349754 : DO idim = 1, dim2
2697 1189473 : DO i = 1, n1
2698 7890792 : xyz_mix_ns(idim)%c_array(1:nstate, i) = CMPLX(0.0_dp, 0.0_dp, dp)
2699 : END DO
2700 :
2701 1189473 : DO istate = 1, n1
2702 7890792 : DO jstate = 1, nstate
2703 33316224 : DO i = 1, n2
2704 : xyz_mix_ns(idim)%c_array(jstate, istate) = &
2705 : xyz_mix_ns(idim)%c_array(jstate, istate) + &
2706 32389467 : c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1)
2707 : END DO
2708 : END DO
2709 : END DO
2710 1276511 : DO istate = 1, n1
2711 7890792 : DO jstate = 1, nstate
2712 34155543 : DO i = 1, n1
2713 : xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + &
2714 33228786 : c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1)
2715 : END DO
2716 : END DO
2717 : END DO
2718 : END DO ! idim
2719 :
2720 87038 : DEALLOCATE (c_array_partner)
2721 :
2722 : ELSE ! save my data
2723 56 : DO idim = 1, dim2
2724 77 : DO i = 1, ns_me
2725 105 : xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i)
2726 : END DO
2727 : END DO
2728 : END IF
2729 :
2730 349810 : DO idim = 1, dim2
2731 1276588 : DO i = 1, ns_me
2732 7890876 : cz_ij_loc(idim)%c_array(1:nstate, i) = CMPLX(0.0_dp, 0.0_dp, dp)
2733 : END DO
2734 : END DO
2735 :
2736 87052 : IF (ns_partner*ns_me /= 0) THEN
2737 : ! transpose rotation matrix rmat_loc
2738 698582 : DO i = 1, ns_me + ns_partner
2739 2588318 : DO j = i + 1, ns_me + ns_partner
2740 1889736 : ri = rmat_loc(i, j)
2741 1889736 : rmat_loc(i, j) = rmat_loc(j, i)
2742 2501280 : rmat_loc(j, i) = ri
2743 : END DO
2744 : END DO
2745 :
2746 : ! prepare for distribution
2747 392810 : DO i = 1, n1
2748 1510694 : rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1)
2749 : END DO
2750 87038 : ik = nblock_max
2751 392810 : DO i = 1, n2
2752 1470434 : rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1)
2753 : END DO
2754 : ELSE
2755 56 : rmat_send = 0.0_dp
2756 : END IF
2757 :
2758 : ! collect data from all tasks (this takes some significant time)
2759 87052 : CALL para_env%allgather(rmat_send, rmat_recv_all)
2760 :
2761 : ! update blocks everywhere
2762 261156 : DO ip = 0, para_env%num_pe - 1
2763 :
2764 174104 : ip_recv_from = MOD(para_env%mepos - IP + para_env%num_pe, para_env%num_pe)
2765 6456620 : rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from)
2766 :
2767 174104 : ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1
2768 :
2769 261156 : IF (ns_me /= 0) THEN
2770 174090 : IF (ns_recv_from /= 0) THEN
2771 : !look for the partner of ip_recv_from
2772 174083 : ip_recv_partner = -1
2773 174083 : ns_recv_partner = 0
2774 174083 : DO ipair = 1, npair
2775 174083 : IF (list_pair(1, ipair) == ip_recv_from) THEN
2776 87045 : ip_recv_partner = list_pair(2, ipair)
2777 87045 : EXIT
2778 87038 : ELSE IF (list_pair(2, ipair) == ip_recv_from) THEN
2779 : ip_recv_partner = list_pair(1, ipair)
2780 : EXIT
2781 : END IF
2782 : END DO
2783 :
2784 174083 : IF (ip_recv_partner >= 0) THEN
2785 174083 : ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1
2786 : END IF
2787 174083 : IF (ns_recv_partner > 0) THEN
2788 174076 : il1 = ns_bound(para_env%mepos, 1)
2789 174076 : il_recv = ns_bound(ip_recv_from, 1)
2790 174076 : il_recv_partner = ns_bound(ip_recv_partner, 1)
2791 174076 : ik = nblock_max
2792 :
2793 699508 : DO idim = 1, dim2
2794 2378946 : DO i = 1, ns_recv_from
2795 1853514 : ii = il_recv + i - 1
2796 9080265 : DO j = 1, ns_me
2797 33228786 : jj = j
2798 35082300 : DO k = 1, ns_recv_from
2799 26527467 : kk = il_recv + k - 1
2800 : cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2801 33228786 : rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2802 : END DO
2803 : END DO
2804 : END DO
2805 2553022 : DO i = 1, ns_recv_from
2806 1853514 : ii = il_recv + i - 1
2807 9080265 : DO j = 1, ns_me
2808 32389467 : jj = j
2809 34242981 : DO k = 1, ns_recv_partner
2810 25688148 : kk = il_recv_partner + k - 1
2811 : cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2812 32389467 : rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2813 : END DO
2814 : END DO
2815 : END DO
2816 : END DO ! idim
2817 : ELSE
2818 7 : il1 = ns_bound(para_env%mepos, 1)
2819 7 : il_recv = ns_bound(ip_recv_from, 1)
2820 28 : DO idim = 1, dim2
2821 49 : DO j = 1, ns_me
2822 42 : jj = j
2823 63 : DO i = 1, ns_recv_from
2824 21 : ii = il_recv + i - 1
2825 42 : cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j)
2826 : END DO
2827 : END DO
2828 : END DO ! idim
2829 : END IF
2830 : END IF
2831 : END IF ! ns_me
2832 : END DO ! ip
2833 :
2834 174104 : IF (ns_partner*ns_me /= 0) THEN
2835 87038 : DEALLOCATE (rmat_loc)
2836 349754 : DO idim = 1, dim2
2837 349754 : DEALLOCATE (xyz_mix(idim)%c_array)
2838 : END DO
2839 : END IF
2840 :
2841 : END DO ! iperm
2842 :
2843 : ! calculate the max gradient
2844 349810 : DO idim = 1, dim2
2845 1189536 : DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2846 926778 : ii = i - ns_bound(para_env%mepos, 1) + 1
2847 926778 : zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2848 1189536 : zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2849 : END DO
2850 788274 : rcount(:) = SIZE(zdiag_me(idim)%c_array)
2851 262758 : rdispl(1) = 0
2852 525516 : DO ip = 2, para_env%num_pe
2853 525516 : rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
2854 : END DO
2855 : ! collect all the diagonal elements in a replicated 1d array
2856 3492664 : CALL para_env%allgatherv(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl)
2857 : END DO
2858 :
2859 87052 : gmax = 0.0_dp
2860 392831 : DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2861 305779 : k = j - ns_bound(para_env%mepos, 1) + 1
2862 1337699 : DO i = 1, j - 1
2863 : ! find the location of the diagonal element (i,i)
2864 1088034 : DO ip = 0, para_env%num_pe - 1
2865 1088034 : IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2)) THEN
2866 : ip_has_i = ip
2867 : EXIT
2868 : END IF
2869 : END DO
2870 944868 : ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1
2871 : ! mepos has the diagonal element (j,j), as well as the off diagonal (i,j)
2872 944868 : jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1
2873 944868 : grad = 0.0_dp
2874 3832149 : DO idim = 1, dim2
2875 2887281 : zi = zdiag_all(idim)%c_array(ii)
2876 2887281 : zj = zdiag_all(idim)%c_array(jj)
2877 3832149 : grad = grad + weights(idim)*REAL(4.0_dp*CONJG(cz_ij_loc(idim)%c_array(i, k))*(zj - zi), dp)
2878 : END DO
2879 1250647 : gmax = MAX(gmax, ABS(grad))
2880 : END DO
2881 : END DO
2882 :
2883 87052 : CALL para_env%max(gmax)
2884 87052 : tolerance = gmax
2885 87052 : IF (PRESENT(tol_out)) tol_out = tolerance
2886 :
2887 87052 : func = 0.0_dp
2888 392831 : DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2889 305779 : k = i - ns_bound(para_env%mepos, 1) + 1
2890 1319609 : DO idim = 1, dim2
2891 926778 : zr = REAL(cz_ij_loc(idim)%c_array(i, k), dp)
2892 926778 : zc = AIMAG(cz_ij_loc(idim)%c_array(i, k))
2893 1232557 : func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/twopi/twopi
2894 : END DO
2895 : END DO
2896 87052 : CALL para_env%sum(func)
2897 87052 : t2 = m_walltime()
2898 :
2899 87052 : IF (output_unit > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
2900 440 : WRITE (output_unit, '(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1
2901 440 : CALL m_flush(output_unit)
2902 : END IF
2903 87052 : IF (PRESENT(eps_localization)) THEN
2904 87020 : IF (tolerance < eps_localization) EXIT
2905 : END IF
2906 87052 : IF (PRESENT(target_time) .AND. PRESENT(start_time)) THEN
2907 86756 : CALL external_control(should_stop, "LOC", target_time=target_time, start_time=start_time)
2908 86756 : IF (should_stop) EXIT
2909 : END IF
2910 :
2911 : END DO ! lsweep
2912 :
2913 : ! buffer for message passing
2914 420 : DEALLOCATE (rmat_recv_all)
2915 :
2916 420 : DEALLOCATE (rmat_recv)
2917 420 : DEALLOCATE (rmat_send)
2918 420 : IF (ns_me > 0) THEN
2919 413 : DEALLOCATE (c_array_me)
2920 : END IF
2921 1704 : DO idim = 1, dim2
2922 1284 : DEALLOCATE (zdiag_me(idim)%c_array)
2923 1704 : DEALLOCATE (zdiag_all(idim)%c_array)
2924 : END DO
2925 420 : DEALLOCATE (zdiag_me)
2926 420 : DEALLOCATE (zdiag_all)
2927 420 : DEALLOCATE (xyz_mix)
2928 1704 : DO idim = 1, dim2
2929 1704 : IF (ns_me /= 0) THEN
2930 1263 : DEALLOCATE (xyz_mix_ns(idim)%c_array)
2931 : END IF
2932 : END DO
2933 420 : DEALLOCATE (xyz_mix_ns)
2934 420 : IF (ns_me /= 0) THEN
2935 413 : DEALLOCATE (gmat)
2936 : END IF
2937 420 : DEALLOCATE (mii)
2938 420 : DEALLOCATE (mij)
2939 420 : DEALLOCATE (mjj)
2940 420 : DEALLOCATE (list_pair)
2941 :
2942 840 : END SUBROUTINE jacobi_rot_para_core
2943 :
2944 : ! **************************************************************************************************
2945 : !> \brief ...
2946 : !> \param iperm ...
2947 : !> \param para_env ...
2948 : !> \param list_pair ...
2949 : ! **************************************************************************************************
2950 87052 : SUBROUTINE eberlein(iperm, para_env, list_pair)
2951 : INTEGER, INTENT(IN) :: iperm
2952 : TYPE(mp_para_env_type), POINTER :: para_env
2953 : INTEGER, DIMENSION(:, :) :: list_pair
2954 :
2955 : INTEGER :: i, ii, jj, npair
2956 :
2957 87052 : npair = (para_env%num_pe + 1)/2
2958 87052 : IF (iperm == 1) THEN
2959 : !..set up initial ordering
2960 261156 : DO I = 0, para_env%num_pe - 1
2961 174104 : II = ((i + 1) + 1)/2
2962 174104 : JJ = MOD((i + 1) + 1, 2) + 1
2963 261156 : list_pair(JJ, II) = i
2964 : END DO
2965 87052 : IF (MOD(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1
2966 0 : ELSEIF (MOD(iperm, 2) == 0) THEN
2967 : !..a type shift
2968 0 : jj = list_pair(1, npair)
2969 0 : DO I = npair, 3, -1
2970 0 : list_pair(1, I) = list_pair(1, I - 1)
2971 : END DO
2972 0 : list_pair(1, 2) = list_pair(2, 1)
2973 0 : list_pair(2, 1) = jj
2974 : ELSE
2975 : !..b type shift
2976 0 : jj = list_pair(2, 1)
2977 0 : DO I = 1, npair - 1
2978 0 : list_pair(2, I) = list_pair(2, I + 1)
2979 : END DO
2980 0 : list_pair(2, npair) = jj
2981 : END IF
2982 :
2983 87052 : END SUBROUTINE eberlein
2984 :
2985 : ! **************************************************************************************************
2986 : !> \brief ...
2987 : !> \param vectors ...
2988 : !> \param op_sm_set ...
2989 : !> \param zij_fm_set ...
2990 : ! **************************************************************************************************
2991 488 : SUBROUTINE zij_matrix(vectors, op_sm_set, zij_fm_set)
2992 :
2993 : TYPE(cp_fm_type), INTENT(IN) :: vectors
2994 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: op_sm_set
2995 : TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: zij_fm_set
2996 :
2997 : CHARACTER(len=*), PARAMETER :: routineN = 'zij_matrix'
2998 :
2999 : INTEGER :: handle, i, j, nao, nmoloc
3000 : TYPE(cp_fm_type) :: opvec
3001 :
3002 488 : CALL timeset(routineN, handle)
3003 :
3004 : ! get rows and cols of the input
3005 488 : CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc)
3006 : ! replicate the input kind of matrix
3007 488 : CALL cp_fm_create(opvec, vectors%matrix_struct)
3008 :
3009 : ! Compute zij here
3010 1988 : DO i = 1, SIZE(zij_fm_set, 2)
3011 4988 : DO j = 1, SIZE(zij_fm_set, 1)
3012 3000 : CALL cp_fm_set_all(zij_fm_set(j, i), 0.0_dp)
3013 3000 : CALL cp_dbcsr_sm_fm_multiply(op_sm_set(j, i)%matrix, vectors, opvec, ncol=nmoloc)
3014 : CALL parallel_gemm("T", "N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, &
3015 4500 : zij_fm_set(j, i))
3016 : END DO
3017 : END DO
3018 :
3019 488 : CALL cp_fm_release(opvec)
3020 488 : CALL timestop(handle)
3021 :
3022 488 : END SUBROUTINE zij_matrix
3023 :
3024 : ! **************************************************************************************************
3025 : !> \brief ...
3026 : !> \param vectors ...
3027 : ! **************************************************************************************************
3028 36 : SUBROUTINE scdm_qrfact(vectors)
3029 :
3030 : TYPE(cp_fm_type), INTENT(IN) :: vectors
3031 :
3032 : CHARACTER(len=*), PARAMETER :: routineN = 'scdm_qrfact'
3033 :
3034 : INTEGER :: handle, ncolT, nrowT
3035 36 : REAL(KIND=dp), DIMENSION(:), POINTER :: tau
3036 : TYPE(cp_fm_struct_type), POINTER :: cstruct
3037 : TYPE(cp_fm_type) :: CTp, Qf, tmp
3038 :
3039 36 : CALL timeset(routineN, handle)
3040 :
3041 : ! Create Transpose of Coefficient Matrix vectors
3042 36 : nrowT = vectors%matrix_struct%ncol_global
3043 36 : ncolT = vectors%matrix_struct%nrow_global
3044 :
3045 : CALL cp_fm_struct_create(cstruct, vectors%matrix_struct%para_env, &
3046 : vectors%matrix_struct%context, nrowT, ncolT, vectors%matrix_struct%ncol_block, &
3047 36 : vectors%matrix_struct%nrow_block, first_p_pos=vectors%matrix_struct%first_p_pos)
3048 36 : CALL cp_fm_create(CTp, cstruct)
3049 36 : CALL cp_fm_struct_release(cstruct)
3050 :
3051 108 : ALLOCATE (tau(nrowT))
3052 :
3053 36 : CALL cp_fm_transpose(vectors, CTp)
3054 :
3055 : ! Get QR decomposition of CTs
3056 36 : CALL cp_fm_pdgeqpf(CTp, tau, nrowT, ncolT, 1, 1)
3057 :
3058 : ! Construction of Q from the scalapack output
3059 : CALL cp_fm_struct_create(cstruct, para_env=CTp%matrix_struct%para_env, &
3060 : context=CTp%matrix_struct%context, nrow_global=CTp%matrix_struct%nrow_global, &
3061 36 : ncol_global=CTp%matrix_struct%nrow_global)
3062 36 : CALL cp_fm_create(Qf, cstruct)
3063 36 : CALL cp_fm_struct_release(cstruct)
3064 36 : CALL cp_fm_to_fm_submat(CTp, Qf, nrowT, nrowT, 1, 1, 1, 1)
3065 :
3066 : ! Get Q
3067 36 : CALL cp_fm_pdorgqr(Qf, tau, nrowT, 1, 1)
3068 :
3069 : ! Transform original coefficient matrix vectors
3070 36 : CALL cp_fm_create(tmp, vectors%matrix_struct)
3071 36 : CALL cp_fm_set_all(tmp, 0.0_dp, 1.0_dp)
3072 36 : CALL cp_fm_to_fm(vectors, tmp)
3073 36 : CALL parallel_gemm('N', 'N', ncolT, nrowT, nrowT, 1.0_dp, tmp, Qf, 0.0_dp, vectors)
3074 :
3075 : ! Cleanup
3076 36 : CALL cp_fm_release(CTp)
3077 36 : CALL cp_fm_release(tmp)
3078 36 : CALL cp_fm_release(Qf)
3079 36 : DEALLOCATE (tau)
3080 :
3081 36 : CALL timestop(handle)
3082 :
3083 144 : END SUBROUTINE scdm_qrfact
3084 :
3085 0 : END MODULE qs_localization_methods
|