Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Localization methods such as 2x2 Jacobi rotations
10 : !> Steepest Decents
11 : !> Conjugate Gradient
12 : !> \par History
13 : !> Initial parallellization of jacobi (JVDV 07.2003)
14 : !> direct minimization using exponential parametrization (JVDV 09.2003)
15 : !> crazy rotations go fast (JVDV 10.2003)
16 : !> \author CJM (04.2003)
17 : ! **************************************************************************************************
18 : MODULE qs_localization_methods
19 : USE cell_types, ONLY: cell_type
20 : USE cp_blacs_env, ONLY: cp_blacs_env_type
21 : USE cp_cfm_basic_linalg, ONLY: cp_cfm_column_scale,&
22 : cp_cfm_rot_cols,&
23 : cp_cfm_rot_rows,&
24 : cp_cfm_scale,&
25 : cp_cfm_scale_and_add,&
26 : cp_cfm_schur_product,&
27 : cp_cfm_trace
28 : USE cp_cfm_diag, ONLY: cp_cfm_heevd
29 : USE cp_cfm_types, ONLY: &
30 : cp_cfm_create, cp_cfm_get_element, cp_cfm_get_info, cp_cfm_get_submatrix, cp_cfm_release, &
31 : cp_cfm_set_all, cp_cfm_set_submatrix, cp_cfm_to_cfm, cp_cfm_to_fm, cp_cfm_type, &
32 : cp_fm_to_cfm
33 : USE cp_dbcsr_api, ONLY: dbcsr_p_type
34 : USE cp_dbcsr_operations, ONLY: cp_dbcsr_sm_fm_multiply
35 : USE cp_external_control, ONLY: external_control
36 : USE cp_fm_basic_linalg, ONLY: cp_fm_frobenius_norm,&
37 : cp_fm_pdgeqpf,&
38 : cp_fm_pdorgqr,&
39 : cp_fm_scale,&
40 : cp_fm_scale_and_add,&
41 : cp_fm_trace,&
42 : cp_fm_transpose,&
43 : cp_fm_triangular_multiply
44 : USE cp_fm_cholesky, ONLY: cp_fm_cholesky_decompose
45 : USE cp_fm_diag, ONLY: cp_fm_syevd
46 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
47 : cp_fm_struct_get,&
48 : cp_fm_struct_release,&
49 : cp_fm_struct_type
50 : USE cp_fm_types, ONLY: &
51 : cp_fm_create, cp_fm_get_element, cp_fm_get_info, cp_fm_get_submatrix, cp_fm_init_random, &
52 : cp_fm_maxabsrownorm, cp_fm_maxabsval, cp_fm_release, cp_fm_set_all, cp_fm_set_submatrix, &
53 : cp_fm_to_fm, cp_fm_to_fm_submat, cp_fm_type
54 : USE cp_log_handling, ONLY: cp_logger_get_default_io_unit,&
55 : cp_logger_get_default_unit_nr
56 : USE kahan_sum, ONLY: accurate_sum
57 : USE kinds, ONLY: dp
58 : USE machine, ONLY: m_flush,&
59 : m_walltime
60 : USE mathconstants, ONLY: pi,&
61 : twopi,&
62 : z_zero
63 : USE matrix_exp, ONLY: exp_pade_real,&
64 : get_nsquare_norder
65 : USE message_passing, ONLY: mp_para_env_type
66 : USE parallel_gemm_api, ONLY: parallel_gemm
67 : #include "./base/base_uses.f90"
68 :
69 : IMPLICIT NONE
70 : PUBLIC :: initialize_weights, crazy_rotations, &
71 : direct_mini, rotate_orbitals, approx_l1_norm_sd, jacobi_rotations, scdm_qrfact, zij_matrix, &
72 : jacobi_cg_edf_ls
73 :
74 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'qs_localization_methods'
75 :
76 : PRIVATE
77 :
78 : TYPE set_c_1d_type
79 : COMPLEX(KIND=dp), POINTER, DIMENSION(:) :: c_array => NULL()
80 : END TYPE set_c_1d_type
81 :
82 : TYPE set_c_2d_type
83 : COMPLEX(KIND=dp), POINTER, DIMENSION(:, :) :: c_array => NULL()
84 : END TYPE set_c_2d_type
85 :
86 : CONTAINS
87 : ! **************************************************************************************************
88 : !> \brief ...
89 : !> \param C ...
90 : !> \param iterations ...
91 : !> \param eps ...
92 : !> \param converged ...
93 : !> \param sweeps ...
94 : ! **************************************************************************************************
95 210 : SUBROUTINE approx_l1_norm_sd(C, iterations, eps, converged, sweeps)
96 : TYPE(cp_fm_type), INTENT(IN) :: C
97 : INTEGER, INTENT(IN) :: iterations
98 : REAL(KIND=dp), INTENT(IN) :: eps
99 : LOGICAL, INTENT(INOUT) :: converged
100 : INTEGER, INTENT(INOUT) :: sweeps
101 :
102 : CHARACTER(len=*), PARAMETER :: routineN = 'approx_l1_norm_sd'
103 : INTEGER, PARAMETER :: taylor_order = 100
104 : REAL(KIND=dp), PARAMETER :: alpha = 0.1_dp, f2_eps = 0.01_dp
105 :
106 : INTEGER :: handle, i, istep, k, n, ncol_local, &
107 : nrow_local, output_unit, p
108 : REAL(KIND=dp) :: expfactor, f2, f2old, gnorm, tnorm
109 : TYPE(cp_blacs_env_type), POINTER :: context
110 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_k_k
111 : TYPE(cp_fm_type) :: CTmp, G, Gp1, Gp2, U
112 : TYPE(mp_para_env_type), POINTER :: para_env
113 :
114 30 : CALL timeset(routineN, handle)
115 :
116 30 : NULLIFY (context, para_env, fm_struct_k_k)
117 :
118 30 : output_unit = cp_logger_get_default_io_unit()
119 :
120 : CALL cp_fm_struct_get(C%matrix_struct, nrow_global=n, ncol_global=k, &
121 : nrow_local=nrow_local, ncol_local=ncol_local, &
122 30 : para_env=para_env, context=context)
123 : CALL cp_fm_struct_create(fm_struct_k_k, para_env=para_env, context=context, &
124 30 : nrow_global=k, ncol_global=k)
125 30 : CALL cp_fm_create(CTmp, C%matrix_struct)
126 30 : CALL cp_fm_create(U, fm_struct_k_k)
127 30 : CALL cp_fm_create(G, fm_struct_k_k)
128 30 : CALL cp_fm_create(Gp1, fm_struct_k_k)
129 30 : CALL cp_fm_create(Gp2, fm_struct_k_k)
130 : !
131 : ! printing
132 30 : IF (output_unit > 0) THEN
133 15 : WRITE (output_unit, '(1X)')
134 15 : WRITE (output_unit, '(2X,A)') '-----------------------------------------------------------------------------'
135 15 : WRITE (output_unit, '(A,I5)') ' Nbr iterations =', iterations
136 15 : WRITE (output_unit, '(A,E10.2)') ' eps convergence =', eps
137 15 : WRITE (output_unit, '(A,I5)') ' Max Taylor order =', taylor_order
138 15 : WRITE (output_unit, '(A,E10.2)') ' f2 eps =', f2_eps
139 15 : WRITE (output_unit, '(A,E10.2)') ' alpha =', alpha
140 15 : WRITE (output_unit, '(A)') ' iteration approx_l1_norm g_norm rel_err'
141 : END IF
142 : !
143 30 : f2old = 0.0_dp
144 30 : converged = .FALSE.
145 : !
146 : ! Start the steepest descent
147 1480 : DO istep = 1, iterations
148 : !
149 : !-------------------------------------------------------------------
150 : ! compute f_2
151 : ! f_2(x)=(x^2+eps)^1/2
152 1470 : f2 = 0.0_dp
153 22748 : DO p = 1, ncol_local ! p
154 1100956 : DO i = 1, nrow_local ! i
155 1099486 : f2 = f2 + SQRT(C%local_data(i, p)**2 + f2_eps)
156 : END DO
157 : END DO
158 1470 : CALL C%matrix_struct%para_env%sum(f2)
159 : !write(*,*) 'qs_localize: f_2=',f2
160 : !-------------------------------------------------------------------
161 : ! compute the derivative of f_2
162 : ! f_2(x)=(x^2+eps)^1/2
163 22748 : DO p = 1, ncol_local ! p
164 1100956 : DO i = 1, nrow_local ! i
165 1099486 : CTmp%local_data(i, p) = C%local_data(i, p)/SQRT(C%local_data(i, p)**2 + f2_eps)
166 : END DO
167 : END DO
168 1470 : CALL parallel_gemm('T', 'N', k, k, n, 1.0_dp, CTmp, C, 0.0_dp, G)
169 : ! antisymmetrize
170 1470 : CALL cp_fm_transpose(G, U)
171 1470 : CALL cp_fm_scale_and_add(-0.5_dp, G, 0.5_dp, U)
172 : !
173 : !-------------------------------------------------------------------
174 : !
175 1470 : gnorm = cp_fm_frobenius_norm(G)
176 : !write(*,*) 'qs_localize: norm(G)=',gnorm
177 : !
178 : ! rescale for steepest descent
179 1470 : CALL cp_fm_scale(-alpha, G)
180 : !
181 : ! compute unitary transform
182 : ! zeroth order
183 1470 : CALL cp_fm_set_all(U, 0.0_dp, 1.0_dp)
184 : ! first order
185 1470 : expfactor = 1.0_dp
186 1470 : CALL cp_fm_scale_and_add(1.0_dp, U, expfactor, G)
187 1470 : tnorm = cp_fm_frobenius_norm(G)
188 : !write(*,*) 'Taylor expansion i=',1,' norm(X^i)/i!=',tnorm
189 1470 : IF (tnorm > 1.0E-10_dp) THEN
190 : ! other orders
191 1470 : CALL cp_fm_to_fm(G, Gp1)
192 4866 : DO i = 2, taylor_order
193 : ! new power of G
194 4866 : CALL parallel_gemm('N', 'N', k, k, k, 1.0_dp, G, Gp1, 0.0_dp, Gp2)
195 4866 : CALL cp_fm_to_fm(Gp2, Gp1)
196 : ! add to the taylor expansion so far
197 4866 : expfactor = expfactor/REAL(i, KIND=dp)
198 4866 : CALL cp_fm_scale_and_add(1.0_dp, U, expfactor, Gp1)
199 4866 : tnorm = cp_fm_frobenius_norm(Gp1)
200 : !write(*,*) 'Taylor expansion i=',i,' norm(X^i)/i!=',tnorm*expfactor
201 4866 : IF (tnorm*expfactor < 1.0E-10_dp) EXIT
202 : END DO
203 : END IF
204 : !
205 : ! incrementaly rotate the MOs
206 1470 : CALL parallel_gemm('N', 'N', n, k, k, 1.0_dp, C, U, 0.0_dp, CTmp)
207 1470 : CALL cp_fm_to_fm(CTmp, C)
208 : !
209 : ! printing
210 1470 : IF (output_unit > 0) THEN
211 735 : WRITE (output_unit, '(10X,I4,E18.10,2E10.2)') istep, f2, gnorm, ABS((f2 - f2old)/f2)
212 : END IF
213 : !
214 : ! Are we done?
215 1470 : sweeps = istep
216 : !IF(gnorm<=grad_thresh.AND.ABS((f2-f2old)/f2)<=f2_thresh.AND.istep>1) THEN
217 1470 : IF (ABS((f2 - f2old)/f2) <= eps .AND. istep > 1) THEN
218 20 : converged = .TRUE.
219 20 : EXIT
220 : END IF
221 1460 : f2old = f2
222 : END DO
223 : !
224 : ! here we should do one refine step to enforce C'*S*C=1 for any case
225 : !
226 : ! Print the final result
227 30 : IF (output_unit > 0) WRITE (output_unit, '(A,E16.10)') ' sparseness function f2 = ', f2
228 : !
229 : ! sparsity
230 : !DO i=1,size(thresh,1)
231 : ! gnorm = 0.0_dp
232 : ! DO o=1,ncol_local
233 : ! DO p=1,nrow_local
234 : ! IF(ABS(C%local_data(p,o))>thresh(i)) THEN
235 : ! gnorm = gnorm + 1.0_dp
236 : ! ENDIF
237 : ! ENDDO
238 : ! ENDDO
239 : ! CALL C%matrix_struct%para_env%sum(gnorm)
240 : ! IF(output_unit>0) THEN
241 : ! WRITE(output_unit,*) 'qs_localize: ratio2=',gnorm / ( REAL(k,KIND=dp)*REAL(n,KIND=dp) ),thresh(i)
242 : ! ENDIF
243 : !ENDDO
244 : !
245 : ! deallocate
246 30 : CALL cp_fm_struct_release(fm_struct_k_k)
247 30 : CALL cp_fm_release(CTmp)
248 30 : CALL cp_fm_release(U)
249 30 : CALL cp_fm_release(G)
250 30 : CALL cp_fm_release(Gp1)
251 30 : CALL cp_fm_release(Gp2)
252 :
253 30 : CALL timestop(handle)
254 :
255 30 : END SUBROUTINE approx_l1_norm_sd
256 : ! **************************************************************************************************
257 : !> \brief ...
258 : !> \param cell ...
259 : !> \param weights ...
260 : ! **************************************************************************************************
261 476 : SUBROUTINE initialize_weights(cell, weights)
262 :
263 : TYPE(cell_type), POINTER :: cell
264 : REAL(KIND=dp), DIMENSION(:) :: weights
265 :
266 : REAL(KIND=dp), DIMENSION(3, 3) :: metric
267 :
268 476 : CPASSERT(ASSOCIATED(cell))
269 :
270 476 : metric = 0.0_dp
271 476 : CALL dgemm('T', 'N', 3, 3, 3, 1._dp, cell%hmat(:, :), 3, cell%hmat(:, :), 3, 0.0_dp, metric(:, :), 3)
272 :
273 476 : weights(1) = METRIC(1, 1) - METRIC(1, 2) - METRIC(1, 3)
274 476 : weights(2) = METRIC(2, 2) - METRIC(1, 2) - METRIC(2, 3)
275 476 : weights(3) = METRIC(3, 3) - METRIC(1, 3) - METRIC(2, 3)
276 476 : weights(4) = METRIC(1, 2)
277 476 : weights(5) = METRIC(1, 3)
278 476 : weights(6) = METRIC(2, 3)
279 :
280 476 : END SUBROUTINE initialize_weights
281 :
282 : ! **************************************************************************************************
283 : !> \brief wrapper for the jacobi routines, should be removed if jacobi_rot_para
284 : !> can deal with serial para_envs.
285 : !> \param weights ...
286 : !> \param zij ...
287 : !> \param vectors ...
288 : !> \param para_env ...
289 : !> \param max_iter ...
290 : !> \param eps_localization ...
291 : !> \param sweeps ...
292 : !> \param out_each ...
293 : !> \param target_time ...
294 : !> \param start_time ...
295 : !> \param restricted ...
296 : !> \par History
297 : !> \author Joost VandeVondele (02.2010)
298 : ! **************************************************************************************************
299 390 : SUBROUTINE jacobi_rotations(weights, zij, vectors, para_env, max_iter, &
300 : eps_localization, sweeps, out_each, target_time, start_time, restricted)
301 :
302 : REAL(KIND=dp), INTENT(IN) :: weights(:)
303 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
304 : TYPE(mp_para_env_type), POINTER :: para_env
305 : INTEGER, INTENT(IN) :: max_iter
306 : REAL(KIND=dp), INTENT(IN) :: eps_localization
307 : INTEGER :: sweeps
308 : INTEGER, INTENT(IN) :: out_each
309 : REAL(dp) :: target_time, start_time
310 : INTEGER :: restricted
311 :
312 390 : IF (para_env%num_pe == 1) THEN
313 : CALL jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, &
314 0 : sweeps, out_each, restricted=restricted)
315 : ELSE
316 : CALL jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
317 390 : sweeps, out_each, target_time, start_time, restricted=restricted)
318 : END IF
319 :
320 390 : END SUBROUTINE jacobi_rotations
321 :
322 : ! **************************************************************************************************
323 : !> \brief this routine, private to the module is a serial backup, till we have jacobi_rot_para to work in serial
324 : !> while the routine below works in parallel, it is too slow to be useful
325 : !> \param weights ...
326 : !> \param zij ...
327 : !> \param vectors ...
328 : !> \param max_iter ...
329 : !> \param eps_localization ...
330 : !> \param sweeps ...
331 : !> \param out_each ...
332 : !> \param restricted ...
333 : ! **************************************************************************************************
334 0 : SUBROUTINE jacobi_rotations_serial(weights, zij, vectors, max_iter, eps_localization, sweeps, &
335 : out_each, restricted)
336 : REAL(KIND=dp), INTENT(IN) :: weights(:)
337 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
338 : INTEGER, INTENT(IN) :: max_iter
339 : REAL(KIND=dp), INTENT(IN) :: eps_localization
340 : INTEGER :: sweeps
341 : INTEGER, INTENT(IN) :: out_each
342 : INTEGER :: restricted
343 :
344 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rotations_serial'
345 :
346 0 : COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
347 : INTEGER :: dim2, handle, idim, istate, jstate, &
348 : nstate, unit_nr
349 : REAL(KIND=dp) :: ct, st, t1, t2, theta, tolerance
350 : TYPE(cp_cfm_type) :: c_rmat
351 : TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
352 : TYPE(cp_fm_type) :: rmat
353 :
354 0 : CALL timeset(routineN, handle)
355 :
356 0 : dim2 = SIZE(zij, 2)
357 0 : ALLOCATE (c_zij(dim2))
358 : NULLIFY (mii, mij, mjj)
359 0 : ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
360 :
361 0 : CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
362 0 : CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
363 :
364 0 : CALL cp_cfm_create(c_rmat, zij(1, 1)%matrix_struct)
365 0 : CALL cp_cfm_set_all(c_rmat, (0._dp, 0._dp), (1._dp, 0._dp))
366 0 : DO idim = 1, dim2
367 0 : CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
368 : c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
369 0 : zij(2, idim)%local_data, dp)
370 : END DO
371 :
372 0 : CALL cp_fm_get_info(rmat, nrow_global=nstate)
373 0 : tolerance = 1.0e10_dp
374 :
375 0 : sweeps = 0
376 0 : unit_nr = -1
377 0 : IF (rmat%matrix_struct%para_env%is_source()) THEN
378 0 : unit_nr = cp_logger_get_default_unit_nr()
379 0 : WRITE (unit_nr, '(T4,A )') " Localization by iterative Jacobi rotation"
380 : END IF
381 :
382 0 : IF (restricted > 0) THEN
383 0 : unit_nr = cp_logger_get_default_unit_nr()
384 0 : WRITE (unit_nr, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
385 0 : nstate = nstate - restricted
386 : END IF
387 :
388 : ! do jacobi sweeps until converged
389 0 : DO WHILE (tolerance >= eps_localization .AND. sweeps < max_iter)
390 0 : sweeps = sweeps + 1
391 0 : t1 = m_walltime()
392 :
393 0 : DO istate = 1, nstate
394 0 : DO jstate = istate + 1, nstate
395 0 : DO idim = 1, dim2
396 0 : CALL cp_cfm_get_element(c_zij(idim), istate, istate, mii(idim))
397 0 : CALL cp_cfm_get_element(c_zij(idim), istate, jstate, mij(idim))
398 0 : CALL cp_cfm_get_element(c_zij(idim), jstate, jstate, mjj(idim))
399 : END DO
400 0 : CALL get_angle(mii, mjj, mij, weights, theta)
401 0 : st = SIN(theta)
402 0 : ct = COS(theta)
403 0 : CALL rotate_zij(istate, jstate, st, ct, c_zij)
404 :
405 0 : CALL rotate_rmat(istate, jstate, st, ct, c_rmat)
406 : END DO
407 : END DO
408 :
409 0 : CALL check_tolerance(c_zij, weights, tolerance)
410 :
411 0 : t2 = m_walltime()
412 0 : IF (unit_nr > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
413 : WRITE (unit_nr, '(T4,A,I7,T30,A,E12.4,T60,A,F8.3)') &
414 0 : "Iteration:", sweeps, "Tolerance:", tolerance, "Time:", t2 - t1
415 0 : CALL m_flush(unit_nr)
416 : END IF
417 :
418 : END DO
419 :
420 0 : DO idim = 1, dim2
421 0 : zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
422 0 : zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
423 0 : CALL cp_cfm_release(c_zij(idim))
424 : END DO
425 0 : DEALLOCATE (c_zij)
426 0 : DEALLOCATE (mii, mij, mjj)
427 0 : rmat%local_data = REAL(c_rmat%local_data, dp)
428 0 : CALL cp_cfm_release(c_rmat)
429 0 : CALL rotate_orbitals(rmat, vectors)
430 0 : CALL cp_fm_release(rmat)
431 :
432 0 : CALL timestop(handle)
433 :
434 0 : END SUBROUTINE jacobi_rotations_serial
435 : ! **************************************************************************************************
436 : !> \brief very similar to jacobi_rotations_serial with some extra output options
437 : !> \param weights ...
438 : !> \param c_zij ...
439 : !> \param max_iter ...
440 : !> \param c_rmat ...
441 : !> \param eps_localization ...
442 : !> \param tol_out ...
443 : !> \param jsweeps ...
444 : !> \param out_each ...
445 : !> \param c_zij_out ...
446 : !> \param grad_final ...
447 : ! **************************************************************************************************
448 0 : SUBROUTINE jacobi_rotations_serial_1(weights, c_zij, max_iter, c_rmat, eps_localization, &
449 0 : tol_out, jsweeps, out_each, c_zij_out, grad_final)
450 : REAL(KIND=dp), INTENT(IN) :: weights(:)
451 : TYPE(cp_cfm_type), INTENT(IN) :: c_zij(:)
452 : INTEGER, INTENT(IN) :: max_iter
453 : TYPE(cp_cfm_type), INTENT(IN) :: c_rmat
454 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: eps_localization
455 : REAL(KIND=dp), INTENT(OUT), OPTIONAL :: tol_out
456 : INTEGER, INTENT(OUT), OPTIONAL :: jsweeps
457 : INTEGER, INTENT(IN), OPTIONAL :: out_each
458 : TYPE(cp_cfm_type), INTENT(IN), OPTIONAL :: c_zij_out(:)
459 : TYPE(cp_fm_type), INTENT(OUT), OPTIONAL, POINTER :: grad_final
460 :
461 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rotations_serial_1'
462 :
463 : COMPLEX(KIND=dp) :: mzii
464 : COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
465 : INTEGER :: dim2, handle, idim, istate, jstate, &
466 : nstate, sweeps, unit_nr
467 : REAL(KIND=dp) :: alpha, avg_spread_ii, ct, spread_ii, st, &
468 : sum_spread_ii, t1, t2, theta, tolerance
469 : TYPE(cp_cfm_type) :: c_rmat_local
470 : TYPE(cp_cfm_type), ALLOCATABLE :: c_zij_local(:)
471 :
472 0 : CALL timeset(routineN, handle)
473 :
474 0 : dim2 = SIZE(c_zij)
475 : NULLIFY (mii, mij, mjj)
476 0 : ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
477 :
478 0 : ALLOCATE (c_zij_local(dim2))
479 0 : CALL cp_cfm_create(c_rmat_local, c_rmat%matrix_struct)
480 0 : CALL cp_cfm_set_all(c_rmat_local, (0.0_dp, 0.0_dp), (1.0_dp, 0.0_dp))
481 0 : DO idim = 1, dim2
482 0 : CALL cp_cfm_create(c_zij_local(idim), c_zij(idim)%matrix_struct)
483 0 : c_zij_local(idim)%local_data = c_zij(idim)%local_data
484 : END DO
485 :
486 0 : CALL cp_cfm_get_info(c_rmat_local, nrow_global=nstate)
487 0 : tolerance = 1.0e10_dp
488 :
489 0 : IF (PRESENT(grad_final)) CALL cp_fm_set_all(grad_final, 0.0_dp)
490 :
491 0 : sweeps = 0
492 0 : IF (PRESENT(out_each)) THEN
493 0 : unit_nr = -1
494 0 : IF (c_rmat_local%matrix_struct%para_env%is_source()) THEN
495 0 : unit_nr = cp_logger_get_default_unit_nr()
496 : END IF
497 0 : alpha = 0.0_dp
498 0 : DO idim = 1, dim2
499 0 : alpha = alpha + weights(idim)
500 : END DO
501 : END IF
502 :
503 : ! do jacobi sweeps until converged
504 0 : DO WHILE (sweeps < max_iter)
505 0 : sweeps = sweeps + 1
506 0 : IF (PRESENT(eps_localization)) THEN
507 0 : IF (tolerance < eps_localization) EXIT
508 : END IF
509 0 : IF (PRESENT(out_each)) t1 = m_walltime()
510 :
511 0 : DO istate = 1, nstate
512 0 : DO jstate = istate + 1, nstate
513 0 : DO idim = 1, dim2
514 0 : CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mii(idim))
515 0 : CALL cp_cfm_get_element(c_zij_local(idim), istate, jstate, mij(idim))
516 0 : CALL cp_cfm_get_element(c_zij_local(idim), jstate, jstate, mjj(idim))
517 : END DO
518 0 : CALL get_angle(mii, mjj, mij, weights, theta)
519 0 : st = SIN(theta)
520 0 : ct = COS(theta)
521 0 : CALL rotate_zij(istate, jstate, st, ct, c_zij_local)
522 :
523 0 : CALL rotate_rmat(istate, jstate, st, ct, c_rmat_local)
524 : END DO
525 : END DO
526 :
527 0 : IF (PRESENT(grad_final)) THEN
528 0 : CALL check_tolerance(c_zij_local, weights, tolerance, grad=grad_final)
529 : ELSE
530 0 : CALL check_tolerance(c_zij_local, weights, tolerance)
531 : END IF
532 0 : IF (PRESENT(tol_out)) tol_out = tolerance
533 :
534 0 : IF (PRESENT(out_each)) THEN
535 0 : t2 = m_walltime()
536 0 : IF (unit_nr > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
537 0 : sum_spread_ii = 0.0_dp
538 0 : DO istate = 1, nstate
539 : spread_ii = 0.0_dp
540 0 : DO idim = 1, dim2
541 0 : CALL cp_cfm_get_element(c_zij_local(idim), istate, istate, mzii)
542 : spread_ii = spread_ii + weights(idim)* &
543 0 : ABS(mzii)**2/twopi/twopi
544 : END DO
545 0 : sum_spread_ii = sum_spread_ii + spread_ii
546 : END DO
547 0 : sum_spread_ii = alpha*nstate/twopi/twopi - sum_spread_ii
548 0 : avg_spread_ii = sum_spread_ii/nstate
549 : WRITE (unit_nr, '(T4,A,T26,A,T48,A,T64,A)') &
550 0 : "Iteration", "Avg. Spread_ii", "Tolerance", "Time"
551 : WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4,T60,F8.3)') &
552 0 : sweeps, avg_spread_ii, tolerance, t2 - t1
553 0 : CALL m_flush(unit_nr)
554 : END IF
555 0 : IF (PRESENT(jsweeps)) jsweeps = sweeps
556 : END IF
557 :
558 : END DO
559 :
560 0 : IF (PRESENT(c_zij_out)) THEN
561 0 : DO idim = 1, dim2
562 0 : CALL cp_cfm_to_cfm(c_zij_local(idim), c_zij_out(idim))
563 : END DO
564 : END IF
565 0 : CALL cp_cfm_to_cfm(c_rmat_local, c_rmat)
566 :
567 0 : DEALLOCATE (mii, mij, mjj)
568 0 : DO idim = 1, dim2
569 0 : CALL cp_cfm_release(c_zij_local(idim))
570 : END DO
571 0 : DEALLOCATE (c_zij_local)
572 0 : CALL cp_cfm_release(c_rmat_local)
573 :
574 0 : CALL timestop(handle)
575 :
576 0 : END SUBROUTINE jacobi_rotations_serial_1
577 : ! **************************************************************************************************
578 : !> \brief combine jacobi rotations (serial) and conjugate gradient with golden section line search
579 : !> for partially occupied wannier functions
580 : !> \param para_env ...
581 : !> \param weights ...
582 : !> \param zij ...
583 : !> \param vectors ...
584 : !> \param max_iter ...
585 : !> \param eps_localization ...
586 : !> \param iter ...
587 : !> \param out_each ...
588 : !> \param nextra ...
589 : !> \param do_cg ...
590 : !> \param nmo ...
591 : !> \param vectors_2 ...
592 : !> \param mos_guess ...
593 : ! **************************************************************************************************
594 2 : SUBROUTINE jacobi_cg_edf_ls(para_env, weights, zij, vectors, max_iter, eps_localization, &
595 : iter, out_each, nextra, do_cg, nmo, vectors_2, mos_guess)
596 : TYPE(mp_para_env_type), POINTER :: para_env
597 : REAL(KIND=dp), INTENT(IN) :: weights(:)
598 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
599 : INTEGER, INTENT(IN) :: max_iter
600 : REAL(KIND=dp), INTENT(IN) :: eps_localization
601 : INTEGER :: iter
602 : INTEGER, INTENT(IN) :: out_each, nextra
603 : LOGICAL, INTENT(IN) :: do_cg
604 : INTEGER, INTENT(IN), OPTIONAL :: nmo
605 : TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: vectors_2, mos_guess
606 :
607 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_cg_edf_ls'
608 : COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
609 : czero = (0.0_dp, 0.0_dp)
610 : REAL(KIND=dp), PARAMETER :: gold_sec = 0.3819_dp
611 :
612 : COMPLEX(KIND=dp) :: cnorm2_Gct, cnorm2_Gct_cross, mzii
613 2 : COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_cmat
614 2 : COMPLEX(KIND=dp), DIMENSION(:), POINTER :: arr_zii
615 2 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: matrix_zii
616 : INTEGER :: dim2, handle, icinit, idim, istate, line_search_count, line_searches, lsl, lsm, &
617 : lsr, miniter, nao, ndummy, nocc, norextra, northo, nstate, unit_nr
618 : INTEGER, DIMENSION(1) :: iloc
619 : LOGICAL :: do_cinit_mo, do_cinit_random, &
620 : do_U_guess_mo, new_direction
621 : REAL(KIND=dp) :: alpha, avg_spread_ii, beta, beta_pr, ds, ds_min, mintol, norm, norm2_Gct, &
622 : norm2_Gct_cross, norm2_old, spread_ii, spread_sum, sum_spread_ii, t1, tol, tolc, weight
623 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: sum_spread
624 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: tmp_mat, tmp_mat_1
625 : REAL(KIND=dp), DIMENSION(50) :: energy, pos
626 2 : REAL(KIND=dp), DIMENSION(:), POINTER :: tmp_arr
627 : TYPE(cp_blacs_env_type), POINTER :: context
628 : TYPE(cp_cfm_type) :: c_tilde, ctrans_lambda, Gct_old, &
629 : grad_ctilde, skc, tmp_cfm, tmp_cfm_1, &
630 : tmp_cfm_2, U, UL, V, VL, zdiag
631 2 : TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij, zij_0
632 : TYPE(cp_fm_struct_type), POINTER :: tmp_fm_struct
633 : TYPE(cp_fm_type) :: id_nextra, matrix_U, matrix_V, &
634 : matrix_V_all, rmat, tmp_fm, vectors_all
635 :
636 2 : CALL timeset(routineN, handle)
637 :
638 2 : dim2 = SIZE(zij, 2)
639 2 : NULLIFY (context)
640 2 : NULLIFY (matrix_zii, arr_zii)
641 2 : NULLIFY (tmp_fm_struct)
642 2 : NULLIFY (tmp_arr)
643 :
644 12 : ALLOCATE (c_zij(dim2))
645 :
646 2 : CALL cp_fm_get_info(zij(1, 1), nrow_global=nstate)
647 :
648 6 : ALLOCATE (sum_spread(nstate))
649 8 : ALLOCATE (matrix_zii(nstate, dim2))
650 266 : matrix_zii = czero
651 88 : sum_spread = 0.0_dp
652 :
653 : alpha = 0.0_dp
654 8 : DO idim = 1, dim2
655 6 : alpha = alpha + weights(idim)
656 6 : CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
657 : c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
658 5813 : zij(2, idim)%local_data, dp)
659 : END DO
660 :
661 10 : ALLOCATE (zij_0(dim2))
662 :
663 2 : CALL cp_cfm_create(U, zij(1, 1)%matrix_struct)
664 2 : CALL cp_fm_create(matrix_U, zij(1, 1)%matrix_struct)
665 :
666 2 : CALL cp_cfm_set_all(U, czero, cone)
667 2 : CALL cp_fm_set_all(matrix_U, 0.0_dp, 1.0_dp)
668 :
669 2 : CALL cp_fm_get_info(vectors, nrow_global=nao)
670 2 : IF (nextra > 0) THEN
671 2 : IF (PRESENT(mos_guess)) THEN
672 2 : do_cinit_random = .FALSE.
673 2 : do_cinit_mo = .TRUE.
674 2 : CALL cp_fm_get_info(mos_guess, ncol_global=ndummy)
675 : ELSE
676 0 : do_cinit_random = .TRUE.
677 0 : do_cinit_mo = .FALSE.
678 0 : ndummy = nstate
679 : END IF
680 :
681 : IF (do_cinit_random) THEN
682 : icinit = 1
683 : do_U_guess_mo = .FALSE.
684 : ELSEIF (do_cinit_mo) THEN
685 2 : icinit = 2
686 2 : do_U_guess_mo = .TRUE.
687 : END IF
688 :
689 2 : nocc = nstate - nextra
690 2 : northo = nmo - nocc
691 2 : norextra = nmo - nstate
692 2 : CALL cp_fm_struct_get(zij(1, 1)%matrix_struct, context=context)
693 :
694 8 : ALLOCATE (tmp_cmat(nstate, nstate))
695 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nmo, &
696 2 : para_env=para_env, context=context)
697 8 : DO idim = 1, dim2
698 6 : CALL cp_cfm_create(zij_0(idim), tmp_fm_struct)
699 6 : CALL cp_cfm_set_all(zij_0(idim), czero, cone)
700 6 : CALL cp_cfm_get_submatrix(c_zij(idim), tmp_cmat)
701 8 : CALL cp_cfm_set_submatrix(zij_0(idim), tmp_cmat)
702 : END DO
703 2 : CALL cp_fm_struct_release(tmp_fm_struct)
704 2 : DEALLOCATE (tmp_cmat)
705 :
706 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=nstate, &
707 2 : para_env=para_env, context=context)
708 2 : CALL cp_cfm_create(V, tmp_fm_struct)
709 2 : CALL cp_fm_create(matrix_V, tmp_fm_struct)
710 2 : CALL cp_cfm_create(zdiag, tmp_fm_struct)
711 2 : CALL cp_fm_create(rmat, tmp_fm_struct)
712 2 : CALL cp_fm_struct_release(tmp_fm_struct)
713 2 : CALL cp_cfm_set_all(V, czero, cone)
714 2 : CALL cp_fm_set_all(matrix_V, 0.0_dp, 1.0_dp)
715 :
716 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nmo, ncol_global=ndummy, &
717 2 : para_env=para_env, context=context)
718 2 : CALL cp_fm_create(matrix_V_all, tmp_fm_struct)
719 2 : CALL cp_fm_struct_release(tmp_fm_struct)
720 2 : CALL cp_fm_set_all(matrix_V_all, 0._dp, 1._dp)
721 :
722 6 : ALLOCATE (arr_zii(nstate))
723 :
724 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
725 2 : para_env=para_env, context=context)
726 2 : CALL cp_cfm_create(c_tilde, tmp_fm_struct)
727 2 : CALL cp_cfm_create(grad_ctilde, tmp_fm_struct)
728 2 : CALL cp_cfm_create(Gct_old, tmp_fm_struct)
729 2 : CALL cp_cfm_create(skc, tmp_fm_struct)
730 2 : CALL cp_fm_struct_release(tmp_fm_struct)
731 2 : CALL cp_cfm_set_all(c_tilde, czero)
732 2 : CALL cp_cfm_set_all(Gct_old, czero)
733 2 : CALL cp_cfm_set_all(skc, czero)
734 :
735 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nstate, &
736 2 : para_env=para_env, context=context)
737 2 : CALL cp_cfm_create(VL, tmp_fm_struct)
738 2 : CALL cp_cfm_set_all(VL, czero)
739 2 : CALL cp_fm_struct_release(tmp_fm_struct)
740 :
741 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nextra, &
742 2 : para_env=para_env, context=context)
743 2 : CALL cp_fm_create(id_nextra, tmp_fm_struct)
744 2 : CALL cp_cfm_create(ctrans_lambda, tmp_fm_struct)
745 2 : CALL cp_fm_struct_release(tmp_fm_struct)
746 2 : CALL cp_cfm_set_all(ctrans_lambda, czero)
747 2 : CALL cp_fm_set_all(id_nextra, 0.0_dp, 1.0_dp)
748 :
749 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nextra, ncol_global=nstate, &
750 2 : para_env=para_env, context=context)
751 2 : CALL cp_cfm_create(UL, tmp_fm_struct)
752 2 : CALL cp_fm_struct_release(tmp_fm_struct)
753 2 : CALL cp_cfm_set_all(UL, czero)
754 :
755 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=nao, ncol_global=nmo, &
756 2 : para_env=para_env, context=context)
757 2 : CALL cp_fm_create(vectors_all, tmp_fm_struct)
758 2 : CALL cp_fm_struct_release(tmp_fm_struct)
759 8 : ALLOCATE (tmp_mat(nao, nstate))
760 2 : CALL cp_fm_get_submatrix(vectors, tmp_mat)
761 2 : CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, 1, nao, nstate)
762 2 : DEALLOCATE (tmp_mat)
763 8 : ALLOCATE (tmp_mat(nao, norextra))
764 2 : CALL cp_fm_get_submatrix(vectors_2, tmp_mat)
765 2 : CALL cp_fm_set_submatrix(vectors_all, tmp_mat, 1, nstate + 1, nao, norextra)
766 2 : DEALLOCATE (tmp_mat)
767 :
768 : ! initialize c_tilde
769 14 : SELECT CASE (icinit)
770 : CASE (1) ! random coefficients
771 : !WRITE (*, *) "RANDOM INITIAL GUESS FOR C"
772 0 : CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
773 0 : CALL cp_fm_init_random(tmp_fm, nextra)
774 0 : CALL ortho_vectors(tmp_fm)
775 0 : c_tilde%local_data = tmp_fm%local_data
776 0 : CALL cp_fm_release(tmp_fm)
777 0 : ALLOCATE (tmp_cmat(northo, nextra))
778 0 : CALL cp_cfm_get_submatrix(c_tilde, tmp_cmat)
779 0 : CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, nocc + 1, northo, nextra)
780 0 : DEALLOCATE (tmp_cmat)
781 : CASE (2) ! MO based coeffs
782 2 : CALL parallel_gemm("T", "N", nmo, ndummy, nao, 1.0_dp, vectors_all, mos_guess, 0.0_dp, matrix_V_all)
783 6 : ALLOCATE (tmp_arr(nmo))
784 8 : ALLOCATE (tmp_mat(nmo, ndummy))
785 8 : ALLOCATE (tmp_mat_1(nmo, nstate))
786 : ! normalize matrix_V_all
787 2 : CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat)
788 88 : DO istate = 1, ndummy
789 4558 : tmp_arr(:) = tmp_mat(:, istate)
790 4558 : norm = norm2(tmp_arr)
791 4558 : tmp_arr(:) = tmp_arr(:)/norm
792 4560 : tmp_mat(:, istate) = tmp_arr(:)
793 : END DO
794 2 : CALL cp_fm_set_submatrix(matrix_V_all, tmp_mat)
795 2 : CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat_1, 1, 1, nmo, nstate)
796 2 : CALL cp_fm_set_submatrix(matrix_V, tmp_mat_1)
797 2 : DEALLOCATE (tmp_arr, tmp_mat, tmp_mat_1)
798 2 : CALL cp_fm_to_cfm(msourcer=matrix_V, mtarget=V)
799 8 : ALLOCATE (tmp_mat(northo, ndummy))
800 8 : ALLOCATE (tmp_mat_1(northo, nextra))
801 2 : CALL cp_fm_get_submatrix(matrix_V_all, tmp_mat, nocc + 1, 1, northo, ndummy)
802 6 : ALLOCATE (tmp_arr(ndummy))
803 88 : tmp_arr = 0.0_dp
804 88 : DO istate = 1, ndummy
805 1034 : tmp_arr(istate) = norm2(tmp_mat(:, istate))
806 : END DO
807 : ! find edfs
808 6 : DO istate = 1, nextra
809 180 : iloc = MAXLOC(tmp_arr)
810 48 : tmp_mat_1(:, istate) = tmp_mat(:, iloc(1))
811 6 : tmp_arr(iloc(1)) = 0.0_dp
812 : END DO
813 :
814 2 : DEALLOCATE (tmp_arr, tmp_mat)
815 :
816 : CALL cp_fm_struct_create(tmp_fm_struct, nrow_global=northo, ncol_global=nextra, &
817 2 : para_env=para_env, context=context)
818 2 : CALL cp_fm_create(tmp_fm, tmp_fm_struct)
819 2 : CALL cp_fm_struct_release(tmp_fm_struct)
820 2 : CALL cp_fm_set_submatrix(tmp_fm, tmp_mat_1)
821 2 : DEALLOCATE (tmp_mat_1)
822 2 : CALL ortho_vectors(tmp_fm)
823 2 : CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
824 2 : CALL cp_fm_release(tmp_fm)
825 : ! initialize U
826 2 : IF (do_U_guess_mo) THEN
827 8 : ALLOCATE (tmp_cmat(nocc, nstate))
828 2 : CALL cp_cfm_get_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
829 2 : CALL cp_cfm_set_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
830 2 : DEALLOCATE (tmp_cmat)
831 8 : ALLOCATE (tmp_cmat(northo, nstate))
832 2 : CALL cp_cfm_get_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
833 2 : CALL cp_cfm_set_submatrix(VL, tmp_cmat, 1, 1, northo, nstate)
834 2 : DEALLOCATE (tmp_cmat)
835 2 : CALL parallel_gemm("C", "N", nextra, nstate, northo, cone, c_tilde, VL, czero, UL)
836 8 : ALLOCATE (tmp_cmat(nextra, nstate))
837 2 : CALL cp_cfm_get_submatrix(UL, tmp_cmat, 1, 1, nextra, nstate)
838 2 : CALL cp_cfm_set_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
839 2 : DEALLOCATE (tmp_cmat)
840 2 : CALL cp_fm_create(tmp_fm, U%matrix_struct)
841 1937 : tmp_fm%local_data = REAL(U%local_data, KIND=dp)
842 2 : CALL ortho_vectors(tmp_fm)
843 2 : CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=U)
844 2 : CALL cp_fm_release(tmp_fm)
845 4 : CALL cp_cfm_to_fm(U, matrix_U)
846 : END IF
847 : ! reevaluate V
848 8 : ALLOCATE (tmp_cmat(nocc, nstate))
849 2 : CALL cp_cfm_get_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
850 2 : CALL cp_cfm_set_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
851 2 : DEALLOCATE (tmp_cmat)
852 8 : ALLOCATE (tmp_cmat(nextra, nstate))
853 2 : CALL cp_cfm_get_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
854 2 : CALL cp_cfm_set_submatrix(UL, tmp_cmat, 1, 1, nextra, nstate)
855 2 : DEALLOCATE (tmp_cmat)
856 2 : CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, UL, czero, VL)
857 8 : ALLOCATE (tmp_cmat(northo, nstate))
858 2 : CALL cp_cfm_get_submatrix(VL, tmp_cmat)
859 2 : CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
860 6 : DEALLOCATE (tmp_cmat)
861 : END SELECT
862 : ELSE
863 0 : DO idim = 1, dim2
864 0 : CALL cp_cfm_create(zij_0(idim), zij(1, 1)%matrix_struct)
865 0 : CALL cp_cfm_to_cfm(c_zij(idim), zij_0(idim))
866 : END DO
867 0 : CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
868 0 : CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
869 : END IF
870 :
871 2 : unit_nr = -1
872 2 : IF (rmat%matrix_struct%para_env%is_source()) THEN
873 1 : unit_nr = cp_logger_get_default_unit_nr()
874 1 : WRITE (unit_nr, '(T4,A )') " Localization by combined Jacobi rotations and Non-Linear Conjugate Gradient"
875 : END IF
876 :
877 2 : norm2_old = 1.0E30_dp
878 2 : ds_min = 1.0_dp
879 2 : new_direction = .TRUE.
880 2 : iter = 0
881 2 : line_searches = 0
882 2 : line_search_count = 0
883 2 : tol = 1.0E+20_dp
884 2 : mintol = 1.0E+10_dp
885 2 : miniter = 0
886 :
887 : !IF (nextra > 0) WRITE(*,*) 'random_guess, MO_guess, U_guess, conjugate_gradient: ', &
888 : ! do_cinit_random, do_cinit_mo, do_U_guess_mo, do_cg
889 :
890 : ! do conjugate gradient until converged
891 34 : DO WHILE (iter < max_iter)
892 34 : iter = iter + 1
893 : !WRITE(*,*) 'iter = ', iter
894 34 : t1 = m_walltime()
895 :
896 34 : IF (iter > 1) THEN
897 : ! comput U
898 32 : CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
899 32 : CALL cp_cfm_create(tmp_cfm_2, zij(1, 1)%matrix_struct)
900 32 : IF (para_env%num_pe == 1) THEN
901 0 : CALL jacobi_rotations_serial_1(weights, c_zij, 1, tmp_cfm_2, tol_out=tol)
902 : ELSE
903 32 : CALL jacobi_rot_para_1(weights, c_zij, para_env, 1, tmp_cfm_2, tol_out=tol)
904 : END IF
905 32 : CALL parallel_gemm('N', 'N', nstate, nstate, nstate, cone, U, tmp_cfm_2, czero, tmp_cfm)
906 32 : CALL cp_cfm_to_cfm(tmp_cfm, U)
907 32 : CALL cp_cfm_release(tmp_cfm)
908 32 : CALL cp_cfm_release(tmp_cfm_2)
909 : END IF
910 :
911 34 : IF (nextra > 0) THEN
912 136 : ALLOCATE (tmp_cmat(nextra, nstate))
913 34 : CALL cp_cfm_get_submatrix(U, tmp_cmat, nocc + 1, 1, nextra, nstate)
914 34 : CALL cp_cfm_set_submatrix(UL, tmp_cmat)
915 34 : DEALLOCATE (tmp_cmat)
916 34 : IF (iter > 1) THEN
917 : ! orthonormalize c_tilde
918 32 : CALL cp_fm_create(tmp_fm, c_tilde%matrix_struct)
919 448 : tmp_fm%local_data = REAL(c_tilde%local_data, KIND=dp)
920 32 : CALL ortho_vectors(tmp_fm)
921 32 : CALL cp_fm_to_cfm(msourcer=tmp_fm, mtarget=c_tilde)
922 32 : CALL cp_fm_release(tmp_fm)
923 :
924 128 : ALLOCATE (tmp_cmat(nocc, nstate))
925 32 : CALL cp_cfm_get_submatrix(U, tmp_cmat, 1, 1, nocc, nstate)
926 32 : CALL cp_cfm_set_submatrix(V, tmp_cmat, 1, 1, nocc, nstate)
927 32 : DEALLOCATE (tmp_cmat)
928 32 : CALL parallel_gemm("N", "N", northo, nstate, nextra, cone, c_tilde, UL, czero, VL)
929 128 : ALLOCATE (tmp_cmat(northo, nstate))
930 32 : CALL cp_cfm_get_submatrix(VL, tmp_cmat)
931 32 : CALL cp_cfm_set_submatrix(V, tmp_cmat, nocc + 1, 1, northo, nstate)
932 64 : DEALLOCATE (tmp_cmat)
933 : END IF
934 :
935 : ! reset if new_direction
936 34 : IF (new_direction .AND. MOD(line_searches, 20) == 5) THEN
937 0 : CALL cp_cfm_set_all(skc, czero)
938 0 : CALL cp_cfm_set_all(Gct_old, czero)
939 0 : norm2_old = 1.0E30_dp
940 : END IF
941 :
942 34 : CALL cp_cfm_create(tmp_cfm, V%matrix_struct)
943 34 : CALL cp_cfm_to_cfm(V, tmp_cfm)
944 34 : CALL cp_cfm_create(tmp_cfm_1, V%matrix_struct)
945 68 : ndummy = nmo
946 : ELSE
947 0 : CALL cp_cfm_create(tmp_cfm, zij(1, 1)%matrix_struct)
948 0 : CALL cp_cfm_to_cfm(U, tmp_cfm)
949 0 : CALL cp_cfm_create(tmp_cfm_1, zij(1, 1)%matrix_struct)
950 0 : ndummy = nstate
951 : END IF
952 : ! update z_ij
953 136 : DO idim = 1, dim2
954 : ! 'tmp_cfm_1 = zij_0*tmp_cfm'
955 : CALL parallel_gemm("N", "N", ndummy, nstate, ndummy, cone, zij_0(idim), &
956 102 : tmp_cfm, czero, tmp_cfm_1)
957 : ! 'c_zij = tmp_cfm_dagg*tmp_cfm_1'
958 : CALL parallel_gemm("C", "N", nstate, nstate, ndummy, cone, tmp_cfm, tmp_cfm_1, &
959 136 : czero, c_zij(idim))
960 : END DO
961 34 : CALL cp_cfm_release(tmp_cfm)
962 34 : CALL cp_cfm_release(tmp_cfm_1)
963 : ! compute spread
964 1496 : DO istate = 1, nstate
965 1462 : spread_ii = 0.0_dp
966 5848 : DO idim = 1, dim2
967 4386 : CALL cp_cfm_get_element(c_zij(idim), istate, istate, mzii)
968 : spread_ii = spread_ii + weights(idim)* &
969 4386 : ABS(mzii)**2/twopi/twopi
970 5848 : matrix_zii(istate, idim) = mzii
971 : END DO
972 : !WRITE(*,*) 'spread_ii', spread_ii
973 1496 : sum_spread(istate) = spread_ii
974 : END DO
975 34 : CALL c_zij(1)%matrix_struct%para_env%sum(spread_ii)
976 34 : spread_sum = accurate_sum(sum_spread)
977 :
978 34 : IF (nextra > 0) THEN
979 : ! update c_tilde
980 34 : CALL cp_cfm_set_all(zdiag, czero)
981 34 : CALL cp_cfm_set_all(grad_ctilde, czero)
982 34 : CALL cp_cfm_create(tmp_cfm, V%matrix_struct)
983 34 : CALL cp_cfm_set_all(tmp_cfm, czero)
984 34 : CALL cp_cfm_create(tmp_cfm_1, V%matrix_struct)
985 34 : CALL cp_cfm_set_all(tmp_cfm_1, czero)
986 136 : ALLOCATE (tmp_cmat(northo, nstate))
987 136 : DO idim = 1, dim2
988 102 : weight = weights(idim)
989 8874 : arr_zii = matrix_zii(:, idim)
990 : ! tmp_cfm = zij_0*V
991 : CALL parallel_gemm("N", "N", nmo, nstate, nmo, cone, &
992 102 : zij_0(idim), V, czero, tmp_cfm)
993 : ! tmp_cfm = tmp_cfm*diag_zij_dagg
994 4488 : CALL cp_cfm_column_scale(tmp_cfm, CONJG(arr_zii))
995 : ! tmp_cfm_1 = tmp_cfm*U_dagg
996 : CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
997 102 : U, czero, tmp_cfm_1)
998 102 : CALL cp_cfm_scale(weight, tmp_cfm_1)
999 : ! zdiag = zdiag + tmp_cfm_1'
1000 102 : CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
1001 :
1002 : ! tmp_cfm = zij_0_dagg*V
1003 : CALL parallel_gemm("C", "N", nmo, nstate, nmo, cone, &
1004 102 : zij_0(idim), V, czero, tmp_cfm)
1005 :
1006 : ! tmp_cfm = tmp_cfm*diag_zij
1007 102 : CALL cp_cfm_column_scale(tmp_cfm, arr_zii)
1008 : ! tmp_cfm_1 = tmp_cfm*U_dagg
1009 : CALL parallel_gemm("N", "C", nmo, nstate, nstate, cone, tmp_cfm, &
1010 102 : U, czero, tmp_cfm_1)
1011 102 : CALL cp_cfm_scale(weight, tmp_cfm_1)
1012 : ! zdiag = zdiag + tmp_cfm_1'
1013 136 : CALL cp_cfm_scale_and_add(cone, zdiag, cone, tmp_cfm_1)
1014 : END DO ! idim
1015 34 : CALL cp_cfm_release(tmp_cfm)
1016 34 : CALL cp_cfm_release(tmp_cfm_1)
1017 34 : DEALLOCATE (tmp_cmat)
1018 136 : ALLOCATE (tmp_cmat(northo, nextra))
1019 : CALL cp_cfm_get_submatrix(zdiag, tmp_cmat, nocc + 1, nocc + 1, &
1020 34 : northo, nextra, .FALSE.)
1021 : ! 'grad_ctilde'
1022 34 : CALL cp_cfm_set_submatrix(grad_ctilde, tmp_cmat)
1023 34 : DEALLOCATE (tmp_cmat)
1024 : ! ctrans_lambda = c_tilde_dagg*grad_ctilde
1025 34 : CALL parallel_gemm("C", "N", nextra, nextra, northo, cone, c_tilde, grad_ctilde, czero, ctrans_lambda)
1026 : !WRITE(*,*) "norm(ctrans_lambda) = ", cp_cfm_norm(ctrans_lambda, "F")
1027 : ! 'grad_ctilde = - c_tilde*ctrans_lambda + grad_ctilde'
1028 102 : CALL parallel_gemm("N", "N", northo, nextra, nextra, -cone, c_tilde, ctrans_lambda, cone, grad_ctilde)
1029 : END IF ! nextra > 0
1030 :
1031 : ! tolerance
1032 34 : IF (nextra > 0) THEN
1033 : tolc = 0.0_dp
1034 34 : CALL cp_fm_create(tmp_fm, grad_ctilde%matrix_struct)
1035 34 : CALL cp_cfm_to_fm(grad_ctilde, tmp_fm)
1036 34 : CALL cp_fm_maxabsval(tmp_fm, tolc)
1037 34 : CALL cp_fm_release(tmp_fm)
1038 : !WRITE(*,*) 'tolc = ', tolc
1039 34 : tol = tol + tolc
1040 : END IF
1041 : !WRITE(*,*) 'tol = ', tol
1042 :
1043 36 : IF (nextra > 0) THEN
1044 : !WRITE(*,*) 'new_direction: ', new_direction
1045 34 : IF (new_direction) THEN
1046 6 : line_searches = line_searches + 1
1047 6 : IF (mintol > tol) THEN
1048 4 : mintol = tol
1049 4 : miniter = iter
1050 : END IF
1051 :
1052 6 : IF (unit_nr > 0 .AND. MODULO(iter, out_each) == 0) THEN
1053 0 : sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1054 0 : avg_spread_ii = sum_spread_ii/nstate
1055 : WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1056 0 : "Iteration", "Avg. Spread_ii", "Tolerance"
1057 : WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1058 0 : iter, avg_spread_ii, tol
1059 0 : CALL m_flush(unit_nr)
1060 : END IF
1061 6 : IF (tol < eps_localization) EXIT
1062 :
1063 4 : IF (do_cg) THEN
1064 : cnorm2_Gct = czero
1065 : cnorm2_Gct_cross = czero
1066 4 : CALL cp_cfm_trace(grad_ctilde, Gct_old, cnorm2_Gct_cross)
1067 4 : norm2_Gct_cross = REAL(cnorm2_Gct_cross, KIND=dp)
1068 56 : Gct_old%local_data = grad_ctilde%local_data
1069 4 : CALL cp_cfm_trace(grad_ctilde, Gct_old, cnorm2_Gct)
1070 4 : norm2_Gct = REAL(cnorm2_Gct, KIND=dp)
1071 : ! compute beta_pr
1072 4 : beta_pr = (norm2_Gct - norm2_Gct_cross)/norm2_old
1073 4 : norm2_old = norm2_Gct
1074 4 : beta = MAX(0.0_dp, beta_pr)
1075 : !WRITE(*,*) 'beta = ', beta
1076 : ! compute skc / ska = beta * skc / ska + grad_ctilde / G
1077 4 : CALL cp_cfm_scale(beta, skc)
1078 4 : CALL cp_cfm_scale_and_add(cone, skc, cone, Gct_old)
1079 4 : CALL cp_cfm_trace(skc, Gct_old, cnorm2_Gct_cross)
1080 4 : norm2_Gct_cross = REAL(cnorm2_Gct_cross, KIND=dp)
1081 4 : IF (norm2_Gct_cross <= 0.0_dp) THEN ! back to steepest ascent
1082 0 : CALL cp_cfm_scale_and_add(czero, skc, cone, Gct_old)
1083 : END IF
1084 : ELSE
1085 0 : CALL cp_cfm_scale_and_add(czero, skc, cone, grad_ctilde)
1086 : END IF
1087 : line_search_count = 0
1088 : END IF
1089 :
1090 32 : line_search_count = line_search_count + 1
1091 : !WRITE(*,*) 'line_search_count = ', line_search_count
1092 32 : energy(line_search_count) = spread_sum
1093 :
1094 : ! gold line search
1095 32 : new_direction = .FALSE.
1096 32 : IF (line_search_count == 1) THEN
1097 4 : lsl = 1
1098 4 : lsr = 0
1099 4 : lsm = 1
1100 4 : pos(1) = 0.0_dp
1101 4 : pos(2) = ds_min/gold_sec
1102 4 : ds = pos(2)
1103 : ELSE
1104 28 : IF (line_search_count == 50) THEN
1105 0 : IF (ABS(energy(line_search_count) - energy(line_search_count - 1)) < 1.0E-4_dp) THEN
1106 0 : CPWARN("Line search failed to converge properly")
1107 0 : ds_min = 0.1_dp
1108 0 : new_direction = .TRUE.
1109 : ds = pos(line_search_count)
1110 0 : line_search_count = 0
1111 : ELSE
1112 0 : CPABORT("No. of line searches exceeds 50")
1113 : END IF
1114 : ELSE
1115 28 : IF (lsr == 0) THEN
1116 28 : IF (energy(line_search_count - 1) > energy(line_search_count)) THEN
1117 0 : lsr = line_search_count
1118 0 : pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
1119 : ELSE
1120 28 : lsl = lsm
1121 28 : lsm = line_search_count
1122 28 : pos(line_search_count + 1) = pos(line_search_count)/gold_sec
1123 : END IF
1124 : ELSE
1125 0 : IF (pos(line_search_count) < pos(lsm)) THEN
1126 0 : IF (energy(line_search_count) > energy(lsm)) THEN
1127 : lsr = lsm
1128 : lsm = line_search_count
1129 : ELSE
1130 0 : lsl = line_search_count
1131 : END IF
1132 : ELSE
1133 0 : IF (energy(line_search_count) > energy(lsm)) THEN
1134 : lsl = lsm
1135 : lsm = line_search_count
1136 : ELSE
1137 0 : lsr = line_search_count
1138 : END IF
1139 : END IF
1140 0 : IF (pos(lsr) - pos(lsm) > pos(lsm) - pos(lsl)) THEN
1141 0 : pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
1142 : ELSE
1143 0 : pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
1144 : END IF
1145 0 : IF ((pos(lsr) - pos(lsl)) < 1.0E-3_dp*pos(lsr)) THEN
1146 0 : new_direction = .TRUE.
1147 : END IF
1148 : END IF ! lsr .eq. 0
1149 : END IF ! line_search_count .eq. 50
1150 : ! now go to the suggested point
1151 28 : ds = pos(line_search_count + 1) - pos(line_search_count)
1152 : !WRITE(*,*) 'lsl, lsr, lsm, ds = ', lsl, lsr, lsm, ds
1153 28 : IF ((ABS(ds) < 1.0E-10_dp) .AND. (lsl == 1)) THEN
1154 0 : new_direction = .TRUE.
1155 0 : ds_min = 0.5_dp/alpha
1156 28 : ELSEIF (ABS(ds) > 10.0_dp) THEN
1157 4 : new_direction = .TRUE.
1158 4 : ds_min = 0.5_dp/alpha
1159 : ELSE
1160 : ds_min = pos(line_search_count + 1)
1161 : END IF
1162 : END IF ! first step
1163 : ! 'c_tilde = c_tilde + d*skc'
1164 32 : CALL cp_cfm_scale(ds, skc)
1165 32 : CALL cp_cfm_scale_and_add(cone, c_tilde, cone, skc)
1166 : ELSE
1167 0 : IF (mintol > tol) THEN
1168 0 : mintol = tol
1169 0 : miniter = iter
1170 : END IF
1171 0 : IF (unit_nr > 0 .AND. MODULO(iter, out_each) == 0) THEN
1172 0 : sum_spread_ii = alpha*nstate/twopi/twopi - spread_sum
1173 0 : avg_spread_ii = sum_spread_ii/nstate
1174 : WRITE (unit_nr, '(T4,A,T26,A,T48,A)') &
1175 0 : "Iteration", "Avg. Spread_ii", "Tolerance"
1176 : WRITE (unit_nr, '(T4,I7,T20,F20.10,T45,E12.4)') &
1177 0 : iter, avg_spread_ii, tol
1178 0 : CALL m_flush(unit_nr)
1179 : END IF
1180 0 : IF (tol < eps_localization) EXIT
1181 : END IF ! nextra > 0
1182 :
1183 : END DO ! iteration
1184 :
1185 2 : IF ((unit_nr > 0) .AND. (iter == max_iter)) THEN
1186 0 : WRITE (unit_nr, '(T4,A,T4,A)') "Min. Itr.", "Min. Tol."
1187 0 : WRITE (unit_nr, '(T4,I7,T4,E12.4)') miniter, mintol
1188 0 : CALL m_flush(unit_nr)
1189 : END IF
1190 :
1191 2 : CALL cp_cfm_to_fm(U, matrix_U)
1192 :
1193 2 : IF (nextra > 0) THEN
1194 2324 : rmat%local_data = REAL(V%local_data, KIND=dp)
1195 2 : CALL rotate_orbitals_edf(rmat, vectors_all, vectors)
1196 :
1197 2 : CALL cp_cfm_release(c_tilde)
1198 2 : CALL cp_cfm_release(grad_ctilde)
1199 2 : CALL cp_cfm_release(Gct_old)
1200 2 : CALL cp_cfm_release(skc)
1201 2 : CALL cp_cfm_release(UL)
1202 2 : CALL cp_cfm_release(zdiag)
1203 2 : CALL cp_cfm_release(ctrans_lambda)
1204 2 : CALL cp_fm_release(id_nextra)
1205 2 : CALL cp_fm_release(vectors_all)
1206 2 : CALL cp_cfm_release(V)
1207 2 : CALL cp_fm_release(matrix_V)
1208 2 : CALL cp_fm_release(matrix_V_all)
1209 2 : CALL cp_cfm_release(VL)
1210 2 : DEALLOCATE (arr_zii)
1211 : ELSE
1212 0 : rmat%local_data = matrix_U%local_data
1213 0 : CALL rotate_orbitals(rmat, vectors)
1214 : END IF
1215 8 : DO idim = 1, dim2
1216 8 : CALL cp_cfm_release(zij_0(idim))
1217 : END DO
1218 2 : DEALLOCATE (zij_0)
1219 :
1220 8 : DO idim = 1, dim2
1221 5811 : zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
1222 5811 : zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
1223 8 : CALL cp_cfm_release(c_zij(idim))
1224 : END DO
1225 2 : DEALLOCATE (c_zij)
1226 2 : CALL cp_fm_release(rmat)
1227 2 : CALL cp_cfm_release(U)
1228 2 : CALL cp_fm_release(matrix_U)
1229 2 : DEALLOCATE (matrix_zii, sum_spread)
1230 :
1231 2 : CALL timestop(handle)
1232 :
1233 10 : END SUBROUTINE jacobi_cg_edf_ls
1234 :
1235 : ! **************************************************************************************************
1236 : !> \brief ...
1237 : !> \param vmatrix ...
1238 : ! **************************************************************************************************
1239 108 : SUBROUTINE ortho_vectors(vmatrix)
1240 :
1241 : TYPE(cp_fm_type), INTENT(IN) :: vmatrix
1242 :
1243 : CHARACTER(LEN=*), PARAMETER :: routineN = 'ortho_vectors'
1244 :
1245 : INTEGER :: handle, n, ncol
1246 : TYPE(cp_fm_struct_type), POINTER :: fm_struct_tmp
1247 : TYPE(cp_fm_type) :: overlap_vv
1248 :
1249 36 : CALL timeset(routineN, handle)
1250 :
1251 36 : NULLIFY (fm_struct_tmp)
1252 :
1253 36 : CALL cp_fm_get_info(matrix=vmatrix, nrow_global=n, ncol_global=ncol)
1254 :
1255 : CALL cp_fm_struct_create(fm_struct_tmp, nrow_global=ncol, ncol_global=ncol, &
1256 : para_env=vmatrix%matrix_struct%para_env, &
1257 36 : context=vmatrix%matrix_struct%context)
1258 36 : CALL cp_fm_create(overlap_vv, fm_struct_tmp, "overlap_vv")
1259 36 : CALL cp_fm_struct_release(fm_struct_tmp)
1260 :
1261 36 : CALL parallel_gemm('T', 'N', ncol, ncol, n, 1.0_dp, vmatrix, vmatrix, 0.0_dp, overlap_vv)
1262 36 : CALL cp_fm_cholesky_decompose(overlap_vv)
1263 36 : CALL cp_fm_triangular_multiply(overlap_vv, vmatrix, n_cols=ncol, side='R', invert_tr=.TRUE.)
1264 :
1265 36 : CALL cp_fm_release(overlap_vv)
1266 :
1267 36 : CALL timestop(handle)
1268 :
1269 36 : END SUBROUTINE ortho_vectors
1270 :
1271 : ! **************************************************************************************************
1272 : !> \brief ...
1273 : !> \param istate ...
1274 : !> \param jstate ...
1275 : !> \param st ...
1276 : !> \param ct ...
1277 : !> \param zij ...
1278 : ! **************************************************************************************************
1279 0 : SUBROUTINE rotate_zij(istate, jstate, st, ct, zij)
1280 : INTEGER, INTENT(IN) :: istate, jstate
1281 : REAL(KIND=dp), INTENT(IN) :: st, ct
1282 : TYPE(cp_cfm_type) :: zij(:)
1283 :
1284 : INTEGER :: id
1285 :
1286 : ! Locals
1287 :
1288 0 : DO id = 1, SIZE(zij, 1)
1289 0 : CALL cp_cfm_rot_cols(zij(id), istate, jstate, ct, st)
1290 0 : CALL cp_cfm_rot_rows(zij(id), istate, jstate, ct, st)
1291 : END DO
1292 :
1293 0 : END SUBROUTINE rotate_zij
1294 : ! **************************************************************************************************
1295 : !> \brief ...
1296 : !> \param istate ...
1297 : !> \param jstate ...
1298 : !> \param st ...
1299 : !> \param ct ...
1300 : !> \param rmat ...
1301 : ! **************************************************************************************************
1302 0 : SUBROUTINE rotate_rmat(istate, jstate, st, ct, rmat)
1303 : INTEGER, INTENT(IN) :: istate, jstate
1304 : REAL(KIND=dp), INTENT(IN) :: st, ct
1305 : TYPE(cp_cfm_type), INTENT(IN) :: rmat
1306 :
1307 0 : CALL cp_cfm_rot_cols(rmat, istate, jstate, ct, st)
1308 :
1309 0 : END SUBROUTINE rotate_rmat
1310 : ! **************************************************************************************************
1311 : !> \brief ...
1312 : !> \param mii ...
1313 : !> \param mjj ...
1314 : !> \param mij ...
1315 : !> \param weights ...
1316 : !> \param theta ...
1317 : !> \param grad_ij ...
1318 : !> \param step ...
1319 : ! **************************************************************************************************
1320 2324078 : SUBROUTINE get_angle(mii, mjj, mij, weights, theta, grad_ij, step)
1321 : COMPLEX(KIND=dp), POINTER :: mii(:), mjj(:), mij(:)
1322 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1323 : REAL(KIND=dp), INTENT(OUT) :: theta
1324 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: grad_ij, step
1325 :
1326 : COMPLEX(KIND=dp) :: z11, z12, z22
1327 : INTEGER :: dim_m, idim
1328 : REAL(KIND=dp) :: a12, b12, d2, ratio
1329 :
1330 2324078 : a12 = 0.0_dp
1331 2324078 : b12 = 0.0_dp
1332 2324078 : dim_m = SIZE(mii)
1333 9402806 : DO idim = 1, dim_m
1334 7078728 : z11 = mii(idim)
1335 7078728 : z22 = mjj(idim)
1336 7078728 : z12 = mij(idim)
1337 7078728 : a12 = a12 + weights(idim)*REAL(CONJG(z12)*(z11 - z22), KIND=dp)
1338 : b12 = b12 + weights(idim)*REAL((z12*CONJG(z12) - &
1339 9402806 : 0.25_dp*(z11 - z22)*(CONJG(z11) - CONJG(z22))), KIND=dp)
1340 : END DO
1341 2324078 : IF (ABS(b12) > 1.e-10_dp) THEN
1342 2324078 : ratio = -a12/b12
1343 2324078 : theta = 0.25_dp*ATAN(ratio)
1344 0 : ELSEIF (ABS(b12) < 1.e-10_dp) THEN
1345 0 : b12 = 0.0_dp
1346 0 : theta = 0.0_dp
1347 : ELSE
1348 0 : theta = 0.25_dp*pi
1349 : END IF
1350 2324078 : IF (PRESENT(grad_ij)) theta = theta + step*grad_ij
1351 : ! Check second derivative info
1352 2324078 : d2 = a12*SIN(4._dp*theta) - b12*COS(4._dp*theta)
1353 2324078 : IF (d2 <= 0._dp) THEN ! go to the maximum, not the minimum
1354 3240 : IF (theta > 0.0_dp) THEN ! make theta as small as possible
1355 1563 : theta = theta - 0.25_dp*pi
1356 : ELSE
1357 1677 : theta = theta + 0.25_dp*pi
1358 : END IF
1359 : END IF
1360 2324078 : END SUBROUTINE get_angle
1361 : ! **************************************************************************************************
1362 : !> \brief ...
1363 : !> \param zij ...
1364 : !> \param weights ...
1365 : !> \param tolerance ...
1366 : !> \param grad ...
1367 : ! **************************************************************************************************
1368 0 : SUBROUTINE check_tolerance(zij, weights, tolerance, grad)
1369 : TYPE(cp_cfm_type) :: zij(:)
1370 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1371 : REAL(KIND=dp), INTENT(OUT) :: tolerance
1372 : TYPE(cp_fm_type), INTENT(OUT), OPTIONAL :: grad
1373 :
1374 : CHARACTER(len=*), PARAMETER :: routineN = 'check_tolerance'
1375 :
1376 : INTEGER :: handle
1377 : TYPE(cp_fm_type) :: force
1378 :
1379 0 : CALL timeset(routineN, handle)
1380 :
1381 : ! compute gradient at t=0
1382 :
1383 0 : CALL cp_fm_create(force, zij(1)%matrix_struct)
1384 0 : CALL cp_fm_set_all(force, 0._dp)
1385 0 : CALL grad_at_0(zij, weights, force)
1386 0 : CALL cp_fm_maxabsval(force, tolerance)
1387 0 : IF (PRESENT(grad)) CALL cp_fm_to_fm(force, grad)
1388 0 : CALL cp_fm_release(force)
1389 :
1390 0 : CALL timestop(handle)
1391 :
1392 0 : END SUBROUTINE check_tolerance
1393 : ! **************************************************************************************************
1394 : !> \brief ...
1395 : !> \param rmat ...
1396 : !> \param vectors ...
1397 : ! **************************************************************************************************
1398 1052 : SUBROUTINE rotate_orbitals(rmat, vectors)
1399 : TYPE(cp_fm_type), INTENT(IN) :: rmat, vectors
1400 :
1401 : INTEGER :: k, n
1402 : TYPE(cp_fm_type) :: wf
1403 :
1404 526 : CALL cp_fm_create(wf, vectors%matrix_struct)
1405 526 : CALL cp_fm_get_info(vectors, nrow_global=n, ncol_global=k)
1406 526 : CALL parallel_gemm("N", "N", n, k, k, 1.0_dp, vectors, rmat, 0.0_dp, wf)
1407 526 : CALL cp_fm_to_fm(wf, vectors)
1408 526 : CALL cp_fm_release(wf)
1409 526 : END SUBROUTINE rotate_orbitals
1410 : ! **************************************************************************************************
1411 : !> \brief ...
1412 : !> \param rmat ...
1413 : !> \param vec_all ...
1414 : !> \param vectors ...
1415 : ! **************************************************************************************************
1416 6 : SUBROUTINE rotate_orbitals_edf(rmat, vec_all, vectors)
1417 : TYPE(cp_fm_type), INTENT(IN) :: rmat, vec_all, vectors
1418 :
1419 : INTEGER :: k, l, n
1420 : TYPE(cp_fm_type) :: wf
1421 :
1422 2 : CALL cp_fm_create(wf, vectors%matrix_struct)
1423 2 : CALL cp_fm_get_info(vec_all, nrow_global=n, ncol_global=k)
1424 2 : CALL cp_fm_get_info(rmat, ncol_global=l)
1425 :
1426 2 : CALL parallel_gemm("N", "N", n, l, k, 1.0_dp, vec_all, rmat, 0.0_dp, wf)
1427 2 : CALL cp_fm_to_fm(wf, vectors)
1428 2 : CALL cp_fm_release(wf)
1429 2 : END SUBROUTINE rotate_orbitals_edf
1430 : ! **************************************************************************************************
1431 : !> \brief ...
1432 : !> \param diag ...
1433 : !> \param weights ...
1434 : !> \param matrix ...
1435 : !> \param ndim ...
1436 : ! **************************************************************************************************
1437 204 : SUBROUTINE gradsq_at_0(diag, weights, matrix, ndim)
1438 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1439 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1440 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1441 : INTEGER, INTENT(IN) :: ndim
1442 :
1443 : COMPLEX(KIND=dp) :: zii, zjj
1444 : INTEGER :: idim, istate, jstate, ncol_local, &
1445 : nrow_global, nrow_local
1446 204 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1447 : REAL(KIND=dp) :: gradsq_ij
1448 :
1449 : CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1450 : ncol_local=ncol_local, nrow_global=nrow_global, &
1451 204 : row_indices=row_indices, col_indices=col_indices)
1452 :
1453 612 : DO istate = 1, nrow_local
1454 2244 : DO jstate = 1, ncol_local
1455 : ! get real and imaginary parts
1456 1632 : gradsq_ij = 0.0_dp
1457 11424 : DO idim = 1, ndim
1458 9792 : zii = diag(row_indices(istate), idim)
1459 9792 : zjj = diag(col_indices(jstate), idim)
1460 : gradsq_ij = gradsq_ij + weights(idim)* &
1461 11424 : 4.0_dp*REAL((CONJG(zii)*zii + CONJG(zjj)*zjj), KIND=dp)
1462 : END DO
1463 2040 : matrix%local_data(istate, jstate) = gradsq_ij
1464 : END DO
1465 : END DO
1466 204 : END SUBROUTINE gradsq_at_0
1467 : ! **************************************************************************************************
1468 : !> \brief ...
1469 : !> \param matrix_p ...
1470 : !> \param weights ...
1471 : !> \param matrix ...
1472 : ! **************************************************************************************************
1473 0 : SUBROUTINE grad_at_0(matrix_p, weights, matrix)
1474 : TYPE(cp_cfm_type) :: matrix_p(:)
1475 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1476 : TYPE(cp_fm_type), INTENT(IN) :: matrix
1477 :
1478 : COMPLEX(KIND=dp) :: zii, zij, zjj
1479 0 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1480 : INTEGER :: dim_m, idim, istate, jstate, ncol_local, &
1481 : nrow_global, nrow_local
1482 0 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1483 : REAL(KIND=dp) :: grad_ij
1484 :
1485 0 : NULLIFY (diag)
1486 : CALL cp_fm_get_info(matrix, nrow_local=nrow_local, &
1487 : ncol_local=ncol_local, nrow_global=nrow_global, &
1488 0 : row_indices=row_indices, col_indices=col_indices)
1489 0 : dim_m = SIZE(matrix_p, 1)
1490 0 : ALLOCATE (diag(nrow_global, dim_m))
1491 :
1492 0 : DO idim = 1, dim_m
1493 0 : DO istate = 1, nrow_global
1494 0 : CALL cp_cfm_get_element(matrix_p(idim), istate, istate, diag(istate, idim))
1495 : END DO
1496 : END DO
1497 :
1498 0 : DO istate = 1, nrow_local
1499 0 : DO jstate = 1, ncol_local
1500 : ! get real and imaginary parts
1501 : grad_ij = 0.0_dp
1502 0 : DO idim = 1, dim_m
1503 0 : zii = diag(row_indices(istate), idim)
1504 0 : zjj = diag(col_indices(jstate), idim)
1505 0 : zij = matrix_p(idim)%local_data(istate, jstate)
1506 : grad_ij = grad_ij + weights(idim)* &
1507 0 : REAL(4.0_dp*CONJG(zij)*(zjj - zii), dp)
1508 : END DO
1509 0 : matrix%local_data(istate, jstate) = grad_ij
1510 : END DO
1511 : END DO
1512 0 : DEALLOCATE (diag)
1513 0 : END SUBROUTINE grad_at_0
1514 :
1515 : ! return energy and maximum gradient in the current point
1516 : ! **************************************************************************************************
1517 : !> \brief ...
1518 : !> \param weights ...
1519 : !> \param zij ...
1520 : !> \param tolerance ...
1521 : !> \param value ...
1522 : ! **************************************************************************************************
1523 4246 : SUBROUTINE check_tolerance_new(weights, zij, tolerance, value)
1524 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1525 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :)
1526 : REAL(KIND=dp) :: tolerance, value
1527 :
1528 : COMPLEX(KIND=dp) :: kii, kij, kjj
1529 4246 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag
1530 : INTEGER :: idim, istate, jstate, ncol_local, &
1531 : nrow_global, nrow_local
1532 4246 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1533 : REAL(KIND=dp) :: grad_ij, ra, rb
1534 :
1535 4246 : NULLIFY (diag)
1536 : CALL cp_fm_get_info(zij(1, 1), nrow_local=nrow_local, &
1537 : ncol_local=ncol_local, nrow_global=nrow_global, &
1538 4246 : row_indices=row_indices, col_indices=col_indices)
1539 16984 : ALLOCATE (diag(nrow_global, SIZE(zij, 2)))
1540 4246 : value = 0.0_dp
1541 17098 : DO idim = 1, SIZE(zij, 2)
1542 191338 : DO istate = 1, nrow_global
1543 174240 : CALL cp_fm_get_element(zij(1, idim), istate, istate, ra)
1544 174240 : CALL cp_fm_get_element(zij(2, idim), istate, istate, rb)
1545 174240 : diag(istate, idim) = CMPLX(ra, rb, dp)
1546 187092 : value = value + weights(idim) - weights(idim)*ABS(diag(istate, idim))**2
1547 : END DO
1548 : END DO
1549 4246 : tolerance = 0.0_dp
1550 33191 : DO istate = 1, nrow_local
1551 496716 : DO jstate = 1, ncol_local
1552 1855525 : grad_ij = 0.0_dp
1553 1855525 : DO idim = 1, SIZE(zij, 2)
1554 1392000 : kii = diag(row_indices(istate), idim)
1555 1392000 : kjj = diag(col_indices(jstate), idim)
1556 1392000 : ra = zij(1, idim)%local_data(istate, jstate)
1557 1392000 : rb = zij(2, idim)%local_data(istate, jstate)
1558 1392000 : kij = CMPLX(ra, rb, dp)
1559 : grad_ij = grad_ij + weights(idim)* &
1560 1855525 : REAL(4.0_dp*CONJG(kij)*(kjj - kii), dp)
1561 : END DO
1562 492470 : tolerance = MAX(ABS(grad_ij), tolerance)
1563 : END DO
1564 : END DO
1565 4246 : CALL zij(1, 1)%matrix_struct%para_env%max(tolerance)
1566 :
1567 4246 : DEALLOCATE (diag)
1568 :
1569 4246 : END SUBROUTINE check_tolerance_new
1570 :
1571 : ! **************************************************************************************************
1572 : !> \brief yet another crazy try, computes the angles needed to rotate the orbitals first
1573 : !> and rotates them all at the same time (hoping for the best of course)
1574 : !> \param weights ...
1575 : !> \param zij ...
1576 : !> \param vectors ...
1577 : !> \param max_iter ...
1578 : !> \param max_crazy_angle ...
1579 : !> \param crazy_scale ...
1580 : !> \param crazy_use_diag ...
1581 : !> \param eps_localization ...
1582 : !> \param iterations ...
1583 : !> \param converged ...
1584 : ! **************************************************************************************************
1585 100 : SUBROUTINE crazy_rotations(weights, zij, vectors, max_iter, max_crazy_angle, crazy_scale, crazy_use_diag, &
1586 : eps_localization, iterations, converged)
1587 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1588 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1589 : INTEGER, INTENT(IN) :: max_iter
1590 : REAL(KIND=dp), INTENT(IN) :: max_crazy_angle
1591 : REAL(KIND=dp) :: crazy_scale
1592 : LOGICAL :: crazy_use_diag
1593 : REAL(KIND=dp), INTENT(IN) :: eps_localization
1594 : INTEGER :: iterations
1595 : LOGICAL, INTENT(out), OPTIONAL :: converged
1596 :
1597 : CHARACTER(len=*), PARAMETER :: routineN = 'crazy_rotations'
1598 : COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1599 : czero = (0.0_dp, 0.0_dp)
1600 :
1601 : COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1602 100 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1603 : COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
1604 : INTEGER :: dim2, handle, i, icol, idim, irow, &
1605 : method, ncol_global, ncol_local, &
1606 : norder, nrow_global, nrow_local, &
1607 : nsquare, unit_nr
1608 100 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1609 : LOGICAL :: do_emd
1610 : REAL(KIND=dp) :: eps_exp, limit_crazy_angle, maxeval, &
1611 : norm, ra, rb, theta, tolerance, value
1612 : REAL(KIND=dp), DIMENSION(:), POINTER :: evals
1613 : TYPE(cp_cfm_type) :: cmat_A, cmat_R, cmat_t1
1614 : TYPE(cp_fm_type) :: mat_R, mat_t, mat_theta, mat_U
1615 :
1616 100 : CALL timeset(routineN, handle)
1617 100 : NULLIFY (row_indices, col_indices)
1618 : CALL cp_fm_get_info(zij(1, 1), nrow_global=nrow_global, &
1619 : ncol_global=ncol_global, &
1620 : row_indices=row_indices, col_indices=col_indices, &
1621 100 : nrow_local=nrow_local, ncol_local=ncol_local)
1622 :
1623 100 : limit_crazy_angle = max_crazy_angle
1624 :
1625 100 : NULLIFY (diag_z, evals, evals_exp, mii, mij, mjj)
1626 100 : dim2 = SIZE(zij, 2)
1627 400 : ALLOCATE (diag_z(nrow_global, dim2))
1628 300 : ALLOCATE (evals(nrow_global))
1629 300 : ALLOCATE (evals_exp(nrow_global))
1630 :
1631 100 : CALL cp_cfm_create(cmat_A, zij(1, 1)%matrix_struct)
1632 100 : CALL cp_cfm_create(cmat_R, zij(1, 1)%matrix_struct)
1633 100 : CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1634 :
1635 100 : CALL cp_fm_create(mat_U, zij(1, 1)%matrix_struct)
1636 100 : CALL cp_fm_create(mat_t, zij(1, 1)%matrix_struct)
1637 100 : CALL cp_fm_create(mat_R, zij(1, 1)%matrix_struct)
1638 :
1639 100 : CALL cp_fm_create(mat_theta, zij(1, 1)%matrix_struct)
1640 :
1641 100 : CALL cp_fm_set_all(mat_R, 0.0_dp, 1.0_dp)
1642 100 : CALL cp_fm_set_all(mat_t, 0.0_dp)
1643 500 : ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
1644 406 : DO idim = 1, dim2
1645 306 : CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(1, idim))
1646 406 : CALL cp_fm_scale_and_add(1.0_dp, mat_t, weights(idim), zij(2, idim))
1647 : END DO
1648 100 : CALL cp_fm_syevd(mat_t, mat_U, evals)
1649 406 : DO idim = 1, dim2
1650 : ! rotate z's
1651 306 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_U, 0.0_dp, mat_t)
1652 306 : CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(1, idim))
1653 306 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_U, 0.0_dp, mat_t)
1654 406 : CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(2, idim))
1655 : END DO
1656 : ! collect rotation matrix
1657 100 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_R, mat_U, 0.0_dp, mat_t)
1658 100 : CALL cp_fm_to_fm(mat_t, mat_R)
1659 :
1660 100 : unit_nr = -1
1661 100 : IF (cmat_A%matrix_struct%para_env%is_source()) THEN
1662 50 : unit_nr = cp_logger_get_default_unit_nr()
1663 : WRITE (unit_nr, '(T2,A7,A6,1X,A20,A12,A12,A12)') &
1664 50 : "CRAZY| ", "Iter", "value ", "gradient", "Max. eval", "limit"
1665 : END IF
1666 :
1667 100 : iterations = 0
1668 100 : tolerance = 1.0_dp
1669 :
1670 : DO
1671 4246 : iterations = iterations + 1
1672 17098 : DO idim = 1, dim2
1673 191338 : DO i = 1, nrow_global
1674 174240 : CALL cp_fm_get_element(zij(1, idim), i, i, ra)
1675 174240 : CALL cp_fm_get_element(zij(2, idim), i, i, rb)
1676 187092 : diag_z(i, idim) = CMPLX(ra, rb, dp)
1677 : END DO
1678 : END DO
1679 33191 : DO irow = 1, nrow_local
1680 496716 : DO icol = 1, ncol_local
1681 1855525 : DO idim = 1, dim2
1682 1392000 : ra = zij(1, idim)%local_data(irow, icol)
1683 1392000 : rb = zij(2, idim)%local_data(irow, icol)
1684 1392000 : mij(idim) = CMPLX(ra, rb, dp)
1685 1392000 : mii(idim) = diag_z(row_indices(irow), idim)
1686 1855525 : mjj(idim) = diag_z(col_indices(icol), idim)
1687 : END DO
1688 492470 : IF (row_indices(irow) /= col_indices(icol)) THEN
1689 434580 : CALL get_angle(mii, mjj, mij, weights, theta)
1690 434580 : theta = crazy_scale*theta
1691 434580 : IF (theta > limit_crazy_angle) theta = limit_crazy_angle
1692 434580 : IF (theta < -limit_crazy_angle) theta = -limit_crazy_angle
1693 434580 : IF (crazy_use_diag) THEN
1694 0 : cmat_A%local_data(irow, icol) = -CMPLX(0.0_dp, theta, dp)
1695 : ELSE
1696 434580 : mat_theta%local_data(irow, icol) = -theta
1697 : END IF
1698 : ELSE
1699 28945 : IF (crazy_use_diag) THEN
1700 0 : cmat_A%local_data(irow, icol) = czero
1701 : ELSE
1702 28945 : mat_theta%local_data(irow, icol) = 0.0_dp
1703 : END IF
1704 : END IF
1705 : END DO
1706 : END DO
1707 :
1708 : ! construct rotation matrix U based on A using diagonalization
1709 : ! alternatively, exp based on repeated squaring could be faster
1710 4246 : IF (crazy_use_diag) THEN
1711 0 : CALL cp_cfm_heevd(cmat_A, cmat_R, evals)
1712 0 : maxeval = MAXVAL(ABS(evals))
1713 0 : evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
1714 0 : CALL cp_cfm_to_cfm(cmat_R, cmat_t1)
1715 0 : CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1716 : CALL parallel_gemm('N', 'C', nrow_global, nrow_global, nrow_global, cone, &
1717 0 : cmat_t1, cmat_R, czero, cmat_A)
1718 0 : mat_U%local_data = REAL(cmat_A%local_data, KIND=dp) ! U is a real matrix
1719 : ELSE
1720 4246 : do_emd = .FALSE.
1721 4246 : method = 2
1722 4246 : eps_exp = 1.0_dp*EPSILON(eps_exp)
1723 4246 : CALL cp_fm_maxabsrownorm(mat_theta, norm)
1724 4246 : maxeval = norm ! an upper bound
1725 4246 : CALL get_nsquare_norder(norm, nsquare, norder, eps_exp, method, do_emd)
1726 4246 : CALL exp_pade_real(mat_U, mat_theta, nsquare, norder)
1727 : END IF
1728 :
1729 17098 : DO idim = 1, dim2
1730 : ! rotate z's
1731 12852 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(1, idim), mat_U, 0.0_dp, mat_t)
1732 12852 : CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(1, idim))
1733 12852 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, zij(2, idim), mat_U, 0.0_dp, mat_t)
1734 17098 : CALL parallel_gemm('T', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_U, mat_t, 0.0_dp, zij(2, idim))
1735 : END DO
1736 : ! collect rotation matrix
1737 4246 : CALL parallel_gemm('N', 'N', nrow_global, nrow_global, nrow_global, 1.0_dp, mat_R, mat_U, 0.0_dp, mat_t)
1738 4246 : CALL cp_fm_to_fm(mat_t, mat_R)
1739 :
1740 4246 : CALL check_tolerance_new(weights, zij, tolerance, value)
1741 :
1742 4246 : IF (unit_nr > 0) THEN
1743 : WRITE (unit_nr, '(T2,A7,I6,1X,G20.15,E12.4,E12.4,E12.4)') &
1744 2123 : "CRAZY| ", iterations, value, tolerance, maxeval, limit_crazy_angle
1745 2123 : CALL m_flush(unit_nr)
1746 : END IF
1747 4246 : IF (tolerance < eps_localization .OR. iterations >= max_iter) EXIT
1748 : END DO
1749 :
1750 100 : IF (PRESENT(converged)) converged = (tolerance < eps_localization)
1751 :
1752 100 : CALL cp_cfm_release(cmat_A)
1753 100 : CALL cp_cfm_release(cmat_R)
1754 100 : CALL cp_cfm_release(cmat_T1)
1755 :
1756 100 : CALL cp_fm_release(mat_U)
1757 100 : CALL cp_fm_release(mat_T)
1758 100 : CALL cp_fm_release(mat_theta)
1759 :
1760 100 : CALL rotate_orbitals(mat_R, vectors)
1761 :
1762 100 : CALL cp_fm_release(mat_R)
1763 100 : DEALLOCATE (evals_exp, evals, diag_z)
1764 100 : DEALLOCATE (mii, mij, mjj)
1765 :
1766 100 : CALL timestop(handle)
1767 :
1768 200 : END SUBROUTINE crazy_rotations
1769 :
1770 : ! **************************************************************************************************
1771 : !> \brief use the exponential parametrization as described in to perform a direct mini
1772 : !> Gerd Berghold et al. PRB 61 (15), pag. 10040 (2000)
1773 : !> none of the input is modified for the time being, just finds the rotations
1774 : !> that minimizes, and throws it away afterwards :-)
1775 : !> apart from being expensive and not cleaned, this works fine
1776 : !> useful to try different spread functionals
1777 : !> \param weights ...
1778 : !> \param zij ...
1779 : !> \param vectors ...
1780 : !> \param max_iter ...
1781 : !> \param eps_localization ...
1782 : !> \param iterations ...
1783 : ! **************************************************************************************************
1784 2 : SUBROUTINE direct_mini(weights, zij, vectors, max_iter, eps_localization, iterations)
1785 : REAL(KIND=dp), INTENT(IN) :: weights(:)
1786 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
1787 : INTEGER, INTENT(IN) :: max_iter
1788 : REAL(KIND=dp), INTENT(IN) :: eps_localization
1789 : INTEGER :: iterations
1790 :
1791 : CHARACTER(len=*), PARAMETER :: routineN = 'direct_mini'
1792 : COMPLEX(KIND=dp), PARAMETER :: cone = (1.0_dp, 0.0_dp), &
1793 : czero = (0.0_dp, 0.0_dp)
1794 : REAL(KIND=dp), PARAMETER :: gold_sec = 0.3819_dp
1795 :
1796 : COMPLEX(KIND=dp) :: lk, ll, tmp
1797 2 : COMPLEX(KIND=dp), DIMENSION(:), POINTER :: evals_exp
1798 2 : COMPLEX(KIND=dp), DIMENSION(:, :), POINTER :: diag_z
1799 : INTEGER :: handle, i, icol, idim, irow, &
1800 : line_search_count, line_searches, lsl, &
1801 : lsm, lsr, n, ncol_local, ndim, &
1802 : nrow_local, output_unit
1803 2 : INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
1804 : LOGICAL :: new_direction
1805 : REAL(KIND=dp) :: a, b, beta_pr, c, denom, ds, ds_min, fa, &
1806 : fb, fc, nom, normg, normg_cross, &
1807 : normg_old, npos, omega, tol, val, x0, &
1808 : x1, xa, xb, xc
1809 : REAL(KIND=dp), DIMENSION(150) :: energy, grad, pos
1810 2 : REAL(KIND=dp), DIMENSION(:), POINTER :: evals, fval, fvald
1811 : TYPE(cp_cfm_type) :: cmat_A, cmat_B, cmat_M, cmat_R, cmat_t1, &
1812 : cmat_t2, cmat_U
1813 2 : TYPE(cp_cfm_type), ALLOCATABLE, DIMENSION(:) :: c_zij
1814 : TYPE(cp_fm_type) :: matrix_A, matrix_G, matrix_G_old, &
1815 : matrix_G_search, matrix_H, matrix_R, &
1816 : matrix_T
1817 :
1818 2 : NULLIFY (evals, evals_exp, diag_z, fval, fvald)
1819 :
1820 2 : CALL timeset(routineN, handle)
1821 2 : output_unit = cp_logger_get_default_io_unit()
1822 :
1823 2 : n = zij(1, 1)%matrix_struct%nrow_global
1824 2 : ndim = (SIZE(zij, 2))
1825 :
1826 2 : IF (output_unit > 0) THEN
1827 1 : WRITE (output_unit, '(T4,A )') "Localization by direct minimization of the functional; "
1828 1 : WRITE (output_unit, '(T5,2A13,A20,A20,A10 )') " Line search ", " Iteration ", " Functional ", " Tolerance ", " ds Min "
1829 : END IF
1830 :
1831 20 : ALLOCATE (evals(n), evals_exp(n), diag_z(n, ndim), fval(n), fvald(n))
1832 18 : ALLOCATE (c_zij(ndim))
1833 :
1834 : ! create the three complex matrices Z
1835 14 : DO idim = 1, ndim
1836 12 : CALL cp_cfm_create(c_zij(idim), zij(1, 1)%matrix_struct)
1837 : c_zij(idim)%local_data = CMPLX(zij(1, idim)%local_data, &
1838 158 : zij(2, idim)%local_data, dp)
1839 : END DO
1840 :
1841 2 : CALL cp_fm_create(matrix_A, zij(1, 1)%matrix_struct)
1842 2 : CALL cp_fm_create(matrix_G, zij(1, 1)%matrix_struct)
1843 2 : CALL cp_fm_create(matrix_T, zij(1, 1)%matrix_struct)
1844 2 : CALL cp_fm_create(matrix_H, zij(1, 1)%matrix_struct)
1845 2 : CALL cp_fm_create(matrix_G_search, zij(1, 1)%matrix_struct)
1846 2 : CALL cp_fm_create(matrix_G_old, zij(1, 1)%matrix_struct)
1847 2 : CALL cp_fm_create(matrix_R, zij(1, 1)%matrix_struct)
1848 2 : CALL cp_fm_set_all(matrix_R, 0.0_dp, 1.0_dp)
1849 :
1850 2 : CALL cp_fm_set_all(matrix_A, 0.0_dp)
1851 : ! CALL cp_fm_init_random ( matrix_A )
1852 :
1853 2 : CALL cp_cfm_create(cmat_A, zij(1, 1)%matrix_struct)
1854 2 : CALL cp_cfm_create(cmat_U, zij(1, 1)%matrix_struct)
1855 2 : CALL cp_cfm_create(cmat_R, zij(1, 1)%matrix_struct)
1856 2 : CALL cp_cfm_create(cmat_t1, zij(1, 1)%matrix_struct)
1857 2 : CALL cp_cfm_create(cmat_t2, zij(1, 1)%matrix_struct)
1858 2 : CALL cp_cfm_create(cmat_B, zij(1, 1)%matrix_struct)
1859 2 : CALL cp_cfm_create(cmat_M, zij(1, 1)%matrix_struct)
1860 :
1861 : CALL cp_cfm_get_info(cmat_B, nrow_local=nrow_local, ncol_local=ncol_local, &
1862 2 : row_indices=row_indices, col_indices=col_indices)
1863 :
1864 2 : CALL cp_fm_set_all(matrix_G_old, 0.0_dp)
1865 2 : CALL cp_fm_set_all(matrix_G_search, 0.0_dp)
1866 2 : normg_old = 1.0E30_dp
1867 2 : ds_min = 1.0_dp
1868 2 : new_direction = .TRUE.
1869 2 : Iterations = 0
1870 2 : line_searches = 0
1871 2 : line_search_count = 0
1872 204 : DO
1873 204 : iterations = iterations + 1
1874 : ! compute U,R,evals given A
1875 2652 : cmat_A%local_data = CMPLX(0.0_dp, matrix_A%local_data, dp) ! cmat_A is hermitian, evals are reals
1876 204 : CALL cp_cfm_heevd(cmat_A, cmat_R, evals)
1877 1020 : evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
1878 204 : CALL cp_cfm_to_cfm(cmat_R, cmat_t1)
1879 204 : CALL cp_cfm_column_scale(cmat_t1, evals_exp)
1880 204 : CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_R, czero, cmat_U)
1881 2652 : cmat_U%local_data = REAL(cmat_U%local_data, KIND=dp) ! enforce numerics, U is a real matrix
1882 :
1883 204 : IF (new_direction .AND. MOD(line_searches, 20) == 5) THEN ! reset with A .eq. 0
1884 0 : DO idim = 1, ndim
1885 0 : CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_U, czero, cmat_t1)
1886 0 : CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_U, cmat_t1, czero, c_zij(idim))
1887 : END DO
1888 : ! collect rotation matrix
1889 0 : matrix_H%local_data = REAL(cmat_U%local_data, KIND=dp)
1890 0 : CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_R, matrix_H, 0.0_dp, matrix_T)
1891 0 : CALL cp_fm_to_fm(matrix_T, matrix_R)
1892 :
1893 0 : CALL cp_cfm_set_all(cmat_U, czero, cone)
1894 0 : CALL cp_cfm_set_all(cmat_R, czero, cone)
1895 0 : CALL cp_cfm_set_all(cmat_A, czero)
1896 0 : CALL cp_fm_set_all(matrix_A, 0.0_dp)
1897 0 : evals(:) = 0.0_dp
1898 0 : evals_exp(:) = EXP((0.0_dp, -1.0_dp)*evals(:))
1899 0 : CALL cp_fm_set_all(matrix_G_old, 0.0_dp)
1900 0 : CALL cp_fm_set_all(matrix_G_search, 0.0_dp)
1901 0 : normg_old = 1.0E30_dp
1902 : END IF
1903 :
1904 : ! compute Omega and M
1905 204 : CALL cp_cfm_set_all(cmat_M, czero)
1906 204 : omega = 0.0_dp
1907 1428 : DO idim = 1, ndim
1908 1224 : CALL parallel_gemm('N', 'N', n, n, n, cone, c_zij(idim), cmat_U, czero, cmat_t1) ! t1=ZU
1909 1224 : CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_U, cmat_t1, czero, cmat_t2) ! t2=(U^T)ZU
1910 6120 : DO i = 1, n
1911 4896 : CALL cp_cfm_get_element(cmat_t2, i, i, diag_z(i, idim))
1912 : SELECT CASE (2) ! allows for selection of different spread functionals
1913 : CASE (1)
1914 : fval(i) = -weights(idim)*LOG(ABS(diag_z(i, idim))**2)
1915 : fvald(i) = -weights(idim)/(ABS(diag_z(i, idim))**2)
1916 : CASE (2) ! corresponds to the jacobi setup
1917 4896 : fval(i) = weights(idim) - weights(idim)*ABS(diag_z(i, idim))**2
1918 4896 : fvald(i) = -weights(idim)
1919 : END SELECT
1920 6120 : omega = omega + fval(i)
1921 : END DO
1922 6324 : DO icol = 1, ncol_local
1923 15912 : DO irow = 1, nrow_local
1924 9792 : tmp = cmat_t1%local_data(irow, icol)*CONJG(diag_z(col_indices(icol), idim))
1925 : cmat_M%local_data(irow, icol) = cmat_M%local_data(irow, icol) &
1926 14688 : + 4.0_dp*fvald(col_indices(icol))*REAL(tmp, KIND=dp)
1927 : END DO
1928 : END DO
1929 : END DO
1930 :
1931 : ! compute Hessian diagonal approximation for the preconditioner
1932 : IF (.TRUE.) THEN
1933 204 : CALL gradsq_at_0(diag_z, weights, matrix_H, ndim)
1934 : ELSE
1935 : CALL cp_fm_set_all(matrix_H, 1.0_dp)
1936 : END IF
1937 :
1938 : ! compute B
1939 1020 : DO icol = 1, ncol_local
1940 2652 : DO irow = 1, nrow_local
1941 1632 : ll = (0.0_dp, -1.0_dp)*evals(row_indices(irow))
1942 1632 : lk = (0.0_dp, -1.0_dp)*evals(col_indices(icol))
1943 2448 : IF (ABS(ll - lk) < 0.5_dp) THEN ! use a series expansion to avoid loss of precision
1944 622 : tmp = 1.0_dp
1945 622 : cmat_B%local_data(irow, icol) = 0.0_dp
1946 10574 : DO i = 1, 16
1947 9952 : cmat_B%local_data(irow, icol) = cmat_B%local_data(irow, icol) + tmp
1948 10574 : tmp = tmp*(ll - lk)/(i + 1)
1949 : END DO
1950 622 : cmat_B%local_data(irow, icol) = cmat_B%local_data(irow, icol)*EXP(lk)
1951 : ELSE
1952 1010 : cmat_B%local_data(irow, icol) = (EXP(lk) - EXP(ll))/(lk - ll)
1953 : END IF
1954 : END DO
1955 : END DO
1956 : ! compute gradient matrix_G
1957 :
1958 204 : CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_M, cmat_R, czero, cmat_t1) ! t1=(M^T)(R^T)
1959 204 : CALL parallel_gemm('C', 'N', n, n, n, cone, cmat_R, cmat_t1, czero, cmat_t2) ! t2=(R)t1
1960 204 : CALL cp_cfm_schur_product(cmat_t2, cmat_B, cmat_t1)
1961 204 : CALL parallel_gemm('N', 'C', n, n, n, cone, cmat_t1, cmat_R, czero, cmat_t2)
1962 204 : CALL parallel_gemm('N', 'N', n, n, n, cone, cmat_R, cmat_t2, czero, cmat_t1)
1963 2652 : matrix_G%local_data = REAL(cmat_t1%local_data, KIND=dp)
1964 204 : CALL cp_fm_transpose(matrix_G, matrix_T)
1965 204 : CALL cp_fm_scale_and_add(-1.0_dp, matrix_G, 1.0_dp, matrix_T)
1966 204 : CALL cp_fm_maxabsval(matrix_G, tol)
1967 :
1968 : ! from here on, minimizing technology
1969 204 : IF (new_direction) THEN
1970 : ! energy converged up to machine precision ?
1971 10 : line_searches = line_searches + 1
1972 : ! DO i=1,line_search_count
1973 : ! write(15,*) pos(i),energy(i)
1974 : ! ENDDO
1975 : ! write(15,*) ""
1976 : ! CALL m_flush(15)
1977 : !write(16,*) evals(:)
1978 : !write(17,*) matrix_A%local_data(:,:)
1979 : !write(18,*) matrix_G%local_data(:,:)
1980 10 : IF (output_unit > 0) THEN
1981 5 : WRITE (output_unit, '(T5,I10,T18,I10,T31,2F20.6,F10.3)') line_searches, Iterations, Omega, tol, ds_min
1982 5 : CALL m_flush(output_unit)
1983 : END IF
1984 10 : IF (tol < eps_localization .OR. iterations > max_iter) EXIT
1985 :
1986 : IF (.TRUE.) THEN ! do conjugate gradient CG
1987 8 : CALL cp_fm_trace(matrix_G, matrix_G_old, normg_cross)
1988 8 : normg_cross = normg_cross*0.5_dp ! takes into account the fact that A is antisymmetric
1989 : ! apply the preconditioner
1990 40 : DO icol = 1, ncol_local
1991 104 : DO irow = 1, nrow_local
1992 96 : matrix_G_old%local_data(irow, icol) = matrix_G%local_data(irow, icol)/matrix_H%local_data(irow, icol)
1993 : END DO
1994 : END DO
1995 8 : CALL cp_fm_trace(matrix_G, matrix_G_old, normg)
1996 8 : normg = normg*0.5_dp
1997 8 : beta_pr = (normg - normg_cross)/normg_old
1998 8 : normg_old = normg
1999 8 : beta_pr = MAX(beta_pr, 0.0_dp)
2000 8 : CALL cp_fm_scale_and_add(beta_pr, matrix_G_search, -1.0_dp, matrix_G_old)
2001 8 : CALL cp_fm_trace(matrix_G_search, matrix_G_old, normg_cross)
2002 8 : IF (normg_cross >= 0) THEN ! back to SD
2003 0 : IF (matrix_A%matrix_struct%para_env%is_source()) THEN
2004 0 : WRITE (cp_logger_get_default_unit_nr(), *) "!"
2005 : END IF
2006 0 : beta_pr = 0.0_dp
2007 0 : CALL cp_fm_scale_and_add(beta_pr, matrix_G_search, -1.0_dp, matrix_G_old)
2008 : END IF
2009 : ELSE ! SD
2010 : CALL cp_fm_scale_and_add(0.0_dp, matrix_G_search, -1.0_dp, matrix_G)
2011 : END IF
2012 : ! ds_min=1.0E-4_dp
2013 : line_search_count = 0
2014 : END IF
2015 202 : line_search_count = line_search_count + 1
2016 202 : energy(line_search_count) = Omega
2017 :
2018 : ! line search section
2019 : SELECT CASE (3)
2020 : CASE (1) ! two point line search
2021 : SELECT CASE (line_search_count)
2022 : CASE (1)
2023 : pos(1) = 0.0_dp
2024 : pos(2) = ds_min
2025 : CALL cp_fm_trace(matrix_G, matrix_G_search, grad(1))
2026 : grad(1) = grad(1)/2.0_dp
2027 : new_direction = .FALSE.
2028 : CASE (2)
2029 : new_direction = .TRUE.
2030 : x0 = pos(1) ! 0.0_dp
2031 : c = energy(1)
2032 : b = grad(1)
2033 : x1 = pos(2)
2034 : a = (energy(2) - b*x1 - c)/(x1**2)
2035 : IF (a <= 0.0_dp) a = 1.0E-15_dp
2036 : npos = -b/(2.0_dp*a)
2037 : val = a*npos**2 + b*npos + c
2038 : IF (val < energy(1) .AND. val <= energy(2)) THEN
2039 : ! we go to a minimum, but ...
2040 : ! we take a guard against too large steps
2041 : pos(3) = MIN(npos, MAXVAL(pos(1:2))*4.0_dp)
2042 : ELSE ! just take an extended step
2043 : pos(3) = MAXVAL(pos(1:2))*2.0_dp
2044 : END IF
2045 : END SELECT
2046 : CASE (2) ! 3 point line search
2047 : SELECT CASE (line_search_count)
2048 : CASE (1)
2049 : new_direction = .FALSE.
2050 : pos(1) = 0.0_dp
2051 : pos(2) = ds_min*0.8_dp
2052 : CASE (2)
2053 : new_direction = .FALSE.
2054 : IF (energy(2) > energy(1)) THEN
2055 : pos(3) = ds_min*0.7_dp
2056 : ELSE
2057 : pos(3) = ds_min*1.4_dp
2058 : END IF
2059 : CASE (3)
2060 : new_direction = .TRUE.
2061 : xa = pos(1)
2062 : xb = pos(2)
2063 : xc = pos(3)
2064 : fa = energy(1)
2065 : fb = energy(2)
2066 : fc = energy(3)
2067 : nom = (xb - xa)**2*(fb - fc) - (xb - xc)**2*(fb - fa)
2068 : denom = (xb - xa)*(fb - fc) - (xb - xc)*(fb - fa)
2069 : IF (ABS(denom) <= 1.0E-18_dp*MAX(ABS(fb - fc), ABS(fb - fa))) THEN
2070 : npos = xb
2071 : ELSE
2072 : npos = xb - 0.5_dp*nom/denom ! position of the stationary point
2073 : END IF
2074 : val = (npos - xa)*(npos - xb)*fc/((xc - xa)*(xc - xb)) + &
2075 : (npos - xb)*(npos - xc)*fa/((xa - xb)*(xa - xc)) + &
2076 : (npos - xc)*(npos - xa)*fb/((xb - xc)*(xb - xa))
2077 : IF (val < fa .AND. val <= fb .AND. val <= fc) THEN ! OK, we go to a minimum
2078 : ! we take a guard against too large steps
2079 : pos(4) = MAX(MAXVAL(pos(1:3))*0.01_dp, &
2080 : MIN(npos, MAXVAL(pos(1:3))*4.0_dp))
2081 : ELSE ! just take an extended step
2082 : pos(4) = MAXVAL(pos(1:3))*2.0_dp
2083 : END IF
2084 : END SELECT
2085 : CASE (3) ! golden section hunt
2086 202 : new_direction = .FALSE.
2087 202 : IF (line_search_count == 1) THEN
2088 8 : lsl = 1
2089 8 : lsr = 0
2090 8 : lsm = 1
2091 8 : pos(1) = 0.0_dp
2092 8 : pos(2) = ds_min/gold_sec
2093 : ELSE
2094 194 : IF (line_search_count == 150) CPABORT("Too many")
2095 194 : IF (lsr == 0) THEN
2096 14 : IF (energy(line_search_count - 1) < energy(line_search_count)) THEN
2097 8 : lsr = line_search_count
2098 8 : pos(line_search_count + 1) = pos(lsm) + (pos(lsr) - pos(lsm))*gold_sec
2099 : ELSE
2100 6 : lsl = lsm
2101 6 : lsm = line_search_count
2102 6 : pos(line_search_count + 1) = pos(line_search_count)/gold_sec
2103 : END IF
2104 : ELSE
2105 180 : IF (pos(line_search_count) < pos(lsm)) THEN
2106 92 : IF (energy(line_search_count) < energy(lsm)) THEN
2107 : lsr = lsm
2108 : lsm = line_search_count
2109 : ELSE
2110 76 : lsl = line_search_count
2111 : END IF
2112 : ELSE
2113 88 : IF (energy(line_search_count) < energy(lsm)) THEN
2114 : lsl = lsm
2115 : lsm = line_search_count
2116 : ELSE
2117 60 : lsr = line_search_count
2118 : END IF
2119 : END IF
2120 180 : IF (pos(lsr) - pos(lsm) > pos(lsm) - pos(lsl)) THEN
2121 86 : pos(line_search_count + 1) = pos(lsm) + gold_sec*(pos(lsr) - pos(lsm))
2122 : ELSE
2123 94 : pos(line_search_count + 1) = pos(lsl) + gold_sec*(pos(lsm) - pos(lsl))
2124 : END IF
2125 180 : IF ((pos(lsr) - pos(lsl)) < 1.0E-3_dp*pos(lsr)) THEN
2126 8 : new_direction = .TRUE.
2127 : END IF
2128 : END IF ! lsr .eq. 0
2129 : END IF ! first step
2130 : END SELECT
2131 : ! now go to the suggested point
2132 202 : ds_min = pos(line_search_count + 1)
2133 202 : ds = pos(line_search_count + 1) - pos(line_search_count)
2134 204 : CALL cp_fm_scale_and_add(1.0_dp, matrix_A, ds, matrix_G_search)
2135 : END DO
2136 :
2137 : ! collect rotation matrix
2138 26 : matrix_H%local_data = REAL(cmat_U%local_data, KIND=dp)
2139 2 : CALL parallel_gemm('N', 'N', n, n, n, 1.0_dp, matrix_R, matrix_H, 0.0_dp, matrix_T)
2140 2 : CALL cp_fm_to_fm(matrix_T, matrix_R)
2141 2 : CALL rotate_orbitals(matrix_R, vectors)
2142 2 : CALL cp_fm_release(matrix_R)
2143 :
2144 2 : CALL cp_fm_release(matrix_A)
2145 2 : CALL cp_fm_release(matrix_G)
2146 2 : CALL cp_fm_release(matrix_H)
2147 2 : CALL cp_fm_release(matrix_T)
2148 2 : CALL cp_fm_release(matrix_G_search)
2149 2 : CALL cp_fm_release(matrix_G_old)
2150 2 : CALL cp_cfm_release(cmat_A)
2151 2 : CALL cp_cfm_release(cmat_U)
2152 2 : CALL cp_cfm_release(cmat_R)
2153 2 : CALL cp_cfm_release(cmat_t1)
2154 2 : CALL cp_cfm_release(cmat_t2)
2155 2 : CALL cp_cfm_release(cmat_B)
2156 2 : CALL cp_cfm_release(cmat_M)
2157 :
2158 2 : DEALLOCATE (evals, evals_exp, fval, fvald)
2159 :
2160 14 : DO idim = 1, SIZE(c_zij)
2161 156 : zij(1, idim)%local_data = REAL(c_zij(idim)%local_data, dp)
2162 156 : zij(2, idim)%local_data = AIMAG(c_zij(idim)%local_data)
2163 14 : CALL cp_cfm_release(c_zij(idim))
2164 : END DO
2165 2 : DEALLOCATE (c_zij)
2166 2 : DEALLOCATE (diag_z)
2167 :
2168 2 : CALL timestop(handle)
2169 :
2170 6 : END SUBROUTINE direct_mini
2171 :
2172 : ! **************************************************************************************************
2173 : !> \brief Parallel algorithm for jacobi rotations
2174 : !> \param weights ...
2175 : !> \param zij ...
2176 : !> \param vectors ...
2177 : !> \param para_env ...
2178 : !> \param max_iter ...
2179 : !> \param eps_localization ...
2180 : !> \param sweeps ...
2181 : !> \param out_each ...
2182 : !> \param target_time ...
2183 : !> \param start_time ...
2184 : !> \param restricted ...
2185 : !> \par History
2186 : !> use allgather for improved performance
2187 : !> \author MI (11.2009)
2188 : ! **************************************************************************************************
2189 390 : SUBROUTINE jacobi_rot_para(weights, zij, vectors, para_env, max_iter, eps_localization, &
2190 : sweeps, out_each, target_time, start_time, restricted)
2191 :
2192 : REAL(KIND=dp), INTENT(IN) :: weights(:)
2193 : TYPE(cp_fm_type), INTENT(IN) :: zij(:, :), vectors
2194 : TYPE(mp_para_env_type), POINTER :: para_env
2195 : INTEGER, INTENT(IN) :: max_iter
2196 : REAL(KIND=dp), INTENT(IN) :: eps_localization
2197 : INTEGER :: sweeps
2198 : INTEGER, INTENT(IN) :: out_each
2199 : REAL(dp) :: target_time, start_time
2200 : INTEGER :: restricted
2201 :
2202 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rot_para'
2203 :
2204 : INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2205 : nblock, nblock_max, ns_me, nstate, &
2206 : output_unit
2207 390 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2208 390 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_im, z_ij_loc_re
2209 : REAL(KIND=dp) :: xstate
2210 : TYPE(cp_fm_type) :: rmat
2211 390 : TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2212 :
2213 390 : CALL timeset(routineN, handle)
2214 :
2215 390 : output_unit = cp_logger_get_default_io_unit()
2216 :
2217 390 : NULLIFY (cz_ij_loc)
2218 :
2219 390 : dim2 = SIZE(zij, 2)
2220 :
2221 390 : CALL cp_fm_create(rmat, zij(1, 1)%matrix_struct)
2222 390 : CALL cp_fm_set_all(rmat, 0._dp, 1._dp)
2223 :
2224 390 : CALL cp_fm_get_info(rmat, nrow_global=nstate)
2225 :
2226 390 : IF (restricted > 0) THEN
2227 0 : IF (output_unit > 0) THEN
2228 0 : WRITE (output_unit, '(T4,A,I2,A )') "JACOBI: for the ROKS method, the last ", restricted, " orbitals DO NOT ROTATE"
2229 : END IF
2230 0 : nstate = nstate - restricted
2231 : END IF
2232 :
2233 : ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2234 390 : xstate = REAL(nstate, dp)/REAL(para_env%num_pe, dp)
2235 1560 : ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2236 1170 : DO ip = 1, para_env%num_pe
2237 780 : ns_bound(ip - 1, 1) = MIN(nstate, NINT(xstate*(ip - 1))) + 1
2238 1170 : ns_bound(ip - 1, 2) = MIN(nstate, NINT(xstate*ip))
2239 : END DO
2240 390 : nblock_max = 0
2241 1170 : DO ip = 0, para_env%num_pe - 1
2242 780 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2243 1170 : nblock_max = MAX(nblock_max, nblock)
2244 : END DO
2245 :
2246 : ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2247 1560 : ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2248 1170 : ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2249 2364 : ALLOCATE (cz_ij_loc(dim2))
2250 1584 : DO idim = 1, dim2
2251 3972 : DO ip = 0, para_env%num_pe - 1
2252 2388 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2253 2388 : CALL cp_fm_get_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2254 2388 : CALL cp_fm_get_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2255 3582 : IF (para_env%mepos == ip) THEN
2256 4752 : ALLOCATE (cz_ij_loc(idim)%c_array(nstate, nblock))
2257 5034 : DO i = 1, nblock
2258 37830 : DO j = 1, nstate
2259 36636 : cz_ij_loc(idim)%c_array(j, i) = CMPLX(z_ij_loc_re(j, i), z_ij_loc_im(j, i), dp)
2260 : END DO
2261 : END DO
2262 : END IF
2263 : END DO ! ip
2264 : END DO
2265 390 : DEALLOCATE (z_ij_loc_re)
2266 390 : DEALLOCATE (z_ij_loc_im)
2267 :
2268 1560 : ALLOCATE (rotmat(nstate, 2*nblock_max))
2269 :
2270 : CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, ns_bound, &
2271 : cz_ij_loc, rotmat, output_unit, eps_localization=eps_localization, &
2272 390 : target_time=target_time, start_time=start_time)
2273 :
2274 390 : ilow1 = ns_bound(para_env%mepos, 1)
2275 390 : ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2276 1170 : ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2277 1170 : ALLOCATE (z_ij_loc_im(nstate, nblock_max))
2278 1584 : DO idim = 1, dim2
2279 3972 : DO ip = 0, para_env%num_pe - 1
2280 79824 : z_ij_loc_re = 0.0_dp
2281 79824 : z_ij_loc_im = 0.0_dp
2282 2388 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2283 2388 : IF (ip == para_env%mepos) THEN
2284 5034 : ns_me = nblock
2285 5034 : DO i = 1, ns_me
2286 36636 : ii = ilow1 + i - 1
2287 37830 : DO j = 1, nstate
2288 32796 : z_ij_loc_re(j, i) = REAL(cz_ij_loc(idim)%c_array(j, i), dp)
2289 36636 : z_ij_loc_im(j, i) = AIMAG(cz_ij_loc(idim)%c_array(j, i))
2290 : END DO
2291 : END DO
2292 : END IF
2293 2388 : CALL para_env%bcast(z_ij_loc_re, ip)
2294 2388 : CALL para_env%bcast(z_ij_loc_im, ip)
2295 2388 : CALL cp_fm_set_submatrix(zij(1, idim), z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2296 3582 : CALL cp_fm_set_submatrix(zij(2, idim), z_ij_loc_im, 1, ns_bound(ip, 1), nstate, nblock)
2297 : END DO ! ip
2298 : END DO
2299 :
2300 1170 : DO ip = 0, para_env%num_pe - 1
2301 25704 : z_ij_loc_re = 0.0_dp
2302 780 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2303 780 : IF (ip == para_env%mepos) THEN
2304 1633 : ns_me = nblock
2305 1633 : DO i = 1, ns_me
2306 11774 : ii = ilow1 + i - 1
2307 12164 : DO j = 1, nstate
2308 11774 : z_ij_loc_re(j, i) = rotmat(j, i)
2309 : END DO
2310 : END DO
2311 : END IF
2312 780 : CALL para_env%bcast(z_ij_loc_re, ip)
2313 1170 : CALL cp_fm_set_submatrix(rmat, z_ij_loc_re, 1, ns_bound(ip, 1), nstate, nblock)
2314 : END DO
2315 :
2316 390 : DEALLOCATE (z_ij_loc_re)
2317 390 : DEALLOCATE (z_ij_loc_im)
2318 1584 : DO idim = 1, dim2
2319 1584 : DEALLOCATE (cz_ij_loc(idim)%c_array)
2320 : END DO
2321 390 : DEALLOCATE (cz_ij_loc)
2322 :
2323 390 : CALL para_env%sync()
2324 390 : CALL rotate_orbitals(rmat, vectors)
2325 390 : CALL cp_fm_release(rmat)
2326 :
2327 390 : DEALLOCATE (rotmat)
2328 390 : DEALLOCATE (ns_bound)
2329 :
2330 390 : CALL timestop(handle)
2331 :
2332 1170 : END SUBROUTINE jacobi_rot_para
2333 :
2334 : ! **************************************************************************************************
2335 : !> \brief almost identical to 'jacobi_rot_para' but with different inout variables
2336 : !> \param weights ...
2337 : !> \param czij ...
2338 : !> \param para_env ...
2339 : !> \param max_iter ...
2340 : !> \param rmat ...
2341 : !> \param tol_out ...
2342 : !> \author Soumya Ghosh (08/21)
2343 : ! **************************************************************************************************
2344 32 : SUBROUTINE jacobi_rot_para_1(weights, czij, para_env, max_iter, rmat, tol_out)
2345 :
2346 : REAL(KIND=dp), INTENT(IN) :: weights(:)
2347 : TYPE(cp_cfm_type), INTENT(IN) :: czij(:)
2348 : TYPE(mp_para_env_type), POINTER :: para_env
2349 : INTEGER, INTENT(IN) :: max_iter
2350 : TYPE(cp_cfm_type), INTENT(IN) :: rmat
2351 : REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2352 :
2353 : CHARACTER(len=*), PARAMETER :: routineN = 'jacobi_rot_para_1'
2354 :
2355 32 : COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: czij_array
2356 : INTEGER :: dim2, handle, i, idim, ii, ilow1, ip, j, &
2357 : nblock, nblock_max, ns_me, nstate, &
2358 : sweeps
2359 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: ns_bound
2360 32 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: rotmat, z_ij_loc_re
2361 : REAL(KIND=dp) :: xstate
2362 32 : TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2363 :
2364 32 : CALL timeset(routineN, handle)
2365 :
2366 32 : dim2 = SIZE(czij)
2367 :
2368 32 : CALL cp_cfm_set_all(rmat, CMPLX(0._dp, 0._dp, dp), CMPLX(1._dp, 0._dp, dp))
2369 :
2370 32 : CALL cp_cfm_get_info(rmat, nrow_global=nstate)
2371 :
2372 : ! Distribution of the states (XXXXX safe against more pe than states ??? XXXXX)
2373 32 : xstate = REAL(nstate, dp)/REAL(para_env%num_pe, dp)
2374 128 : ALLOCATE (ns_bound(0:para_env%num_pe - 1, 2))
2375 96 : DO ip = 1, para_env%num_pe
2376 64 : ns_bound(ip - 1, 1) = MIN(nstate, NINT(xstate*(ip - 1))) + 1
2377 96 : ns_bound(ip - 1, 2) = MIN(nstate, NINT(xstate*ip))
2378 : END DO
2379 32 : nblock_max = 0
2380 96 : DO ip = 0, para_env%num_pe - 1
2381 64 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2382 96 : nblock_max = MAX(nblock_max, nblock)
2383 : END DO
2384 :
2385 : ! otbtain local part of the matrix (could be made faster, but is likely irrelevant).
2386 128 : ALLOCATE (czij_array(nstate, nblock_max))
2387 192 : ALLOCATE (cz_ij_loc(dim2))
2388 128 : DO idim = 1, dim2
2389 320 : DO ip = 0, para_env%num_pe - 1
2390 192 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2391 : ! cfm --> allocatable
2392 192 : CALL cp_cfm_get_submatrix(czij(idim), czij_array, 1, ns_bound(ip, 1), nstate, nblock)
2393 288 : IF (para_env%mepos == ip) THEN
2394 96 : ns_me = nblock
2395 384 : ALLOCATE (cz_ij_loc(idim)%c_array(nstate, ns_me))
2396 2160 : DO i = 1, ns_me
2397 90912 : DO j = 1, nstate
2398 90816 : cz_ij_loc(idim)%c_array(j, i) = czij_array(j, i)
2399 : END DO
2400 : END DO
2401 : END IF
2402 : END DO ! ip
2403 : END DO
2404 32 : DEALLOCATE (czij_array)
2405 :
2406 128 : ALLOCATE (rotmat(nstate, 2*nblock_max))
2407 :
2408 : CALL jacobi_rot_para_core(weights, para_env, max_iter, sweeps, 1, dim2, nstate, nblock_max, ns_bound, &
2409 32 : cz_ij_loc, rotmat, 0, tol_out=tol_out)
2410 :
2411 32 : ilow1 = ns_bound(para_env%mepos, 1)
2412 32 : ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2413 128 : ALLOCATE (z_ij_loc_re(nstate, nblock_max))
2414 :
2415 96 : DO ip = 0, para_env%num_pe - 1
2416 62016 : z_ij_loc_re = 0.0_dp
2417 64 : nblock = ns_bound(ip, 2) - ns_bound(ip, 1) + 1
2418 64 : IF (ip == para_env%mepos) THEN
2419 720 : ns_me = nblock
2420 720 : DO i = 1, ns_me
2421 30272 : ii = ilow1 + i - 1
2422 30304 : DO j = 1, nstate
2423 30272 : z_ij_loc_re(j, i) = rotmat(j, i)
2424 : END DO
2425 : END DO
2426 : END IF
2427 64 : CALL para_env%bcast(z_ij_loc_re, ip)
2428 62048 : CALL cp_cfm_set_submatrix(rmat, CMPLX(z_ij_loc_re, 0.0_dp, dp), 1, ns_bound(ip, 1), nstate, nblock)
2429 : END DO
2430 :
2431 32 : DEALLOCATE (z_ij_loc_re)
2432 128 : DO idim = 1, dim2
2433 128 : DEALLOCATE (cz_ij_loc(idim)%c_array)
2434 : END DO
2435 32 : DEALLOCATE (cz_ij_loc)
2436 :
2437 32 : CALL para_env%sync()
2438 :
2439 32 : DEALLOCATE (rotmat)
2440 32 : DEALLOCATE (ns_bound)
2441 :
2442 32 : CALL timestop(handle)
2443 :
2444 64 : END SUBROUTINE jacobi_rot_para_1
2445 :
2446 : ! **************************************************************************************************
2447 : !> \brief Parallel algorithm for jacobi rotations
2448 : !> \param weights ...
2449 : !> \param para_env ...
2450 : !> \param max_iter ...
2451 : !> \param sweeps ...
2452 : !> \param out_each ...
2453 : !> \param dim2 ...
2454 : !> \param nstate ...
2455 : !> \param nblock_max ...
2456 : !> \param ns_bound ...
2457 : !> \param cz_ij_loc ...
2458 : !> \param rotmat ...
2459 : !> \param output_unit ...
2460 : !> \param tol_out ...
2461 : !> \param eps_localization ...
2462 : !> \param target_time ...
2463 : !> \param start_time ...
2464 : !> \par History
2465 : !> split out to reuse with different input types
2466 : !> \author HF (05.2022)
2467 : ! **************************************************************************************************
2468 1266 : SUBROUTINE jacobi_rot_para_core(weights, para_env, max_iter, sweeps, out_each, dim2, nstate, nblock_max, &
2469 422 : ns_bound, cz_ij_loc, rotmat, output_unit, tol_out, eps_localization, target_time, start_time)
2470 :
2471 : REAL(KIND=dp), INTENT(IN) :: weights(:)
2472 : TYPE(mp_para_env_type), POINTER :: para_env
2473 : INTEGER, INTENT(IN) :: max_iter
2474 : INTEGER, INTENT(OUT) :: sweeps
2475 : INTEGER, INTENT(IN) :: out_each, dim2, nstate, nblock_max
2476 : INTEGER, DIMENSION(0:, :), INTENT(IN) :: ns_bound
2477 : TYPE(set_c_2d_type), DIMENSION(:), POINTER :: cz_ij_loc
2478 : REAL(dp), DIMENSION(:, :), INTENT(OUT) :: rotmat
2479 : INTEGER, INTENT(IN) :: output_unit
2480 : REAL(dp), INTENT(OUT), OPTIONAL :: tol_out
2481 : REAL(KIND=dp), INTENT(IN), OPTIONAL :: eps_localization
2482 : REAL(dp), OPTIONAL :: target_time, start_time
2483 :
2484 : COMPLEX(KIND=dp) :: zi, zj
2485 422 : COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: c_array_me, c_array_partner
2486 : COMPLEX(KIND=dp), POINTER :: mii(:), mij(:), mjj(:)
2487 : INTEGER :: i, idim, ii, ik, il1, il2, il_recv, il_recv_partner, ilow1, ilow2, ip, ip_has_i, &
2488 : ip_partner, ip_recv_from, ip_recv_partner, ipair, iperm, istat, istate, iu1, iu2, iup1, &
2489 : iup2, j, jj, jstate, k, kk, lsweep, n1, n2, npair, nperm, ns_me, ns_partner, &
2490 : ns_recv_from, ns_recv_partner
2491 422 : INTEGER, ALLOCATABLE, DIMENSION(:) :: rcount, rdispl
2492 422 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: list_pair
2493 : LOGICAL :: should_stop
2494 422 : REAL(dp), ALLOCATABLE, DIMENSION(:, :) :: gmat, rmat_loc, rmat_recv, rmat_send
2495 422 : REAL(dp), ALLOCATABLE, DIMENSION(:, :, :) :: rmat_recv_all
2496 : REAL(KIND=dp) :: ct, func, gmax, grad, ri, rj, st, t1, &
2497 : t2, theta, tolerance, zc, zr
2498 422 : TYPE(set_c_1d_type), DIMENSION(:), POINTER :: zdiag_all, zdiag_me
2499 422 : TYPE(set_c_2d_type), DIMENSION(:), POINTER :: xyz_mix, xyz_mix_ns
2500 :
2501 422 : NULLIFY (zdiag_all, zdiag_me)
2502 422 : NULLIFY (xyz_mix, xyz_mix_ns)
2503 : NULLIFY (mii, mij, mjj)
2504 :
2505 2110 : ALLOCATE (mii(dim2), mij(dim2), mjj(dim2))
2506 :
2507 1266 : ALLOCATE (rcount(para_env%num_pe), STAT=istat)
2508 844 : ALLOCATE (rdispl(para_env%num_pe), STAT=istat)
2509 :
2510 422 : tolerance = 1.0e10_dp
2511 422 : sweeps = 0
2512 :
2513 : ! number of processor pairs and number of permutations
2514 422 : npair = (para_env%num_pe + 1)/2
2515 422 : nperm = para_env%num_pe - MOD(para_env%num_pe + 1, 2)
2516 1266 : ALLOCATE (list_pair(2, npair))
2517 :
2518 : ! initialize rotation matrix
2519 87298 : rotmat = 0.0_dp
2520 2353 : DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2521 1931 : ii = i - ns_bound(para_env%mepos, 1) + 1
2522 2353 : rotmat(i, ii) = 1.0_dp
2523 : END DO
2524 :
2525 2556 : ALLOCATE (xyz_mix(dim2))
2526 2134 : ALLOCATE (xyz_mix_ns(dim2))
2527 2556 : ALLOCATE (zdiag_me(dim2))
2528 2134 : ALLOCATE (zdiag_all(dim2))
2529 :
2530 422 : ns_me = ns_bound(para_env%mepos, 2) - ns_bound(para_env%mepos, 1) + 1
2531 422 : IF (ns_me /= 0) THEN
2532 2070 : ALLOCATE (c_array_me(nstate, ns_me, dim2))
2533 1680 : DO idim = 1, dim2
2534 5478 : ALLOCATE (xyz_mix_ns(idim)%c_array(nstate, ns_me))
2535 : END DO
2536 1656 : ALLOCATE (gmat(nstate, ns_me))
2537 : END IF
2538 :
2539 1712 : DO idim = 1, dim2
2540 3870 : ALLOCATE (zdiag_me(idim)%c_array(nblock_max))
2541 7530 : zdiag_me(idim)%c_array = z_zero
2542 3870 : ALLOCATE (zdiag_all(idim)%c_array(para_env%num_pe*nblock_max))
2543 14192 : zdiag_all(idim)%c_array = z_zero
2544 : END DO
2545 1688 : ALLOCATE (rmat_recv(nblock_max*2, nblock_max))
2546 1266 : ALLOCATE (rmat_send(nblock_max*2, nblock_max))
2547 :
2548 : ! buffer for message passing
2549 2110 : ALLOCATE (rmat_recv_all(nblock_max*2, nblock_max, 0:para_env%num_pe - 1))
2550 :
2551 422 : IF (output_unit > 0) THEN
2552 195 : WRITE (output_unit, '(T4,A )') " Localization by iterative distributed Jacobi rotation"
2553 195 : WRITE (output_unit, '(T20,A12,T32, A22,T60, A12,A8 )') "Iteration", "Functional", "Tolerance", " Time "
2554 : END IF
2555 :
2556 87204 : DO lsweep = 1, max_iter + 1
2557 87204 : sweeps = lsweep
2558 87204 : IF (sweeps == max_iter + 1) THEN
2559 156 : IF (output_unit > 0) THEN
2560 62 : WRITE (output_unit, *) ' LOCALIZATION! loop did not converge within the maximum number of iterations.'
2561 62 : WRITE (output_unit, *) ' Present Max. gradient = ', tolerance
2562 : END IF
2563 : EXIT
2564 : END IF
2565 87048 : t1 = m_walltime()
2566 :
2567 174096 : DO iperm = 1, nperm
2568 :
2569 : ! fix partners for this permutation, and get the number of states
2570 87048 : CALL eberlein(iperm, para_env, list_pair)
2571 87048 : ip_partner = -1
2572 87048 : ns_partner = 0
2573 87048 : DO ipair = 1, npair
2574 87048 : IF (list_pair(1, ipair) == para_env%mepos) THEN
2575 43524 : ip_partner = list_pair(2, ipair)
2576 43524 : EXIT
2577 43524 : ELSE IF (list_pair(2, ipair) == para_env%mepos) THEN
2578 43524 : ip_partner = list_pair(1, ipair)
2579 43524 : EXIT
2580 : END IF
2581 : END DO
2582 87048 : IF (ip_partner >= 0) THEN
2583 87048 : ns_partner = ns_bound(ip_partner, 2) - ns_bound(ip_partner, 1) + 1
2584 : ELSE
2585 : ns_partner = 0
2586 : END IF
2587 :
2588 : ! if there is a non-zero block connecting two partners, jacobi-sweep it.
2589 87048 : IF (ns_partner*ns_me /= 0) THEN
2590 :
2591 348128 : ALLOCATE (rmat_loc(ns_me + ns_partner, ns_me + ns_partner))
2592 5089000 : rmat_loc = 0.0_dp
2593 698518 : DO i = 1, ns_me + ns_partner
2594 698518 : rmat_loc(i, i) = 1.0_dp
2595 : END DO
2596 :
2597 435160 : ALLOCATE (c_array_partner(nstate, ns_partner, dim2))
2598 :
2599 349730 : DO idim = 1, dim2
2600 1050792 : ALLOCATE (xyz_mix(idim)%c_array(ns_me + ns_partner, ns_me + ns_partner))
2601 1276400 : DO i = 1, ns_me
2602 7889886 : c_array_me(1:nstate, i, idim) = cz_ij_loc(idim)%c_array(1:nstate, i)
2603 : END DO
2604 : END DO
2605 :
2606 : CALL para_env%sendrecv(msgin=c_array_me, dest=ip_partner, &
2607 87032 : msgout=c_array_partner, source=ip_partner)
2608 :
2609 87032 : n1 = ns_me
2610 87032 : n2 = ns_partner
2611 87032 : ilow1 = ns_bound(para_env%mepos, 1)
2612 87032 : iup1 = ns_bound(para_env%mepos, 1) + n1 - 1
2613 87032 : ilow2 = ns_bound(ip_partner, 1)
2614 87032 : iup2 = ns_bound(ip_partner, 1) + n2 - 1
2615 87032 : IF (ns_bound(para_env%mepos, 1) < ns_bound(ip_partner, 1)) THEN
2616 43516 : il1 = 1
2617 : iu1 = n1
2618 43516 : iu1 = n1
2619 43516 : il2 = 1 + n1
2620 43516 : iu2 = n1 + n2
2621 : ELSE
2622 43516 : il1 = 1 + n2
2623 : iu1 = n1 + n2
2624 43516 : iu1 = n1 + n2
2625 43516 : il2 = 1
2626 43516 : iu2 = n2
2627 : END IF
2628 :
2629 349730 : DO idim = 1, dim2
2630 1189368 : DO i = 1, n1
2631 4337346 : xyz_mix(idim)%c_array(il1:iu1, il1 + i - 1) = c_array_me(ilow1:iup1, i, idim)
2632 4479210 : xyz_mix(idim)%c_array(il2:iu2, il1 + i - 1) = c_array_me(ilow2:iup2, i, idim)
2633 : END DO
2634 1276400 : DO i = 1, n2
2635 4337346 : xyz_mix(idim)%c_array(il2:iu2, il2 + i - 1) = c_array_partner(ilow2:iup2, i, idim)
2636 4479210 : xyz_mix(idim)%c_array(il1:iu1, il2 + i - 1) = c_array_partner(ilow1:iup1, i, idim)
2637 : END DO
2638 : END DO
2639 :
2640 698518 : DO istate = 1, n1 + n2
2641 2588016 : DO jstate = istate + 1, n1 + n2
2642 7663346 : DO idim = 1, dim2
2643 5773848 : mii(idim) = xyz_mix(idim)%c_array(istate, istate)
2644 5773848 : mij(idim) = xyz_mix(idim)%c_array(istate, jstate)
2645 7663346 : mjj(idim) = xyz_mix(idim)%c_array(jstate, jstate)
2646 : END DO
2647 1889498 : CALL get_angle(mii, mjj, mij, weights, theta)
2648 1889498 : st = SIN(theta)
2649 1889498 : ct = COS(theta)
2650 7663346 : DO idim = 1, dim2
2651 51281802 : DO i = 1, n1 + n2
2652 45507954 : zi = ct*xyz_mix(idim)%c_array(i, istate) + st*xyz_mix(idim)%c_array(i, jstate)
2653 45507954 : zj = -st*xyz_mix(idim)%c_array(i, istate) + ct*xyz_mix(idim)%c_array(i, jstate)
2654 45507954 : xyz_mix(idim)%c_array(i, istate) = zi
2655 51281802 : xyz_mix(idim)%c_array(i, jstate) = zj
2656 : END DO
2657 53171300 : DO i = 1, n1 + n2
2658 45507954 : zi = ct*xyz_mix(idim)%c_array(istate, i) + st*xyz_mix(idim)%c_array(jstate, i)
2659 45507954 : zj = -st*xyz_mix(idim)%c_array(istate, i) + ct*xyz_mix(idim)%c_array(jstate, i)
2660 45507954 : xyz_mix(idim)%c_array(istate, i) = zi
2661 51281802 : xyz_mix(idim)%c_array(jstate, i) = zj
2662 : END DO
2663 : END DO
2664 :
2665 17235398 : DO i = 1, n1 + n2
2666 14734414 : ri = ct*rmat_loc(i, istate) + st*rmat_loc(i, jstate)
2667 14734414 : rj = ct*rmat_loc(i, jstate) - st*rmat_loc(i, istate)
2668 14734414 : rmat_loc(i, istate) = ri
2669 16623912 : rmat_loc(i, jstate) = rj
2670 : END DO
2671 : END DO
2672 : END DO
2673 :
2674 87032 : k = nblock_max + 1
2675 : CALL para_env%sendrecv(rotmat(1:nstate, 1:ns_me), ip_partner, &
2676 5089000 : rotmat(1:nstate, k:k + n2 - 1), ip_partner)
2677 :
2678 87032 : IF (ilow1 < ilow2) THEN
2679 : ! no longer compiles in official sdgb:
2680 : !CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1, k), nstate, rmat_loc(1 + n1, 1), n1 + n2, 0.0_dp, gmat, nstate)
2681 : ! probably inefficient:
2682 : CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, rmat_loc(1 + n1:, 1:n1), &
2683 1467012 : n2, 0.0_dp, gmat(:, :), nstate)
2684 : CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(1:, 1:), &
2685 43516 : n1 + n2, 1.0_dp, gmat(:, :), nstate)
2686 : ELSE
2687 : CALL dgemm("N", "N", nstate, n1, n2, 1.0_dp, rotmat(1:, k:), nstate, &
2688 43516 : rmat_loc(1:, n2 + 1:), n1 + n2, 0.0_dp, gmat(:, :), nstate)
2689 : ! no longer compiles in official sdgb:
2690 : !CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1, 1), nstate, rmat_loc(n2 + 1, n2 + 1), n1 + n2, 1.0_dp, gmat, nstate)
2691 : ! probably inefficient:
2692 : CALL dgemm("N", "N", nstate, n1, n1, 1.0_dp, rotmat(1:, 1:), nstate, rmat_loc(n2 + 1:, n2 + 1:), &
2693 1147004 : n1, 1.0_dp, gmat(:, :), nstate)
2694 : END IF
2695 :
2696 87032 : CALL dcopy(nstate*n1, gmat(1, 1), 1, rotmat(1, 1), 1)
2697 :
2698 349730 : DO idim = 1, dim2
2699 1189368 : DO i = 1, n1
2700 7889886 : xyz_mix_ns(idim)%c_array(1:nstate, i) = z_zero
2701 : END DO
2702 :
2703 1189368 : DO istate = 1, n1
2704 7889886 : DO jstate = 1, nstate
2705 33311712 : DO i = 1, n2
2706 : xyz_mix_ns(idim)%c_array(jstate, istate) = &
2707 : xyz_mix_ns(idim)%c_array(jstate, istate) + &
2708 32385042 : c_array_partner(jstate, i, idim)*rmat_loc(il2 + i - 1, il1 + istate - 1)
2709 : END DO
2710 : END DO
2711 : END DO
2712 1276400 : DO istate = 1, n1
2713 7889886 : DO jstate = 1, nstate
2714 34151136 : DO i = 1, n1
2715 : xyz_mix_ns(idim)%c_array(jstate, istate) = xyz_mix_ns(idim)%c_array(jstate, istate) + &
2716 33224466 : c_array_me(jstate, i, idim)*rmat_loc(il1 + i - 1, il1 + istate - 1)
2717 : END DO
2718 : END DO
2719 : END DO
2720 : END DO ! idim
2721 :
2722 87032 : DEALLOCATE (c_array_partner)
2723 :
2724 : ELSE ! save my data
2725 64 : DO idim = 1, dim2
2726 88 : DO i = 1, ns_me
2727 120 : xyz_mix_ns(idim)%c_array(1:nstate, i) = cz_ij_loc(idim)%c_array(1:nstate, i)
2728 : END DO
2729 : END DO
2730 : END IF
2731 :
2732 349794 : DO idim = 1, dim2
2733 1276488 : DO i = 1, ns_me
2734 7889982 : cz_ij_loc(idim)%c_array(1:nstate, i) = z_zero
2735 : END DO
2736 : END DO
2737 :
2738 87048 : IF (ns_partner*ns_me /= 0) THEN
2739 : ! transpose rotation matrix rmat_loc
2740 698518 : DO i = 1, ns_me + ns_partner
2741 2588016 : DO j = i + 1, ns_me + ns_partner
2742 1889498 : ri = rmat_loc(i, j)
2743 1889498 : rmat_loc(i, j) = rmat_loc(j, i)
2744 2500984 : rmat_loc(j, i) = ri
2745 : END DO
2746 : END DO
2747 :
2748 : ! prepare for distribution
2749 392775 : DO i = 1, n1
2750 1510528 : rmat_send(1:n1, i) = rmat_loc(il1:iu1, il1 + i - 1)
2751 : END DO
2752 87032 : ik = nblock_max
2753 392775 : DO i = 1, n2
2754 1470263 : rmat_send(ik + 1:ik + n1, i) = rmat_loc(il1:iu1, il2 + i - 1)
2755 : END DO
2756 : ELSE
2757 64 : rmat_send = 0.0_dp
2758 : END IF
2759 :
2760 : ! collect data from all tasks (this takes some significant time)
2761 87048 : CALL para_env%allgather(rmat_send, rmat_recv_all)
2762 :
2763 : ! update blocks everywhere
2764 261144 : DO ip = 0, para_env%num_pe - 1
2765 :
2766 174096 : ip_recv_from = MOD(para_env%mepos - IP + para_env%num_pe, para_env%num_pe)
2767 6456192 : rmat_recv(:, :) = rmat_recv_all(:, :, ip_recv_from)
2768 :
2769 174096 : ns_recv_from = ns_bound(ip_recv_from, 2) - ns_bound(ip_recv_from, 1) + 1
2770 :
2771 261144 : IF (ns_me /= 0) THEN
2772 174080 : IF (ns_recv_from /= 0) THEN
2773 : !look for the partner of ip_recv_from
2774 174072 : ip_recv_partner = -1
2775 174072 : ns_recv_partner = 0
2776 174072 : DO ipair = 1, npair
2777 174072 : IF (list_pair(1, ipair) == ip_recv_from) THEN
2778 87040 : ip_recv_partner = list_pair(2, ipair)
2779 87040 : EXIT
2780 87032 : ELSE IF (list_pair(2, ipair) == ip_recv_from) THEN
2781 : ip_recv_partner = list_pair(1, ipair)
2782 : EXIT
2783 : END IF
2784 : END DO
2785 :
2786 174072 : IF (ip_recv_partner >= 0) THEN
2787 174072 : ns_recv_partner = ns_bound(ip_recv_partner, 2) - ns_bound(ip_recv_partner, 1) + 1
2788 : END IF
2789 174072 : IF (ns_recv_partner > 0) THEN
2790 174064 : il1 = ns_bound(para_env%mepos, 1)
2791 174064 : il_recv = ns_bound(ip_recv_from, 1)
2792 174064 : il_recv_partner = ns_bound(ip_recv_partner, 1)
2793 174064 : ik = nblock_max
2794 :
2795 699460 : DO idim = 1, dim2
2796 2378736 : DO i = 1, ns_recv_from
2797 1853340 : ii = il_recv + i - 1
2798 9079254 : DO j = 1, ns_me
2799 33224466 : jj = j
2800 35077806 : DO k = 1, ns_recv_from
2801 26523948 : kk = il_recv + k - 1
2802 : cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2803 33224466 : rmat_recv(i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2804 : END DO
2805 : END DO
2806 : END DO
2807 2552800 : DO i = 1, ns_recv_from
2808 1853340 : ii = il_recv + i - 1
2809 9079254 : DO j = 1, ns_me
2810 32385042 : jj = j
2811 34238382 : DO k = 1, ns_recv_partner
2812 25684524 : kk = il_recv_partner + k - 1
2813 : cz_ij_loc(idim)%c_array(ii, jj) = cz_ij_loc(idim)%c_array(ii, jj) + &
2814 32385042 : rmat_recv(ik + i, k)*xyz_mix_ns(idim)%c_array(kk, j)
2815 : END DO
2816 : END DO
2817 : END DO
2818 : END DO ! idim
2819 : ELSE
2820 8 : il1 = ns_bound(para_env%mepos, 1)
2821 8 : il_recv = ns_bound(ip_recv_from, 1)
2822 32 : DO idim = 1, dim2
2823 56 : DO j = 1, ns_me
2824 48 : jj = j
2825 72 : DO i = 1, ns_recv_from
2826 24 : ii = il_recv + i - 1
2827 48 : cz_ij_loc(idim)%c_array(ii, jj) = xyz_mix_ns(idim)%c_array(ii, j)
2828 : END DO
2829 : END DO
2830 : END DO ! idim
2831 : END IF
2832 : END IF
2833 : END IF ! ns_me
2834 : END DO ! ip
2835 :
2836 174096 : IF (ns_partner*ns_me /= 0) THEN
2837 87032 : DEALLOCATE (rmat_loc)
2838 349730 : DO idim = 1, dim2
2839 349730 : DEALLOCATE (xyz_mix(idim)%c_array)
2840 : END DO
2841 : END IF
2842 :
2843 : END DO ! iperm
2844 :
2845 : ! calculate the max gradient
2846 349794 : DO idim = 1, dim2
2847 1189440 : DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2848 926694 : ii = i - ns_bound(para_env%mepos, 1) + 1
2849 926694 : zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2850 1189440 : zdiag_me(idim)%c_array(ii) = cz_ij_loc(idim)%c_array(i, ii)
2851 : END DO
2852 788238 : rcount(:) = SIZE(zdiag_me(idim)%c_array)
2853 262746 : rdispl(1) = 0
2854 525492 : DO ip = 2, para_env%num_pe
2855 525492 : rdispl(ip) = rdispl(ip - 1) + rcount(ip - 1)
2856 : END DO
2857 : ! collect all the diagonal elements in a replicated 1d array
2858 3492450 : CALL para_env%allgatherv(zdiag_me(idim)%c_array, zdiag_all(idim)%c_array, rcount, rdispl)
2859 : END DO
2860 :
2861 87048 : gmax = 0.0_dp
2862 392799 : DO j = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2863 305751 : k = j - ns_bound(para_env%mepos, 1) + 1
2864 1337548 : DO i = 1, j - 1
2865 : ! find the location of the diagonal element (i,i)
2866 1087882 : DO ip = 0, para_env%num_pe - 1
2867 1087882 : IF (i >= ns_bound(ip, 1) .AND. i <= ns_bound(ip, 2)) THEN
2868 : ip_has_i = ip
2869 : EXIT
2870 : END IF
2871 : END DO
2872 944749 : ii = nblock_max*ip_has_i + i - ns_bound(ip_has_i, 1) + 1
2873 : ! mepos has the diagonal element (j,j), as well as the off diagonal (i,j)
2874 944749 : jj = nblock_max*para_env%mepos + j - ns_bound(para_env%mepos, 1) + 1
2875 944749 : grad = 0.0_dp
2876 3831673 : DO idim = 1, dim2
2877 2886924 : zi = zdiag_all(idim)%c_array(ii)
2878 2886924 : zj = zdiag_all(idim)%c_array(jj)
2879 3831673 : grad = grad + weights(idim)*REAL(4.0_dp*CONJG(cz_ij_loc(idim)%c_array(i, k))*(zj - zi), dp)
2880 : END DO
2881 1250500 : gmax = MAX(gmax, ABS(grad))
2882 : END DO
2883 : END DO
2884 :
2885 87048 : CALL para_env%max(gmax)
2886 87048 : tolerance = gmax
2887 87048 : IF (PRESENT(tol_out)) tol_out = tolerance
2888 :
2889 87048 : func = 0.0_dp
2890 392799 : DO i = ns_bound(para_env%mepos, 1), ns_bound(para_env%mepos, 2)
2891 305751 : k = i - ns_bound(para_env%mepos, 1) + 1
2892 1319493 : DO idim = 1, dim2
2893 926694 : zr = REAL(cz_ij_loc(idim)%c_array(i, k), dp)
2894 926694 : zc = AIMAG(cz_ij_loc(idim)%c_array(i, k))
2895 1232445 : func = func + weights(idim)*(1.0_dp - (zr*zr + zc*zc))/twopi/twopi
2896 : END DO
2897 : END DO
2898 87048 : CALL para_env%sum(func)
2899 87048 : t2 = m_walltime()
2900 :
2901 87048 : IF (output_unit > 0 .AND. MODULO(sweeps, out_each) == 0) THEN
2902 440 : WRITE (output_unit, '(T20,I12,T35,F20.10,T60,E12.4,F8.3)') sweeps, func, tolerance, t2 - t1
2903 440 : CALL m_flush(output_unit)
2904 : END IF
2905 87048 : IF (PRESENT(eps_localization)) THEN
2906 87016 : IF (tolerance < eps_localization) EXIT
2907 : END IF
2908 87048 : IF (PRESENT(target_time) .AND. PRESENT(start_time)) THEN
2909 86750 : CALL external_control(should_stop, "LOC", target_time=target_time, start_time=start_time)
2910 86750 : IF (should_stop) EXIT
2911 : END IF
2912 :
2913 : END DO ! lsweep
2914 :
2915 : ! buffer for message passing
2916 422 : DEALLOCATE (rmat_recv_all)
2917 :
2918 422 : DEALLOCATE (rmat_recv)
2919 422 : DEALLOCATE (rmat_send)
2920 422 : IF (ns_me > 0) THEN
2921 414 : DEALLOCATE (c_array_me)
2922 : END IF
2923 1712 : DO idim = 1, dim2
2924 1290 : DEALLOCATE (zdiag_me(idim)%c_array)
2925 1712 : DEALLOCATE (zdiag_all(idim)%c_array)
2926 : END DO
2927 422 : DEALLOCATE (zdiag_me)
2928 422 : DEALLOCATE (zdiag_all)
2929 422 : DEALLOCATE (xyz_mix)
2930 1712 : DO idim = 1, dim2
2931 1712 : IF (ns_me /= 0) THEN
2932 1266 : DEALLOCATE (xyz_mix_ns(idim)%c_array)
2933 : END IF
2934 : END DO
2935 422 : DEALLOCATE (xyz_mix_ns)
2936 422 : IF (ns_me /= 0) THEN
2937 414 : DEALLOCATE (gmat)
2938 : END IF
2939 422 : DEALLOCATE (mii)
2940 422 : DEALLOCATE (mij)
2941 422 : DEALLOCATE (mjj)
2942 422 : DEALLOCATE (list_pair)
2943 :
2944 844 : END SUBROUTINE jacobi_rot_para_core
2945 :
2946 : ! **************************************************************************************************
2947 : !> \brief ...
2948 : !> \param iperm ...
2949 : !> \param para_env ...
2950 : !> \param list_pair ...
2951 : ! **************************************************************************************************
2952 87048 : SUBROUTINE eberlein(iperm, para_env, list_pair)
2953 : INTEGER, INTENT(IN) :: iperm
2954 : TYPE(mp_para_env_type), POINTER :: para_env
2955 : INTEGER, DIMENSION(:, :) :: list_pair
2956 :
2957 : INTEGER :: i, ii, jj, npair
2958 :
2959 87048 : npair = (para_env%num_pe + 1)/2
2960 87048 : IF (iperm == 1) THEN
2961 : !..set up initial ordering
2962 261144 : DO I = 0, para_env%num_pe - 1
2963 174096 : II = ((i + 1) + 1)/2
2964 174096 : JJ = MOD((i + 1) + 1, 2) + 1
2965 261144 : list_pair(JJ, II) = i
2966 : END DO
2967 87048 : IF (MOD(para_env%num_pe, 2) == 1) list_pair(2, npair) = -1
2968 0 : ELSEIF (MOD(iperm, 2) == 0) THEN
2969 : !..a type shift
2970 0 : jj = list_pair(1, npair)
2971 0 : DO I = npair, 3, -1
2972 0 : list_pair(1, I) = list_pair(1, I - 1)
2973 : END DO
2974 0 : list_pair(1, 2) = list_pair(2, 1)
2975 0 : list_pair(2, 1) = jj
2976 : ELSE
2977 : !..b type shift
2978 0 : jj = list_pair(2, 1)
2979 0 : DO I = 1, npair - 1
2980 0 : list_pair(2, I) = list_pair(2, I + 1)
2981 : END DO
2982 0 : list_pair(2, npair) = jj
2983 : END IF
2984 :
2985 87048 : END SUBROUTINE eberlein
2986 :
2987 : ! **************************************************************************************************
2988 : !> \brief ...
2989 : !> \param vectors ...
2990 : !> \param op_sm_set ...
2991 : !> \param zij_fm_set ...
2992 : ! **************************************************************************************************
2993 490 : SUBROUTINE zij_matrix(vectors, op_sm_set, zij_fm_set)
2994 :
2995 : TYPE(cp_fm_type), INTENT(IN) :: vectors
2996 : TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER :: op_sm_set
2997 : TYPE(cp_fm_type), DIMENSION(:, :), INTENT(IN) :: zij_fm_set
2998 :
2999 : CHARACTER(len=*), PARAMETER :: routineN = 'zij_matrix'
3000 :
3001 : INTEGER :: handle, i, j, nao, nmoloc
3002 : TYPE(cp_fm_type) :: opvec
3003 :
3004 490 : CALL timeset(routineN, handle)
3005 :
3006 : ! get rows and cols of the input
3007 490 : CALL cp_fm_get_info(vectors, nrow_global=nao, ncol_global=nmoloc)
3008 : ! replicate the input kind of matrix
3009 490 : CALL cp_fm_create(opvec, vectors%matrix_struct)
3010 :
3011 : ! Compute zij here
3012 1996 : DO i = 1, SIZE(zij_fm_set, 2)
3013 5008 : DO j = 1, SIZE(zij_fm_set, 1)
3014 3012 : CALL cp_fm_set_all(zij_fm_set(j, i), 0.0_dp)
3015 3012 : CALL cp_dbcsr_sm_fm_multiply(op_sm_set(j, i)%matrix, vectors, opvec, ncol=nmoloc)
3016 : CALL parallel_gemm("T", "N", nmoloc, nmoloc, nao, 1.0_dp, vectors, opvec, 0.0_dp, &
3017 4518 : zij_fm_set(j, i))
3018 : END DO
3019 : END DO
3020 :
3021 490 : CALL cp_fm_release(opvec)
3022 490 : CALL timestop(handle)
3023 :
3024 490 : END SUBROUTINE zij_matrix
3025 :
3026 : ! **************************************************************************************************
3027 : !> \brief ...
3028 : !> \param vectors ...
3029 : ! **************************************************************************************************
3030 36 : SUBROUTINE scdm_qrfact(vectors)
3031 :
3032 : TYPE(cp_fm_type), INTENT(IN) :: vectors
3033 :
3034 : CHARACTER(len=*), PARAMETER :: routineN = 'scdm_qrfact'
3035 :
3036 : INTEGER :: handle, ncolT, nrowT
3037 36 : REAL(KIND=dp), DIMENSION(:), POINTER :: tau
3038 : TYPE(cp_fm_struct_type), POINTER :: cstruct
3039 : TYPE(cp_fm_type) :: CTp, Qf, tmp
3040 :
3041 36 : CALL timeset(routineN, handle)
3042 :
3043 : ! Create Transpose of Coefficient Matrix vectors
3044 36 : nrowT = vectors%matrix_struct%ncol_global
3045 36 : ncolT = vectors%matrix_struct%nrow_global
3046 :
3047 : CALL cp_fm_struct_create(cstruct, template_fmstruct=vectors%matrix_struct, &
3048 36 : nrow_global=nrowT, ncol_global=ncolT)
3049 36 : CALL cp_fm_create(CTp, cstruct)
3050 36 : CALL cp_fm_struct_release(cstruct)
3051 :
3052 108 : ALLOCATE (tau(nrowT))
3053 :
3054 36 : CALL cp_fm_transpose(vectors, CTp)
3055 :
3056 : ! Get QR decomposition of CTs
3057 36 : CALL cp_fm_pdgeqpf(CTp, tau, nrowT, ncolT, 1, 1)
3058 :
3059 : ! Construction of Q from the scalapack output
3060 : CALL cp_fm_struct_create(cstruct, para_env=CTp%matrix_struct%para_env, &
3061 : context=CTp%matrix_struct%context, nrow_global=CTp%matrix_struct%nrow_global, &
3062 36 : ncol_global=CTp%matrix_struct%nrow_global)
3063 36 : CALL cp_fm_create(Qf, cstruct)
3064 36 : CALL cp_fm_struct_release(cstruct)
3065 36 : CALL cp_fm_to_fm_submat(CTp, Qf, nrowT, nrowT, 1, 1, 1, 1)
3066 :
3067 : ! Get Q
3068 36 : CALL cp_fm_pdorgqr(Qf, tau, nrowT, 1, 1)
3069 :
3070 : ! Transform original coefficient matrix vectors
3071 36 : CALL cp_fm_create(tmp, vectors%matrix_struct)
3072 36 : CALL cp_fm_set_all(tmp, 0.0_dp, 1.0_dp)
3073 36 : CALL cp_fm_to_fm(vectors, tmp)
3074 36 : CALL parallel_gemm('N', 'N', ncolT, nrowT, nrowT, 1.0_dp, tmp, Qf, 0.0_dp, vectors)
3075 :
3076 : ! Cleanup
3077 36 : CALL cp_fm_release(CTp)
3078 36 : CALL cp_fm_release(tmp)
3079 36 : CALL cp_fm_release(Qf)
3080 36 : DEALLOCATE (tau)
3081 :
3082 36 : CALL timestop(handle)
3083 :
3084 144 : END SUBROUTINE scdm_qrfact
3085 :
3086 0 : END MODULE qs_localization_methods
|