Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Performance tests for basic tasks like matrix multiplies, copy, fft.
10 : !> \par History
11 : !> 30-Nov-2000 (JGH) added input
12 : !> 02-Jan-2001 (JGH) Parallel FFT
13 : !> 28-Feb-2002 (JGH) Clebsch-Gordon Coefficients
14 : !> 06-Jun-2003 (JGH) Real space grid test
15 : !> Eigensolver test (29.08.05,MK)
16 : !> \author JGH 6-NOV-2000
17 : ! **************************************************************************************************
18 : MODULE library_tests
19 :
20 : USE ai_coulomb_test, ONLY: eri_test
21 : USE cell_methods, ONLY: cell_create,&
22 : init_cell
23 : USE cell_types, ONLY: cell_release,&
24 : cell_type
25 : USE cg_test, ONLY: clebsch_gordon_test
26 : USE cp_blacs_env, ONLY: cp_blacs_env_create,&
27 : cp_blacs_env_release,&
28 : cp_blacs_env_type
29 : USE cp_dbcsr_api, ONLY: dbcsr_reset_randmat_seed,&
30 : dbcsr_run_tests
31 : USE cp_eri_mme_interface, ONLY: cp_eri_mme_perf_acc_test
32 : USE cp_files, ONLY: close_file,&
33 : open_file
34 : USE cp_fm_basic_linalg, ONLY: cp_fm_gemm
35 : USE cp_fm_diag, ONLY: cp_fm_syevd,&
36 : cp_fm_syevx
37 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
38 : cp_fm_struct_get,&
39 : cp_fm_struct_release,&
40 : cp_fm_struct_type
41 : USE cp_fm_types, ONLY: cp_fm_create,&
42 : cp_fm_pilaenv,&
43 : cp_fm_release,&
44 : cp_fm_set_all,&
45 : cp_fm_set_submatrix,&
46 : cp_fm_to_fm,&
47 : cp_fm_type
48 : USE cp_log_handling, ONLY: cp_get_default_logger,&
49 : cp_logger_type
50 : USE cp_output_handling, ONLY: cp_print_key_finished_output,&
51 : cp_print_key_unit_nr
52 : USE cp_realspace_grid_init, ONLY: init_input_type
53 : USE dbm_tests, ONLY: dbm_run_tests
54 : USE fft_tools, ONLY: BWFFT,&
55 : FFT_RADIX_CLOSEST,&
56 : FWFFT,&
57 : fft3d,&
58 : fft_radix_operations,&
59 : finalize_fft,&
60 : init_fft
61 : USE global_types, ONLY: global_environment_type
62 : USE input_constants, ONLY: do_diag_syevd,&
63 : do_diag_syevx,&
64 : do_mat_random,&
65 : do_mat_read,&
66 : do_pwgrid_ns_fullspace,&
67 : do_pwgrid_ns_halfspace,&
68 : do_pwgrid_spherical
69 : USE input_section_types, ONLY: section_vals_get,&
70 : section_vals_get_subs_vals,&
71 : section_vals_type,&
72 : section_vals_val_get
73 : USE kinds, ONLY: dp
74 : USE machine, ONLY: m_flush,&
75 : m_walltime
76 : USE mathconstants, ONLY: gaussi
77 : USE message_passing, ONLY: mp_para_env_type
78 : USE minimax_exp, ONLY: validate_exp_minimax
79 : USE mp2_grids, ONLY: test_least_square_ft
80 : USE mp_perf_test, ONLY: mpi_perf_test
81 : USE parallel_gemm_api, ONLY: parallel_gemm
82 : USE parallel_rng_types, ONLY: UNIFORM,&
83 : rng_stream_type
84 : USE pw_grid_types, ONLY: FULLSPACE,&
85 : HALFSPACE,&
86 : pw_grid_type
87 : USE pw_grids, ONLY: pw_grid_create,&
88 : pw_grid_release
89 : USE pw_methods, ONLY: pw_transfer,&
90 : pw_zero
91 : USE pw_types, ONLY: pw_c1d_gs_type,&
92 : pw_c3d_rs_type,&
93 : pw_r3d_rs_type
94 : USE realspace_grid_types, ONLY: &
95 : realspace_grid_desc_type, realspace_grid_input_type, realspace_grid_type, rs_grid_create, &
96 : rs_grid_create_descriptor, rs_grid_print, rs_grid_release, rs_grid_release_descriptor, &
97 : rs_grid_zero, transfer_pw2rs, transfer_rs2pw
98 : USE shg_integrals_test, ONLY: shg_integrals_perf_acc_test
99 : #include "./base/base_uses.f90"
100 :
101 : IMPLICIT NONE
102 :
103 : PRIVATE
104 : PUBLIC :: lib_test
105 :
106 : INTEGER :: runtest(100)
107 : REAL(KIND=dp) :: max_memory
108 : REAL(KIND=dp), PARAMETER :: threshold = 1.0E-8_dp
109 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'library_tests'
110 :
111 : CONTAINS
112 :
113 : ! **************************************************************************************************
114 : !> \brief Master routine for tests
115 : !> \param root_section ...
116 : !> \param para_env ...
117 : !> \param globenv ...
118 : !> \par History
119 : !> none
120 : !> \author JGH 6-NOV-2000
121 : ! **************************************************************************************************
122 720 : SUBROUTINE lib_test(root_section, para_env, globenv)
123 :
124 : TYPE(section_vals_type), POINTER :: root_section
125 : TYPE(mp_para_env_type), POINTER :: para_env
126 : TYPE(global_environment_type), POINTER :: globenv
127 :
128 : CHARACTER(LEN=*), PARAMETER :: routineN = 'lib_test'
129 :
130 : INTEGER :: handle, iw
131 : LOGICAL :: explicit
132 : TYPE(cp_logger_type), POINTER :: logger
133 : TYPE(section_vals_type), POINTER :: cp_dbcsr_test_section, cp_fm_gemm_test_section, &
134 : dbm_test_section, eigensolver_section, eri_mme_test_section, pw_transfer_section, &
135 : rs_pw_transfer_section, shg_integrals_test_section
136 :
137 80 : CALL timeset(routineN, handle)
138 :
139 80 : logger => cp_get_default_logger()
140 80 : iw = cp_print_key_unit_nr(logger, root_section, "TEST%PROGRAM_RUN_INFO", extension=".log")
141 :
142 80 : IF (iw > 0) THEN
143 40 : WRITE (iw, '(T2,79("*"))')
144 40 : WRITE (iw, '(A,T31,A,T80,A)') ' *', ' PERFORMANCE TESTS ', '*'
145 40 : WRITE (iw, '(T2,79("*"))')
146 : END IF
147 : !
148 80 : CALL test_input(root_section, para_env)
149 : !
150 80 : IF (runtest(1) /= 0) CALL copy_test(para_env, iw)
151 : !
152 80 : IF (runtest(2) /= 0) CALL matmul_test(para_env, test_matmul=.TRUE., test_dgemm=.FALSE., iw=iw)
153 80 : IF (runtest(5) /= 0) CALL matmul_test(para_env, test_matmul=.FALSE., test_dgemm=.TRUE., iw=iw)
154 : !
155 80 : IF (runtest(3) /= 0) CALL fft_test(para_env, iw, globenv%fftw_plan_type, &
156 2 : globenv%fftw_wisdom_file_name)
157 : !
158 80 : IF (runtest(4) /= 0) CALL eri_test(iw)
159 : !
160 80 : IF (runtest(6) /= 0) CALL clebsch_gordon_test()
161 : !
162 : ! runtest 7 has been deleted and can be recycled
163 : !
164 80 : IF (runtest(8) /= 0) CALL mpi_perf_test(para_env, runtest(8), iw)
165 : !
166 80 : IF (runtest(10) /= 0) CALL validate_exp_minimax(runtest(10), iw)
167 : !
168 80 : IF (runtest(11) /= 0) CALL test_least_square_ft(runtest(11), iw)
169 : !
170 :
171 80 : rs_pw_transfer_section => section_vals_get_subs_vals(root_section, "TEST%RS_PW_TRANSFER")
172 80 : CALL section_vals_get(rs_pw_transfer_section, explicit=explicit)
173 80 : IF (explicit) THEN
174 2 : CALL rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
175 : END IF
176 :
177 80 : pw_transfer_section => section_vals_get_subs_vals(root_section, "TEST%PW_TRANSFER")
178 80 : CALL section_vals_get(pw_transfer_section, explicit=explicit)
179 80 : IF (explicit) THEN
180 10 : CALL pw_fft_test(para_env, iw, globenv, pw_transfer_section)
181 : END IF
182 :
183 80 : cp_fm_gemm_test_section => section_vals_get_subs_vals(root_section, "TEST%CP_FM_GEMM")
184 80 : CALL section_vals_get(cp_fm_gemm_test_section, explicit=explicit)
185 80 : IF (explicit) THEN
186 4 : CALL cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
187 : END IF
188 :
189 80 : eigensolver_section => section_vals_get_subs_vals(root_section, "TEST%EIGENSOLVER")
190 80 : CALL section_vals_get(eigensolver_section, explicit=explicit)
191 80 : IF (explicit) THEN
192 2 : CALL eigensolver_test(para_env, iw, eigensolver_section)
193 : END IF
194 :
195 80 : eri_mme_test_section => section_vals_get_subs_vals(root_section, "TEST%ERI_MME_TEST")
196 80 : CALL section_vals_get(eri_mme_test_section, explicit=explicit)
197 80 : IF (explicit) THEN
198 8 : CALL cp_eri_mme_perf_acc_test(para_env, iw, eri_mme_test_section)
199 : END IF
200 :
201 80 : shg_integrals_test_section => section_vals_get_subs_vals(root_section, "TEST%SHG_INTEGRALS_TEST")
202 80 : CALL section_vals_get(shg_integrals_test_section, explicit=explicit)
203 80 : IF (explicit) THEN
204 4 : CALL shg_integrals_perf_acc_test(iw, shg_integrals_test_section)
205 : END IF
206 :
207 : ! DBCSR tests
208 80 : cp_dbcsr_test_section => section_vals_get_subs_vals(root_section, "TEST%CP_DBCSR")
209 80 : CALL section_vals_get(cp_dbcsr_test_section, explicit=explicit)
210 80 : IF (explicit) THEN
211 32 : CALL cp_dbcsr_tests(para_env, iw, cp_dbcsr_test_section)
212 : END IF
213 :
214 : ! DBM tests
215 80 : dbm_test_section => section_vals_get_subs_vals(root_section, "TEST%DBM")
216 80 : CALL section_vals_get(dbm_test_section, explicit=explicit)
217 80 : IF (explicit) THEN
218 14 : CALL run_dbm_tests(para_env, iw, dbm_test_section)
219 : END IF
220 :
221 80 : CALL cp_print_key_finished_output(iw, logger, root_section, "TEST%PROGRAM_RUN_INFO")
222 :
223 80 : CALL timestop(handle)
224 :
225 80 : END SUBROUTINE lib_test
226 :
227 : ! **************************************************************************************************
228 : !> \brief Reads input section &TEST ... &END
229 : !> \param root_section ...
230 : !> \param para_env ...
231 : !> \author JGH 30-NOV-2000
232 : !> \note
233 : !> I---------------------------------------------------------------------------I
234 : !> I SECTION: &TEST ... &END I
235 : !> I I
236 : !> I MEMORY max_memory I
237 : !> I COPY n I
238 : !> I MATMUL n I
239 : !> I FFT n I
240 : !> I ERI n I
241 : !> I PW_FFT n I
242 : !> I Clebsch-Gordon n I
243 : !> I RS_GRIDS n I
244 : !> I MPI n I
245 : !> I RNG n -> Parallel random number generator I
246 : !> I---------------------------------------------------------------------------I
247 : ! **************************************************************************************************
248 160 : SUBROUTINE test_input(root_section, para_env)
249 : TYPE(section_vals_type), POINTER :: root_section
250 : TYPE(mp_para_env_type), POINTER :: para_env
251 :
252 : TYPE(section_vals_type), POINTER :: test_section
253 :
254 : !
255 : !..defaults
256 : ! using this style is not recommended, introduce sections instead (see e.g. cp_fm_gemm)
257 :
258 80 : runtest = 0
259 80 : test_section => section_vals_get_subs_vals(root_section, "TEST")
260 80 : CALL section_vals_val_get(test_section, "MEMORY", r_val=max_memory)
261 80 : CALL section_vals_val_get(test_section, 'COPY', i_val=runtest(1))
262 80 : CALL section_vals_val_get(test_section, 'MATMUL', i_val=runtest(2))
263 80 : CALL section_vals_val_get(test_section, 'DGEMM', i_val=runtest(5))
264 80 : CALL section_vals_val_get(test_section, 'FFT', i_val=runtest(3))
265 80 : CALL section_vals_val_get(test_section, 'ERI', i_val=runtest(4))
266 80 : CALL section_vals_val_get(test_section, 'CLEBSCH_GORDON', i_val=runtest(6))
267 80 : CALL section_vals_val_get(test_section, 'MPI', i_val=runtest(8))
268 80 : CALL section_vals_val_get(test_section, 'MINIMAX', i_val=runtest(10))
269 80 : CALL section_vals_val_get(test_section, 'LEAST_SQ_FT', i_val=runtest(11))
270 :
271 80 : CALL para_env%sync()
272 80 : END SUBROUTINE test_input
273 :
274 : ! **************************************************************************************************
275 : !> \brief Tests the performance to copy two vectors.
276 : !> \param para_env ...
277 : !> \param iw ...
278 : !> \par History
279 : !> none
280 : !> \author JGH 6-NOV-2000
281 : !> \note
282 : !> The results of these tests allow to determine the size of the cache
283 : !> of the CPU. This can be used to optimize the performance of the
284 : !> FFTSG library.
285 : ! **************************************************************************************************
286 2 : SUBROUTINE copy_test(para_env, iw)
287 : TYPE(mp_para_env_type), POINTER :: para_env
288 : INTEGER :: iw
289 :
290 : INTEGER :: i, ierr, j, len, ntim, siz
291 : REAL(KIND=dp) :: perf, t, tend, tstart
292 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: ca, cb
293 :
294 : ! test for copy --> Cache size
295 :
296 2 : siz = ABS(runtest(1))
297 2 : IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of copy ( F95 ) "
298 34 : DO i = 6, 24
299 34 : len = 2**i
300 34 : IF (8.0_dp*REAL(len, KIND=dp) > max_memory*0.5_dp) EXIT
301 96 : ALLOCATE (ca(len), STAT=ierr)
302 32 : IF (ierr /= 0) EXIT
303 64 : ALLOCATE (cb(len), STAT=ierr)
304 32 : IF (ierr /= 0) EXIT
305 :
306 32 : CALL RANDOM_NUMBER(ca)
307 32 : ntim = NINT(1.e7_dp/REAL(len, KIND=dp))
308 32 : ntim = MAX(ntim, 1)
309 32 : ntim = MIN(ntim, siz*10000)
310 :
311 32 : tstart = m_walltime()
312 512524 : DO j = 1, ntim
313 315058412 : cb(:) = ca(:)
314 512524 : ca(1) = REAL(j, KIND=dp)
315 : END DO
316 32 : tend = m_walltime()
317 32 : t = tend - tstart + threshold
318 32 : IF (t > 0.0_dp) THEN
319 32 : perf = REAL(ntim, KIND=dp)*REAL(len, KIND=dp)*1.e-6_dp/t
320 : ELSE
321 0 : perf = 0.0_dp
322 : END IF
323 :
324 32 : IF (para_env%is_source()) THEN
325 16 : WRITE (iw, '(A,i2,i10,A,T59,F14.4,A)') " Copy test: Size = 2^", i, &
326 32 : len/1024, " Kwords", perf, " Mcopy/s"
327 : END IF
328 :
329 32 : DEALLOCATE (ca)
330 34 : DEALLOCATE (cb)
331 : END DO
332 2 : CALL para_env%sync()
333 2 : END SUBROUTINE copy_test
334 :
335 : ! **************************************************************************************************
336 : !> \brief Tests the performance of different kinds of matrix matrix multiply
337 : !> kernels for the BLAS and F95 intrinsic matmul.
338 : !> \param para_env ...
339 : !> \param test_matmul ...
340 : !> \param test_dgemm ...
341 : !> \param iw ...
342 : !> \par History
343 : !> none
344 : !> \author JGH 6-NOV-2000
345 : ! **************************************************************************************************
346 2 : SUBROUTINE matmul_test(para_env, test_matmul, test_dgemm, iw)
347 : TYPE(mp_para_env_type), POINTER :: para_env
348 : LOGICAL :: test_matmul, test_dgemm
349 : INTEGER :: iw
350 :
351 : INTEGER :: i, ierr, j, len, ntim, siz
352 : REAL(KIND=dp) :: perf, t, tend, tstart, xdum
353 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: ma, mb, mc
354 :
355 : ! test for matrix multpies
356 :
357 2 : IF (test_matmul) THEN
358 2 : siz = ABS(runtest(2))
359 2 : IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of matmul ( F95 ) "
360 2 : DO i = 5, siz, 2
361 4 : len = 2**i + 1
362 4 : IF (8.0_dp*REAL(len*len, KIND=dp) > max_memory*0.3_dp) EXIT
363 16 : ALLOCATE (ma(len, len), STAT=ierr)
364 4 : IF (ierr /= 0) EXIT
365 12 : ALLOCATE (mb(len, len), STAT=ierr)
366 4 : IF (ierr /= 0) EXIT
367 12 : ALLOCATE (mc(len, len), STAT=ierr)
368 4 : IF (ierr /= 0) EXIT
369 35788 : mc = 0.0_dp
370 :
371 4 : CALL RANDOM_NUMBER(xdum)
372 35788 : ma = xdum
373 4 : CALL RANDOM_NUMBER(xdum)
374 35788 : mb = xdum
375 4 : ntim = NINT(1.e8_dp/(2.0_dp*REAL(len, KIND=dp)**3))
376 4 : ntim = MAX(ntim, 1)
377 4 : ntim = MIN(ntim, siz*200)
378 4 : tstart = m_walltime()
379 2832 : DO j = 1, ntim
380 2828 : mc(:, :) = MATMUL(ma, mb)
381 2832 : ma(1, 1) = REAL(j, KIND=dp)
382 : END DO
383 4 : tend = m_walltime()
384 4 : t = tend - tstart + threshold
385 4 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
386 4 : IF (para_env%is_source()) THEN
387 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
388 2 : " Matrix multiply test: c = a * b Size = ", len, perf, " Mflop/s"
389 : END IF
390 4 : tstart = m_walltime()
391 2832 : DO j = 1, ntim
392 3895652 : mc(:, :) = mc + MATMUL(ma, mb)
393 2832 : ma(1, 1) = REAL(j, KIND=dp)
394 : END DO
395 4 : tend = m_walltime()
396 4 : t = tend - tstart + threshold
397 4 : IF (t > 0.0_dp) THEN
398 4 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
399 : ELSE
400 0 : perf = 0.0_dp
401 : END IF
402 :
403 4 : IF (para_env%is_source()) THEN
404 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
405 2 : " Matrix multiply test: a = a * b Size = ", len, perf, " Mflop/s"
406 : END IF
407 :
408 4 : tstart = m_walltime()
409 2832 : DO j = 1, ntim
410 3895652 : mc(:, :) = mc + MATMUL(ma, TRANSPOSE(mb))
411 2832 : ma(1, 1) = REAL(j, KIND=dp)
412 : END DO
413 4 : tend = m_walltime()
414 4 : t = tend - tstart + threshold
415 4 : IF (t > 0.0_dp) THEN
416 4 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
417 : ELSE
418 0 : perf = 0.0_dp
419 : END IF
420 :
421 4 : IF (para_env%is_source()) THEN
422 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
423 2 : " Matrix multiply test: c = a * b(T) Size = ", len, perf, " Mflop/s"
424 : END IF
425 :
426 4 : tstart = m_walltime()
427 2832 : DO j = 1, ntim
428 3895652 : mc(:, :) = mc + MATMUL(TRANSPOSE(ma), mb)
429 2832 : ma(1, 1) = REAL(j, KIND=dp)
430 : END DO
431 4 : tend = m_walltime()
432 4 : t = tend - tstart + threshold
433 4 : IF (t > 0.0_dp) THEN
434 4 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
435 : ELSE
436 0 : perf = 0.0_dp
437 : END IF
438 :
439 4 : IF (para_env%is_source()) THEN
440 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
441 2 : " Matrix multiply test: c = a(T) * b Size = ", len, perf, " Mflop/s"
442 : END IF
443 :
444 4 : DEALLOCATE (ma)
445 4 : DEALLOCATE (mb)
446 4 : DEALLOCATE (mc)
447 : END DO
448 : END IF
449 :
450 : ! test for matrix multpies
451 2 : IF (test_dgemm) THEN
452 0 : siz = ABS(runtest(5))
453 0 : IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of matmul ( BLAS ) "
454 0 : DO i = 5, siz, 2
455 0 : len = 2**i + 1
456 0 : IF (8.0_dp*REAL(len*len, KIND=dp) > max_memory*0.3_dp) EXIT
457 0 : ALLOCATE (ma(len, len), STAT=ierr)
458 0 : IF (ierr /= 0) EXIT
459 0 : ALLOCATE (mb(len, len), STAT=ierr)
460 0 : IF (ierr /= 0) EXIT
461 0 : ALLOCATE (mc(len, len), STAT=ierr)
462 0 : IF (ierr /= 0) EXIT
463 0 : mc = 0.0_dp
464 :
465 0 : CALL RANDOM_NUMBER(xdum)
466 0 : ma = xdum
467 0 : CALL RANDOM_NUMBER(xdum)
468 0 : mb = xdum
469 0 : ntim = NINT(1.e8_dp/(2.0_dp*REAL(len, KIND=dp)**3))
470 0 : ntim = MAX(ntim, 1)
471 0 : ntim = MIN(ntim, 1000)
472 :
473 0 : tstart = m_walltime()
474 0 : DO j = 1, ntim
475 0 : CALL dgemm("N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
476 : END DO
477 0 : tend = m_walltime()
478 0 : t = tend - tstart + threshold
479 0 : IF (t > 0.0_dp) THEN
480 0 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
481 : ELSE
482 0 : perf = 0.0_dp
483 : END IF
484 :
485 0 : IF (para_env%is_source()) THEN
486 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
487 0 : " Matrix multiply test: c = a * b Size = ", len, perf, " Mflop/s"
488 : END IF
489 :
490 0 : tstart = m_walltime()
491 0 : DO j = 1, ntim
492 0 : CALL dgemm("N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
493 : END DO
494 0 : tend = m_walltime()
495 0 : t = tend - tstart + threshold
496 0 : IF (t > 0.0_dp) THEN
497 0 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
498 : ELSE
499 0 : perf = 0.0_dp
500 : END IF
501 :
502 0 : IF (para_env%is_source()) THEN
503 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
504 0 : " Matrix multiply test: a = a * b Size = ", len, perf, " Mflop/s"
505 : END IF
506 :
507 0 : tstart = m_walltime()
508 0 : DO j = 1, ntim
509 0 : CALL dgemm("N", "T", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
510 : END DO
511 0 : tend = m_walltime()
512 0 : t = tend - tstart + threshold
513 0 : IF (t > 0.0_dp) THEN
514 0 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
515 : ELSE
516 0 : perf = 0.0_dp
517 : END IF
518 :
519 0 : IF (para_env%is_source()) THEN
520 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
521 0 : " Matrix multiply test: c = a * b(T) Size = ", len, perf, " Mflop/s"
522 : END IF
523 :
524 0 : tstart = m_walltime()
525 0 : DO j = 1, ntim
526 0 : CALL dgemm("T", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
527 : END DO
528 0 : tend = m_walltime()
529 0 : t = tend - tstart + threshold
530 0 : IF (t > 0.0_dp) THEN
531 0 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
532 : ELSE
533 0 : perf = 0.0_dp
534 : END IF
535 :
536 0 : IF (para_env%is_source()) THEN
537 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
538 0 : " Matrix multiply test: c = a(T) * b Size = ", len, perf, " Mflop/s"
539 : END IF
540 :
541 0 : DEALLOCATE (ma)
542 0 : DEALLOCATE (mb)
543 0 : DEALLOCATE (mc)
544 : END DO
545 : END IF
546 :
547 2 : CALL para_env%sync()
548 :
549 2 : END SUBROUTINE matmul_test
550 :
551 : ! **************************************************************************************************
552 : !> \brief Tests the performance of all available FFT libraries for 3D FFTs
553 : !> \param para_env ...
554 : !> \param iw ...
555 : !> \param fftw_plan_type ...
556 : !> \param wisdom_file where FFTW3 should look to save/load wisdom
557 : !> \par History
558 : !> none
559 : !> \author JGH 6-NOV-2000
560 : ! **************************************************************************************************
561 2 : SUBROUTINE fft_test(para_env, iw, fftw_plan_type, wisdom_file)
562 :
563 : TYPE(mp_para_env_type), POINTER :: para_env
564 : INTEGER :: iw, fftw_plan_type
565 : CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
566 :
567 : INTEGER, PARAMETER :: ndate(3) = [12, 48, 96]
568 :
569 : INTEGER :: iall, ierr, it, j, len, n(3), ntim, &
570 : radix_in, radix_out, siz, stat
571 : COMPLEX(KIND=dp), DIMENSION(4, 4, 4) :: zz
572 2 : COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: ca, cb, cc
573 : CHARACTER(LEN=7) :: method
574 : REAL(KIND=dp) :: flops, perf, scale, t, tdiff, tend, &
575 : tstart
576 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: ra
577 :
578 : ! test for 3d FFT
579 :
580 2 : IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of 3D-FFT "
581 2 : siz = ABS(runtest(3))
582 :
583 8 : DO iall = 1, 100
584 : SELECT CASE (iall)
585 : CASE DEFAULT
586 2 : EXIT
587 : CASE (1)
588 : CALL init_fft("FFTSG", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file=wisdom_file, &
589 2 : pool_limit=10, plan_style=fftw_plan_type)
590 2 : method = "FFTSG "
591 : CASE (2)
592 2 : CYCLE
593 : CASE (3)
594 : CALL init_fft("FFTW3", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file=wisdom_file, &
595 2 : pool_limit=10, plan_style=fftw_plan_type)
596 10 : method = "FFTW3 "
597 : END SELECT
598 16 : n = 4
599 4 : zz = 0.0_dp
600 4 : CALL fft3d(FWFFT, n, zz, status=stat)
601 4 : IF (stat == 0) THEN
602 16 : DO it = 1, 3
603 12 : radix_in = ndate(it)
604 12 : CALL fft_radix_operations(radix_in, radix_out, FFT_RADIX_CLOSEST)
605 12 : len = radix_out
606 48 : n = len
607 12 : IF (16.0_dp*REAL(len*len*len, KIND=dp) > max_memory*0.5_dp) EXIT
608 60 : ALLOCATE (ra(len, len, len), STAT=ierr)
609 60 : ALLOCATE (ca(len, len, len), STAT=ierr)
610 12 : CALL RANDOM_NUMBER(ra)
611 4035516 : ca(:, :, :) = ra
612 12 : CALL RANDOM_NUMBER(ra)
613 4035516 : ca(:, :, :) = ca + gaussi*ra
614 12 : flops = REAL(len**3, KIND=dp)*15.0_dp*LOG(REAL(len, KIND=dp))
615 12 : ntim = NINT(siz*1.e7_dp/flops)
616 12 : ntim = MAX(ntim, 1)
617 12 : ntim = MIN(ntim, 200)
618 12 : scale = 1.0_dp/REAL(len**3, KIND=dp)
619 12 : tstart = m_walltime()
620 884 : DO j = 1, ntim
621 872 : CALL fft3d(FWFFT, n, ca)
622 884 : CALL fft3d(BWFFT, n, ca)
623 : END DO
624 12 : tend = m_walltime()
625 12 : t = tend - tstart + threshold
626 12 : IF (t > 0.0_dp) THEN
627 12 : perf = REAL(ntim, KIND=dp)*2.0_dp*flops*1.e-6_dp/t
628 : ELSE
629 0 : perf = 0.0_dp
630 : END IF
631 :
632 12 : IF (para_env%is_source()) THEN
633 : WRITE (iw, '(T2,A,A,i6,T59,F14.4,A)') &
634 6 : ADJUSTR(method), " test (in-place) Size = ", len, perf, " Mflop/s"
635 : END IF
636 12 : DEALLOCATE (ca)
637 16 : DEALLOCATE (ra)
638 : END DO
639 4 : IF (para_env%is_source()) WRITE (iw, *)
640 : ! test if input data is preserved
641 4 : len = 24
642 16 : n = len
643 4 : ALLOCATE (ra(len, len, len))
644 4 : ALLOCATE (ca(len, len, len))
645 4 : ALLOCATE (cb(len, len, len))
646 4 : ALLOCATE (cc(len, len, len))
647 4 : CALL RANDOM_NUMBER(ra)
648 57700 : ca(:, :, :) = ra
649 4 : CALL RANDOM_NUMBER(ra)
650 57700 : ca(:, :, :) = ca + gaussi*ra
651 57700 : cc(:, :, :) = ca
652 4 : CALL fft3d(FWFFT, n, ca, cb)
653 57700 : tdiff = MAXVAL(ABS(ca - cc))
654 4 : IF (tdiff > 1.0E-12_dp) THEN
655 0 : IF (para_env%is_source()) &
656 0 : WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), " FWFFT ", &
657 0 : " Input array is changed in out-of-place FFT !"
658 : ELSE
659 4 : IF (para_env%is_source()) &
660 2 : WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), " FWFFT ", &
661 4 : " Input array is not changed in out-of-place FFT !"
662 : END IF
663 57700 : ca(:, :, :) = cc
664 4 : CALL fft3d(BWFFT, n, ca, cb)
665 57700 : tdiff = MAXVAL(ABS(ca - cc))
666 4 : IF (tdiff > 1.0E-12_dp) THEN
667 0 : IF (para_env%is_source()) &
668 0 : WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), " BWFFT ", &
669 0 : " Input array is changed in out-of-place FFT !"
670 : ELSE
671 4 : IF (para_env%is_source()) &
672 2 : WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), " BWFFT ", &
673 4 : " Input array is not changed in out-of-place FFT !"
674 : END IF
675 4 : IF (para_env%is_source()) WRITE (iw, *)
676 :
677 4 : DEALLOCATE (ra)
678 4 : DEALLOCATE (ca)
679 4 : DEALLOCATE (cb)
680 4 : DEALLOCATE (cc)
681 : END IF
682 4 : CALL finalize_fft(para_env, wisdom_file=wisdom_file)
683 : END DO
684 :
685 2 : END SUBROUTINE fft_test
686 :
687 : ! **************************************************************************************************
688 : !> \brief test rs_pw_transfer performance
689 : !> \param para_env ...
690 : !> \param iw ...
691 : !> \param globenv ...
692 : !> \param rs_pw_transfer_section ...
693 : !> \author Joost VandeVondele
694 : !> 9.2008 Randomise rs grid [Iain Bethune]
695 : !> (c) The Numerical Algorithms Group (NAG) Ltd, 2008 on behalf of the HECToR project
696 : ! **************************************************************************************************
697 2 : SUBROUTINE rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
698 :
699 : TYPE(mp_para_env_type), POINTER :: para_env
700 : INTEGER :: iw
701 : TYPE(global_environment_type), POINTER :: globenv
702 : TYPE(section_vals_type), POINTER :: rs_pw_transfer_section
703 :
704 : CHARACTER(LEN=*), PARAMETER :: routineN = 'rs_pw_transfer_test'
705 :
706 : INTEGER :: halo_size, handle, i_loop, n_loop, ns_max
707 : INTEGER, DIMENSION(3) :: no, np
708 2 : INTEGER, DIMENSION(:), POINTER :: i_vals
709 : LOGICAL :: do_rs2pw
710 : REAL(KIND=dp) :: tend, tstart
711 : TYPE(cell_type), POINTER :: box
712 : TYPE(pw_grid_type), POINTER :: grid
713 : TYPE(pw_r3d_rs_type) :: ca
714 : TYPE(realspace_grid_desc_type), POINTER :: rs_desc
715 : TYPE(realspace_grid_input_type) :: input_settings
716 32 : TYPE(realspace_grid_type) :: rs_grid
717 : TYPE(section_vals_type), POINTER :: rs_grid_section
718 :
719 2 : CALL timeset(routineN, handle)
720 :
721 : !..set fft lib
722 : CALL init_fft(globenv%default_fft_library, alltoall=.FALSE., fftsg_sizes=.TRUE., &
723 : pool_limit=globenv%fft_pool_scratch_limit, &
724 : wisdom_file=globenv%fftw_wisdom_file_name, &
725 2 : plan_style=globenv%fftw_plan_type)
726 :
727 : ! .. set cell (should otherwise be irrelevant)
728 2 : NULLIFY (box)
729 2 : CALL cell_create(box)
730 : box%hmat = RESHAPE([20.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 20.0_dp, 0.0_dp, &
731 26 : 0.0_dp, 0.0_dp, 20.0_dp], [3, 3])
732 2 : CALL init_cell(box)
733 :
734 : ! .. grid type and pw_grid
735 2 : NULLIFY (grid)
736 2 : CALL section_vals_val_get(rs_pw_transfer_section, "GRID", i_vals=i_vals)
737 8 : np = i_vals
738 2 : CALL pw_grid_create(grid, para_env, box%hmat, grid_span=FULLSPACE, npts=np, fft_usage=.TRUE., iounit=iw)
739 8 : no = grid%npts
740 :
741 2 : CALL ca%create(grid)
742 2 : CALL pw_zero(ca)
743 :
744 : ! .. rs input setting type
745 2 : CALL section_vals_val_get(rs_pw_transfer_section, "HALO_SIZE", i_val=halo_size)
746 2 : rs_grid_section => section_vals_get_subs_vals(rs_pw_transfer_section, "RS_GRID")
747 2 : ns_max = 2*halo_size + 1
748 2 : CALL init_input_type(input_settings, ns_max, rs_grid_section, 1, [-1, -1, -1])
749 :
750 : ! .. rs type
751 2 : NULLIFY (rs_desc)
752 2 : CALL rs_grid_create_descriptor(rs_desc, pw_grid=grid, input_settings=input_settings)
753 2 : CALL rs_grid_create(rs_grid, rs_desc)
754 2 : CALL rs_grid_print(rs_grid, iw)
755 2 : CALL rs_grid_zero(rs_grid)
756 :
757 : ! Put random values on the grid, so summation check will pick up errors
758 2 : CALL RANDOM_NUMBER(rs_grid%r)
759 :
760 2 : CALL section_vals_val_get(rs_pw_transfer_section, "N_loop", i_val=N_loop)
761 2 : CALL section_vals_val_get(rs_pw_transfer_section, "RS2PW", l_val=do_rs2pw)
762 :
763 : ! go for the real loops, sync to get max timings
764 2 : IF (para_env%is_source()) THEN
765 1 : WRITE (iw, '(T2,A)') ""
766 1 : WRITE (iw, '(T2,A)') "Timing rs_pw_transfer routine"
767 1 : WRITE (iw, '(T2,A)') ""
768 1 : WRITE (iw, '(T2,A)') "iteration time[s]"
769 : END IF
770 8 : DO i_loop = 1, N_loop
771 6 : CALL para_env%sync()
772 6 : tstart = m_walltime()
773 6 : IF (do_rs2pw) THEN
774 6 : CALL transfer_rs2pw(rs_grid, ca)
775 : ELSE
776 0 : CALL transfer_pw2rs(rs_grid, ca)
777 : END IF
778 6 : CALL para_env%sync()
779 6 : tend = m_walltime()
780 8 : IF (para_env%is_source()) THEN
781 3 : WRITE (iw, '(T2,I9,1X,F12.6)') i_loop, tend - tstart
782 : END IF
783 : END DO
784 :
785 : !cleanup
786 2 : CALL rs_grid_release(rs_grid)
787 2 : CALL rs_grid_release_descriptor(rs_desc)
788 2 : CALL ca%release()
789 2 : CALL pw_grid_release(grid)
790 2 : CALL cell_release(box)
791 2 : CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
792 :
793 2 : CALL timestop(handle)
794 :
795 10 : END SUBROUTINE rs_pw_transfer_test
796 :
797 : ! **************************************************************************************************
798 : !> \brief Tests the performance of PW calls to FFT routines
799 : !> \param para_env ...
800 : !> \param iw ...
801 : !> \param globenv ...
802 : !> \param pw_transfer_section ...
803 : !> \par History
804 : !> JGH 6-Feb-2001 : Test and performance code
805 : !> Made input sensitive [Joost VandeVondele]
806 : !> \author JGH 1-JAN-2001
807 : ! **************************************************************************************************
808 10 : SUBROUTINE pw_fft_test(para_env, iw, globenv, pw_transfer_section)
809 :
810 : TYPE(mp_para_env_type), POINTER :: para_env
811 : INTEGER :: iw
812 : TYPE(global_environment_type), POINTER :: globenv
813 : TYPE(section_vals_type), POINTER :: pw_transfer_section
814 :
815 : REAL(KIND=dp), PARAMETER :: toler = 1.e-11_dp
816 :
817 : INTEGER :: blocked_id, grid_span, i_layout, i_rep, &
818 : ig, ip, itmp, n_loop, n_rep, nn, p, q
819 10 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: layouts
820 : INTEGER, DIMENSION(2) :: distribution_layout
821 : INTEGER, DIMENSION(3) :: no, np
822 10 : INTEGER, DIMENSION(:), POINTER :: i_vals
823 : LOGICAL :: debug, is_fullspace, odd, &
824 : pw_grid_layout_all, spherical
825 : REAL(KIND=dp) :: em, et, flops, gsq, perf, t, t_max, &
826 : t_min, tend, tstart
827 10 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: t_end, t_start
828 : TYPE(cell_type), POINTER :: box
829 : TYPE(pw_c1d_gs_type) :: ca, cc
830 : TYPE(pw_c3d_rs_type) :: cb
831 : TYPE(pw_grid_type), POINTER :: grid
832 :
833 : !..set fft lib
834 :
835 : CALL init_fft(globenv%default_fft_library, alltoall=.FALSE., fftsg_sizes=.TRUE., &
836 : pool_limit=globenv%fft_pool_scratch_limit, &
837 : wisdom_file=globenv%fftw_wisdom_file_name, &
838 10 : plan_style=globenv%fftw_plan_type)
839 :
840 : !..the unit cell (should not really matter, the number of grid points do)
841 10 : NULLIFY (box, grid)
842 10 : CALL cell_create(box)
843 : box%hmat = RESHAPE([10.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 8.0_dp, 0.0_dp, &
844 130 : 0.0_dp, 0.0_dp, 7.0_dp], [3, 3])
845 10 : CALL init_cell(box)
846 :
847 10 : CALL section_vals_get(pw_transfer_section, n_repetition=n_rep)
848 70 : DO i_rep = 1, n_rep
849 :
850 : ! how often should we do the transfer
851 60 : CALL section_vals_val_get(pw_transfer_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
852 180 : ALLOCATE (t_start(N_loop))
853 120 : ALLOCATE (t_end(N_loop))
854 :
855 : ! setup of the grids
856 60 : CALL section_vals_val_get(pw_transfer_section, "GRID", i_rep_section=i_rep, i_vals=i_vals)
857 240 : np = i_vals
858 :
859 60 : CALL section_vals_val_get(pw_transfer_section, "PW_GRID_BLOCKED", i_rep_section=i_rep, i_val=blocked_id)
860 60 : CALL section_vals_val_get(pw_transfer_section, "DEBUG", i_rep_section=i_rep, l_val=debug)
861 :
862 : CALL section_vals_val_get(pw_transfer_section, "PW_GRID_LAYOUT_ALL", i_rep_section=i_rep, &
863 60 : l_val=pw_grid_layout_all)
864 :
865 : ! prepare to loop over all or a specific layout
866 60 : IF (pw_grid_layout_all) THEN
867 : ! count layouts that fit
868 8 : itmp = 0
869 : ! start from 2, (/1,para_env%num_pe/) is not supported
870 16 : DO p = 2, para_env%num_pe
871 8 : q = para_env%num_pe/p
872 16 : IF (p*q == para_env%num_pe) THEN
873 8 : itmp = itmp + 1
874 : END IF
875 : END DO
876 : ! build list
877 24 : ALLOCATE (layouts(2, itmp))
878 8 : itmp = 0
879 16 : DO p = 2, para_env%num_pe
880 8 : q = para_env%num_pe/p
881 16 : IF (p*q == para_env%num_pe) THEN
882 8 : itmp = itmp + 1
883 24 : layouts(:, itmp) = [p, q]
884 : END IF
885 : END DO
886 : ELSE
887 52 : CALL section_vals_val_get(pw_transfer_section, "PW_GRID_LAYOUT", i_rep_section=i_rep, i_vals=i_vals)
888 52 : ALLOCATE (layouts(2, 1))
889 156 : layouts(:, 1) = i_vals
890 : END IF
891 :
892 120 : DO i_layout = 1, SIZE(layouts, 2)
893 :
894 180 : distribution_layout = layouts(:, i_layout)
895 :
896 60 : CALL section_vals_val_get(pw_transfer_section, "PW_GRID", i_rep_section=i_rep, i_val=itmp)
897 :
898 : ! from cp_control_utils
899 16 : SELECT CASE (itmp)
900 : CASE (do_pwgrid_spherical)
901 16 : spherical = .TRUE.
902 16 : is_fullspace = .FALSE.
903 : CASE (do_pwgrid_ns_fullspace)
904 28 : spherical = .FALSE.
905 28 : is_fullspace = .TRUE.
906 : CASE (do_pwgrid_ns_halfspace)
907 16 : spherical = .FALSE.
908 60 : is_fullspace = .FALSE.
909 : END SELECT
910 :
911 : ! from pw_env_methods
912 60 : IF (spherical) THEN
913 16 : grid_span = HALFSPACE
914 16 : spherical = .TRUE.
915 16 : odd = .TRUE.
916 44 : ELSE IF (is_fullspace) THEN
917 28 : grid_span = FULLSPACE
918 28 : spherical = .FALSE.
919 28 : odd = .FALSE.
920 : ELSE
921 16 : grid_span = HALFSPACE
922 16 : spherical = .FALSE.
923 16 : odd = .TRUE.
924 : END IF
925 :
926 : ! actual setup
927 : CALL pw_grid_create(grid, para_env, box%hmat, grid_span=grid_span, odd=odd, spherical=spherical, &
928 : blocked=blocked_id, npts=np, fft_usage=.TRUE., &
929 60 : rs_dims=distribution_layout, iounit=iw)
930 :
931 60 : IF (iw > 0) CALL m_flush(iw)
932 :
933 : ! note that the number of grid points might be different from what the user requested (fft-able needed)
934 240 : no = grid%npts
935 :
936 60 : CALL ca%create(grid)
937 60 : CALL cb%create(grid)
938 60 : CALL cc%create(grid)
939 :
940 : ! initialize data
941 60 : CALL pw_zero(ca)
942 60 : CALL pw_zero(cb)
943 60 : CALL pw_zero(cc)
944 60 : nn = SIZE(ca%array)
945 142042 : DO ig = 1, nn
946 141982 : gsq = grid%gsq(ig)
947 142042 : ca%array(ig) = EXP(-gsq)
948 : END DO
949 :
950 420 : flops = PRODUCT(no)*30.0_dp*LOG(REAL(MAXVAL(no), KIND=dp))
951 60 : tstart = m_walltime()
952 596 : DO ip = 1, n_loop
953 536 : CALL para_env%sync()
954 536 : t_start(ip) = m_walltime()
955 536 : CALL pw_transfer(ca, cb, debug)
956 536 : CALL pw_transfer(cb, cc, debug)
957 536 : CALL para_env%sync()
958 596 : t_end(ip) = m_walltime()
959 : END DO
960 60 : tend = m_walltime()
961 60 : t = tend - tstart + threshold
962 60 : IF (t > 0.0_dp) THEN
963 60 : perf = REAL(n_loop, KIND=dp)*2.0_dp*flops*1.e-6_dp/t
964 : ELSE
965 0 : perf = 0.0_dp
966 : END IF
967 :
968 142102 : em = MAXVAL(ABS(ca%array(:) - cc%array(:)))
969 60 : CALL para_env%max(em)
970 142042 : et = SUM(ABS(ca%array(:) - cc%array(:)))
971 60 : CALL para_env%sum(et)
972 656 : t_min = MINVAL(t_end - t_start)
973 656 : t_max = MAXVAL(t_end - t_start)
974 :
975 60 : IF (para_env%is_source()) THEN
976 30 : WRITE (iw, *)
977 30 : WRITE (iw, '(A,T67,E14.6)') " Parallel FFT Tests: Maximal Error ", em
978 30 : WRITE (iw, '(A,T67,E14.6)') " Parallel FFT Tests: Total Error ", et
979 : WRITE (iw, '(A,T67,F14.0)') &
980 30 : " Parallel FFT Tests: Performance [Mflops] ", perf
981 30 : WRITE (iw, '(A,T67,F14.6)') " Best time : ", t_min
982 30 : WRITE (iw, '(A,T67,F14.6)') " Worst time: ", t_max
983 30 : IF (iw > 0) CALL m_flush(iw)
984 : END IF
985 :
986 : ! need debugging ???
987 60 : IF (em > toler .OR. et > toler) THEN
988 0 : CPWARN("The FFT results are not accurate ... starting debug pw_transfer")
989 0 : CALL pw_transfer(ca, cb, .TRUE.)
990 0 : CALL pw_transfer(cb, cc, .TRUE.)
991 : END IF
992 :
993 : ! done with these grids
994 60 : CALL ca%release()
995 60 : CALL cb%release()
996 60 : CALL cc%release()
997 180 : CALL pw_grid_release(grid)
998 :
999 : END DO
1000 :
1001 : ! local arrays
1002 60 : DEALLOCATE (layouts)
1003 60 : DEALLOCATE (t_start)
1004 190 : DEALLOCATE (t_end)
1005 :
1006 : END DO
1007 :
1008 : ! cleanup
1009 10 : CALL cell_release(box)
1010 10 : CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
1011 :
1012 20 : END SUBROUTINE pw_fft_test
1013 :
1014 : ! **************************************************************************************************
1015 : !> \brief Tests the eigensolver library routines
1016 : !> \param para_env ...
1017 : !> \param iw ...
1018 : !> \param eigensolver_section ...
1019 : !> \par History
1020 : !> JGH 6-Feb-2001 : Test and performance code
1021 : !> \author JGH 1-JAN-2001
1022 : ! **************************************************************************************************
1023 2 : SUBROUTINE eigensolver_test(para_env, iw, eigensolver_section)
1024 :
1025 : TYPE(mp_para_env_type), POINTER :: para_env
1026 : INTEGER :: iw
1027 : TYPE(section_vals_type), POINTER :: eigensolver_section
1028 :
1029 : INTEGER :: diag_method, i, i_loop, i_rep, &
1030 : init_method, j, n, n_loop, n_rep, &
1031 : neig, unit_number
1032 : REAL(KIND=dp) :: t1, t2
1033 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eigenvalues
1034 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: buffer
1035 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
1036 : TYPE(cp_fm_struct_type), POINTER :: fmstruct
1037 : TYPE(cp_fm_type) :: eigenvectors, matrix, work
1038 2 : TYPE(rng_stream_type), ALLOCATABLE :: rng_stream
1039 :
1040 2 : IF (iw > 0) THEN
1041 1 : WRITE (UNIT=iw, FMT="(/,/,T2,A,/)") "EIGENSOLVER TEST"
1042 : END IF
1043 :
1044 : ! create blacs env corresponding to para_env
1045 2 : NULLIFY (blacs_env)
1046 : CALL cp_blacs_env_create(blacs_env=blacs_env, &
1047 2 : para_env=para_env)
1048 :
1049 : ! loop over all tests
1050 2 : CALL section_vals_get(eigensolver_section, n_repetition=n_rep)
1051 10 : DO i_rep = 1, n_rep
1052 :
1053 : ! parse section
1054 8 : CALL section_vals_val_get(eigensolver_section, "N", i_rep_section=i_rep, i_val=n)
1055 8 : CALL section_vals_val_get(eigensolver_section, "EIGENVALUES", i_rep_section=i_rep, i_val=neig)
1056 8 : CALL section_vals_val_get(eigensolver_section, "DIAG_METHOD", i_rep_section=i_rep, i_val=diag_method)
1057 8 : CALL section_vals_val_get(eigensolver_section, "INIT_METHOD", i_rep_section=i_rep, i_val=init_method)
1058 8 : CALL section_vals_val_get(eigensolver_section, "N_loop", i_rep_section=i_rep, i_val=n_loop)
1059 :
1060 : ! proper number of eigs
1061 8 : IF (neig < 0) neig = n
1062 8 : neig = MIN(neig, n)
1063 :
1064 : ! report
1065 8 : IF (iw > 0) THEN
1066 4 : WRITE (iw, *) "Matrix size", n
1067 4 : WRITE (iw, *) "Number of eigenvalues", neig
1068 4 : WRITE (iw, *) "Timing loops", n_loop
1069 2 : SELECT CASE (diag_method)
1070 : CASE (do_diag_syevd)
1071 2 : WRITE (iw, *) "Diag using syevd"
1072 : CASE (do_diag_syevx)
1073 4 : WRITE (iw, *) "Diag using syevx"
1074 : CASE DEFAULT
1075 : ! stop
1076 : END SELECT
1077 :
1078 4 : SELECT CASE (init_method)
1079 : CASE (do_mat_random)
1080 4 : WRITE (iw, *) "using random matrix"
1081 : CASE (do_mat_read)
1082 4 : WRITE (iw, *) "reading from file"
1083 : CASE DEFAULT
1084 : ! stop
1085 : END SELECT
1086 : END IF
1087 :
1088 : ! create matrix struct type
1089 8 : NULLIFY (fmstruct)
1090 : CALL cp_fm_struct_create(fmstruct=fmstruct, &
1091 : para_env=para_env, &
1092 : context=blacs_env, &
1093 : nrow_global=n, &
1094 8 : ncol_global=n)
1095 :
1096 : ! create all needed matrices, and buffers for the eigenvalues
1097 : CALL cp_fm_create(matrix=matrix, &
1098 : matrix_struct=fmstruct, &
1099 8 : name="MATRIX")
1100 8 : CALL cp_fm_set_all(matrix, 0.0_dp)
1101 :
1102 : CALL cp_fm_create(matrix=eigenvectors, &
1103 : matrix_struct=fmstruct, &
1104 8 : name="EIGENVECTORS")
1105 8 : CALL cp_fm_set_all(eigenvectors, 0.0_dp)
1106 :
1107 : CALL cp_fm_create(matrix=work, &
1108 : matrix_struct=fmstruct, &
1109 8 : name="WORK")
1110 8 : CALL cp_fm_set_all(matrix, 0.0_dp)
1111 :
1112 24 : ALLOCATE (eigenvalues(n))
1113 100 : eigenvalues = 0.0_dp
1114 16 : ALLOCATE (buffer(1, n))
1115 :
1116 : ! generate initial matrix, either by reading a file, or using random numbers
1117 8 : IF (para_env%is_source()) THEN
1118 4 : SELECT CASE (init_method)
1119 : CASE (do_mat_random)
1120 : rng_stream = rng_stream_type( &
1121 : name="rng_stream", &
1122 : distribution_type=UNIFORM, &
1123 4 : extended_precision=.TRUE.)
1124 : CASE (do_mat_read)
1125 : CALL open_file(file_name="MATRIX", &
1126 : file_action="READ", &
1127 : file_form="FORMATTED", &
1128 : file_status="OLD", &
1129 4 : unit_number=unit_number)
1130 : END SELECT
1131 : END IF
1132 :
1133 100 : DO i = 1, n
1134 92 : IF (para_env%is_source()) THEN
1135 : SELECT CASE (init_method)
1136 : CASE (do_mat_random)
1137 347 : DO j = i, n
1138 347 : buffer(1, j) = rng_stream%next() - 0.5_dp
1139 : END DO
1140 : !MK activate/modify for a diagonal dominant symmetric matrix:
1141 : !MK buffer(1,i) = 10.0_dp*buffer(1,i)
1142 : CASE (do_mat_read)
1143 46 : READ (UNIT=unit_number, FMT=*) buffer(1, 1:n)
1144 : END SELECT
1145 : END IF
1146 92 : CALL para_env%bcast(buffer)
1147 8 : SELECT CASE (init_method)
1148 : CASE (do_mat_random)
1149 : CALL cp_fm_set_submatrix(fm=matrix, &
1150 : new_values=buffer, &
1151 : start_row=i, &
1152 : start_col=i, &
1153 : n_rows=1, &
1154 : n_cols=n - i + 1, &
1155 : alpha=1.0_dp, &
1156 : beta=0.0_dp, &
1157 92 : transpose=.FALSE.)
1158 : CALL cp_fm_set_submatrix(fm=matrix, &
1159 : new_values=buffer, &
1160 : start_row=i, &
1161 : start_col=i, &
1162 : n_rows=n - i + 1, &
1163 : n_cols=1, &
1164 : alpha=1.0_dp, &
1165 : beta=0.0_dp, &
1166 92 : transpose=.TRUE.)
1167 : CASE (do_mat_read)
1168 : CALL cp_fm_set_submatrix(fm=matrix, &
1169 : new_values=buffer, &
1170 : start_row=i, &
1171 : start_col=1, &
1172 : n_rows=1, &
1173 : n_cols=n, &
1174 : alpha=1.0_dp, &
1175 : beta=0.0_dp, &
1176 92 : transpose=.FALSE.)
1177 : END SELECT
1178 : END DO
1179 :
1180 8 : DEALLOCATE (buffer)
1181 :
1182 8 : IF (para_env%is_source()) THEN
1183 0 : SELECT CASE (init_method)
1184 : CASE (do_mat_read)
1185 4 : CALL close_file(unit_number=unit_number)
1186 : END SELECT
1187 : END IF
1188 :
1189 88 : DO i_loop = 1, n_loop
1190 1000 : eigenvalues = 0.0_dp
1191 80 : CALL cp_fm_set_all(eigenvectors, 0.0_dp)
1192 : CALL cp_fm_to_fm(source=matrix, &
1193 80 : destination=work)
1194 :
1195 : ! DONE, now testing
1196 80 : t1 = m_walltime()
1197 40 : SELECT CASE (diag_method)
1198 : CASE (do_diag_syevd)
1199 : CALL cp_fm_syevd(matrix=work, &
1200 : eigenvectors=eigenvectors, &
1201 40 : eigenvalues=eigenvalues)
1202 : CASE (do_diag_syevx)
1203 : CALL cp_fm_syevx(matrix=work, &
1204 : eigenvectors=eigenvectors, &
1205 : eigenvalues=eigenvalues, &
1206 : neig=neig, &
1207 80 : work_syevx=1.0_dp)
1208 : END SELECT
1209 80 : t2 = m_walltime()
1210 88 : IF (iw > 0) WRITE (iw, *) "Timing for loop ", i_loop, " : ", t2 - t1
1211 : END DO
1212 :
1213 8 : IF (iw > 0) THEN
1214 4 : WRITE (iw, *) "Eigenvalues: "
1215 4 : WRITE (UNIT=iw, FMT="(T3,5F14.6)") eigenvalues(1:neig)
1216 42 : WRITE (UNIT=iw, FMT="(T3,A4,F16.6)") "Sum:", SUM(eigenvalues(1:neig))
1217 4 : WRITE (iw, *) ""
1218 : END IF
1219 :
1220 : ! Clean up
1221 8 : DEALLOCATE (eigenvalues)
1222 8 : CALL cp_fm_release(matrix=work)
1223 8 : CALL cp_fm_release(matrix=eigenvectors)
1224 8 : CALL cp_fm_release(matrix=matrix)
1225 42 : CALL cp_fm_struct_release(fmstruct=fmstruct)
1226 :
1227 : END DO
1228 :
1229 2 : CALL cp_blacs_env_release(blacs_env=blacs_env)
1230 :
1231 4 : END SUBROUTINE eigensolver_test
1232 :
1233 : ! **************************************************************************************************
1234 : !> \brief Tests the parallel matrix multiply
1235 : !> \param para_env ...
1236 : !> \param iw ...
1237 : !> \param cp_fm_gemm_test_section ...
1238 : ! **************************************************************************************************
1239 4 : SUBROUTINE cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
1240 :
1241 : TYPE(mp_para_env_type), POINTER :: para_env
1242 : INTEGER :: iw
1243 : TYPE(section_vals_type), POINTER :: cp_fm_gemm_test_section
1244 :
1245 : CHARACTER(LEN=1) :: transa, transb
1246 : INTEGER :: i_loop, i_rep, k, m, n, N_loop, n_rep, ncol_block, ncol_block_actual, &
1247 : ncol_global, np, nrow_block, nrow_block_actual, nrow_global
1248 4 : INTEGER, DIMENSION(:), POINTER :: grid_2d
1249 : LOGICAL :: force_blocksize, row_major, transa_p, &
1250 : transb_p
1251 : REAL(KIND=dp) :: t1, t2, t3, t4
1252 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
1253 : TYPE(cp_fm_struct_type), POINTER :: fmstruct_a, fmstruct_b, fmstruct_c
1254 : TYPE(cp_fm_type) :: matrix_a, matrix_b, matrix_c
1255 :
1256 4 : CALL section_vals_get(cp_fm_gemm_test_section, n_repetition=n_rep)
1257 24 : DO i_rep = 1, n_rep
1258 :
1259 : ! how often should we do the multiply
1260 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
1261 :
1262 : ! matrices def.
1263 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "K", i_rep_section=i_rep, i_val=k)
1264 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "N", i_rep_section=i_rep, i_val=n)
1265 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "M", i_rep_section=i_rep, i_val=m)
1266 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "transa", i_rep_section=i_rep, l_val=transa_p)
1267 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "transb", i_rep_section=i_rep, l_val=transb_p)
1268 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "nrow_block", i_rep_section=i_rep, i_val=nrow_block)
1269 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "ncol_block", i_rep_section=i_rep, i_val=ncol_block)
1270 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "ROW_MAJOR", i_rep_section=i_rep, l_val=row_major)
1271 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "GRID_2D", i_rep_section=i_rep, i_vals=grid_2d)
1272 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "FORCE_BLOCKSIZE", i_rep_section=i_rep, l_val=force_blocksize)
1273 20 : transa = "N"
1274 20 : transb = "N"
1275 20 : IF (transa_p) transa = "T"
1276 20 : IF (transb_p) transb = "T"
1277 :
1278 20 : IF (iw > 0) THEN
1279 10 : WRITE (iw, '(T2,A)') "----------- TESTING PARALLEL MATRIX MULTIPLY -------------"
1280 10 : WRITE (iw, '(T2,A)', ADVANCE="NO") "C = "
1281 10 : IF (transa_p) THEN
1282 2 : WRITE (iw, '(A)', ADVANCE="NO") "TRANSPOSE(A) x"
1283 : ELSE
1284 8 : WRITE (iw, '(A)', ADVANCE="NO") "A x "
1285 : END IF
1286 10 : IF (transb_p) THEN
1287 2 : WRITE (iw, '(A)') "TRANSPOSE(B) "
1288 : ELSE
1289 8 : WRITE (iw, '(A)') "B "
1290 : END IF
1291 10 : WRITE (iw, '(T2,A,T50,I5,A,I5)') 'requested block size', nrow_block, ' by ', ncol_block
1292 10 : WRITE (iw, '(T2,A,T50,I5)') 'number of repetitions of cp_fm_gemm ', n_loop
1293 10 : WRITE (iw, '(T2,A,T50,L5)') 'Row Major', row_major
1294 30 : WRITE (iw, '(T2,A,T50,2I7)') 'GRID_2D ', grid_2d
1295 10 : WRITE (iw, '(T2,A,T50,L5)') 'Force blocksize ', force_blocksize
1296 : ! check the return value of pilaenv, too small values limit the performance (assuming pdgemm is the vanilla variant)
1297 10 : np = cp_fm_pilaenv(0, 'D')
1298 10 : IF (np > 0) THEN
1299 10 : WRITE (iw, '(T2,A,T50,I5)') 'PILAENV blocksize', np
1300 : END IF
1301 : END IF
1302 :
1303 20 : NULLIFY (blacs_env)
1304 : CALL cp_blacs_env_create(blacs_env=blacs_env, &
1305 : para_env=para_env, &
1306 : row_major=row_major, &
1307 20 : grid_2d=grid_2d)
1308 :
1309 20 : NULLIFY (fmstruct_a)
1310 20 : IF (transa_p) THEN
1311 4 : nrow_global = m; ncol_global = k
1312 : ELSE
1313 16 : nrow_global = k; ncol_global = m
1314 : END IF
1315 : CALL cp_fm_struct_create(fmstruct=fmstruct_a, para_env=para_env, context=blacs_env, &
1316 : nrow_global=nrow_global, ncol_global=ncol_global, &
1317 20 : nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1318 20 : CALL cp_fm_struct_get(fmstruct_a, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1319 30 : IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix A ', nrow_global, " by ", ncol_global, &
1320 20 : ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
1321 :
1322 20 : IF (transb_p) THEN
1323 4 : nrow_global = n; ncol_global = m
1324 : ELSE
1325 16 : nrow_global = m; ncol_global = n
1326 : END IF
1327 20 : NULLIFY (fmstruct_b)
1328 : CALL cp_fm_struct_create(fmstruct=fmstruct_b, para_env=para_env, context=blacs_env, &
1329 : nrow_global=nrow_global, ncol_global=ncol_global, &
1330 20 : nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1331 20 : CALL cp_fm_struct_get(fmstruct_b, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1332 30 : IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix B ', nrow_global, " by ", ncol_global, &
1333 20 : ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
1334 :
1335 20 : NULLIFY (fmstruct_c)
1336 20 : nrow_global = k
1337 20 : ncol_global = n
1338 : CALL cp_fm_struct_create(fmstruct=fmstruct_c, para_env=para_env, context=blacs_env, &
1339 : nrow_global=nrow_global, ncol_global=ncol_global, &
1340 20 : nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1341 20 : CALL cp_fm_struct_get(fmstruct_c, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1342 30 : IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix C ', nrow_global, " by ", ncol_global, &
1343 20 : ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
1344 :
1345 20 : CALL cp_fm_create(matrix=matrix_a, matrix_struct=fmstruct_a, name="MATRIX A")
1346 20 : CALL cp_fm_create(matrix=matrix_b, matrix_struct=fmstruct_b, name="MATRIX B")
1347 20 : CALL cp_fm_create(matrix=matrix_c, matrix_struct=fmstruct_c, name="MATRIX C")
1348 :
1349 20 : CALL RANDOM_NUMBER(matrix_a%local_data)
1350 20 : CALL RANDOM_NUMBER(matrix_b%local_data)
1351 20 : CALL RANDOM_NUMBER(matrix_c%local_data)
1352 :
1353 20 : IF (iw > 0) CALL m_flush(iw)
1354 :
1355 20 : t1 = m_walltime()
1356 932 : DO i_loop = 1, N_loop
1357 912 : t3 = m_walltime()
1358 912 : CALL parallel_gemm(transa, transb, k, n, m, 1.0_dp, matrix_a, matrix_b, 0.0_dp, matrix_c)
1359 912 : t4 = m_walltime()
1360 932 : IF (iw > 0) THEN
1361 456 : WRITE (iw, '(T2,A,T50,F12.6)') "cp_fm_gemm timing: ", (t4 - t3)
1362 456 : CALL m_flush(iw)
1363 : END IF
1364 : END DO
1365 20 : t2 = m_walltime()
1366 :
1367 20 : IF (iw > 0) THEN
1368 10 : WRITE (iw, '(T2,A,T50,F12.6)') "average cp_fm_gemm timing: ", (t2 - t1)/N_loop
1369 10 : IF (t2 > t1) THEN
1370 10 : WRITE (iw, '(T2,A,T50,F12.6)') "cp_fm_gemm Gflops per MPI task: ", &
1371 20 : 2*REAL(m, kind=dp)*REAL(n, kind=dp)*REAL(k, kind=dp)*N_loop/MAX(0.001_dp, t2 - t1)/1.0E9_dp/para_env%num_pe
1372 : END IF
1373 : END IF
1374 :
1375 20 : CALL cp_fm_release(matrix=matrix_a)
1376 20 : CALL cp_fm_release(matrix=matrix_b)
1377 20 : CALL cp_fm_release(matrix=matrix_c)
1378 20 : CALL cp_fm_struct_release(fmstruct=fmstruct_a)
1379 20 : CALL cp_fm_struct_release(fmstruct=fmstruct_b)
1380 20 : CALL cp_fm_struct_release(fmstruct=fmstruct_c)
1381 144 : CALL cp_blacs_env_release(blacs_env=blacs_env)
1382 :
1383 : END DO
1384 :
1385 4 : END SUBROUTINE cp_fm_gemm_test
1386 :
1387 : ! **************************************************************************************************
1388 : !> \brief Tests the DBCSR interface.
1389 : !> \param para_env ...
1390 : !> \param iw ...
1391 : !> \param input_section ...
1392 : ! **************************************************************************************************
1393 32 : SUBROUTINE cp_dbcsr_tests(para_env, iw, input_section)
1394 :
1395 : TYPE(mp_para_env_type), POINTER :: para_env
1396 : INTEGER :: iw
1397 : TYPE(section_vals_type), POINTER :: input_section
1398 :
1399 : CHARACTER, DIMENSION(3) :: types
1400 : INTEGER :: data_type, i_rep, k, m, n, N_loop, &
1401 : n_rep, test_type
1402 32 : INTEGER, DIMENSION(:), POINTER :: bs_k, bs_m, bs_n, nproc
1403 : LOGICAL :: always_checksum, retain_sparsity, &
1404 : transa_p, transb_p
1405 : REAL(KIND=dp) :: alpha, beta, filter_eps, s_a, s_b, s_c
1406 :
1407 : ! ---------------------------------------------------------------------------
1408 :
1409 32 : NULLIFY (bs_m, bs_n, bs_k)
1410 32 : CALL section_vals_get(input_section, n_repetition=n_rep)
1411 32 : CALL dbcsr_reset_randmat_seed()
1412 78 : DO i_rep = 1, n_rep
1413 : ! how often should we do the multiply
1414 46 : CALL section_vals_val_get(input_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
1415 :
1416 : ! matrices def.
1417 46 : CALL section_vals_val_get(input_section, "TEST_TYPE", i_rep_section=i_rep, i_val=test_type)
1418 46 : CALL section_vals_val_get(input_section, "DATA_TYPE", i_rep_section=i_rep, i_val=data_type)
1419 46 : CALL section_vals_val_get(input_section, "K", i_rep_section=i_rep, i_val=k)
1420 46 : CALL section_vals_val_get(input_section, "N", i_rep_section=i_rep, i_val=n)
1421 46 : CALL section_vals_val_get(input_section, "M", i_rep_section=i_rep, i_val=m)
1422 46 : CALL section_vals_val_get(input_section, "transa", i_rep_section=i_rep, l_val=transa_p)
1423 46 : CALL section_vals_val_get(input_section, "transb", i_rep_section=i_rep, l_val=transb_p)
1424 : CALL section_vals_val_get(input_section, "bs_m", i_rep_section=i_rep, &
1425 46 : i_vals=bs_m)
1426 : CALL section_vals_val_get(input_section, "bs_n", i_rep_section=i_rep, &
1427 46 : i_vals=bs_n)
1428 : CALL section_vals_val_get(input_section, "bs_k", i_rep_section=i_rep, &
1429 46 : i_vals=bs_k)
1430 46 : CALL section_vals_val_get(input_section, "keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
1431 46 : CALL section_vals_val_get(input_section, "asparsity", i_rep_section=i_rep, r_val=s_a)
1432 46 : CALL section_vals_val_get(input_section, "bsparsity", i_rep_section=i_rep, r_val=s_b)
1433 46 : CALL section_vals_val_get(input_section, "csparsity", i_rep_section=i_rep, r_val=s_c)
1434 46 : CALL section_vals_val_get(input_section, "alpha", i_rep_section=i_rep, r_val=alpha)
1435 46 : CALL section_vals_val_get(input_section, "beta", i_rep_section=i_rep, r_val=beta)
1436 : CALL section_vals_val_get(input_section, "nproc", i_rep_section=i_rep, &
1437 46 : i_vals=nproc)
1438 : CALL section_vals_val_get(input_section, "atype", i_rep_section=i_rep, &
1439 46 : c_val=types(1))
1440 : CALL section_vals_val_get(input_section, "btype", i_rep_section=i_rep, &
1441 46 : c_val=types(2))
1442 : CALL section_vals_val_get(input_section, "ctype", i_rep_section=i_rep, &
1443 46 : c_val=types(3))
1444 : CALL section_vals_val_get(input_section, "filter_eps", &
1445 46 : i_rep_section=i_rep, r_val=filter_eps)
1446 46 : CALL section_vals_val_get(input_section, "ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
1447 :
1448 : CALL dbcsr_run_tests(para_env%get_handle(), iw, nproc, &
1449 : [m, n, k], &
1450 : [transa_p, transb_p], &
1451 : bs_m, bs_n, bs_k, &
1452 : [s_a, s_b, s_c], &
1453 : alpha, beta, &
1454 : data_type=data_type, &
1455 : test_type=test_type, &
1456 : n_loops=n_loop, eps=filter_eps, retain_sparsity=retain_sparsity, &
1457 538 : always_checksum=always_checksum)
1458 : END DO
1459 32 : END SUBROUTINE cp_dbcsr_tests
1460 :
1461 : ! **************************************************************************************************
1462 : !> \brief Tests the DBM library.
1463 : !> \param para_env ...
1464 : !> \param iw ...
1465 : !> \param input_section ...
1466 : ! **************************************************************************************************
1467 14 : SUBROUTINE run_dbm_tests(para_env, iw, input_section)
1468 :
1469 : TYPE(mp_para_env_type), POINTER :: para_env
1470 : INTEGER :: iw
1471 : TYPE(section_vals_type), POINTER :: input_section
1472 :
1473 : INTEGER :: i_rep, k, m, n, N_loop, n_rep
1474 14 : INTEGER, DIMENSION(:), POINTER :: bs_k, bs_m, bs_n
1475 : LOGICAL :: always_checksum, retain_sparsity, &
1476 : transa_p, transb_p
1477 : REAL(KIND=dp) :: alpha, beta, filter_eps, s_a, s_b, s_c
1478 :
1479 : ! ---------------------------------------------------------------------------
1480 :
1481 14 : NULLIFY (bs_m, bs_n, bs_k)
1482 14 : CALL section_vals_get(input_section, n_repetition=n_rep)
1483 14 : CALL dbcsr_reset_randmat_seed()
1484 28 : DO i_rep = 1, n_rep
1485 14 : CALL section_vals_val_get(input_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
1486 14 : CALL section_vals_val_get(input_section, "K", i_rep_section=i_rep, i_val=k)
1487 14 : CALL section_vals_val_get(input_section, "N", i_rep_section=i_rep, i_val=n)
1488 14 : CALL section_vals_val_get(input_section, "M", i_rep_section=i_rep, i_val=m)
1489 14 : CALL section_vals_val_get(input_section, "transa", i_rep_section=i_rep, l_val=transa_p)
1490 14 : CALL section_vals_val_get(input_section, "transb", i_rep_section=i_rep, l_val=transb_p)
1491 14 : CALL section_vals_val_get(input_section, "bs_m", i_rep_section=i_rep, i_vals=bs_m)
1492 14 : CALL section_vals_val_get(input_section, "bs_n", i_rep_section=i_rep, i_vals=bs_n)
1493 14 : CALL section_vals_val_get(input_section, "bs_k", i_rep_section=i_rep, i_vals=bs_k)
1494 14 : CALL section_vals_val_get(input_section, "keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
1495 14 : CALL section_vals_val_get(input_section, "asparsity", i_rep_section=i_rep, r_val=s_a)
1496 14 : CALL section_vals_val_get(input_section, "bsparsity", i_rep_section=i_rep, r_val=s_b)
1497 14 : CALL section_vals_val_get(input_section, "csparsity", i_rep_section=i_rep, r_val=s_c)
1498 14 : CALL section_vals_val_get(input_section, "alpha", i_rep_section=i_rep, r_val=alpha)
1499 14 : CALL section_vals_val_get(input_section, "beta", i_rep_section=i_rep, r_val=beta)
1500 14 : CALL section_vals_val_get(input_section, "filter_eps", i_rep_section=i_rep, r_val=filter_eps)
1501 14 : CALL section_vals_val_get(input_section, "ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
1502 :
1503 : CALL dbm_run_tests(mp_group=para_env, &
1504 : io_unit=iw, &
1505 : matrix_sizes=[m, n, k], &
1506 : trs=[transa_p, transb_p], &
1507 : bs_m=bs_m, &
1508 : bs_n=bs_n, &
1509 : bs_k=bs_k, &
1510 : sparsities=[s_a, s_b, s_c], &
1511 : alpha=alpha, &
1512 : beta=beta, &
1513 : n_loops=n_loop, &
1514 : eps=filter_eps, &
1515 : retain_sparsity=retain_sparsity, &
1516 154 : always_checksum=always_checksum)
1517 : END DO
1518 14 : END SUBROUTINE run_dbm_tests
1519 :
1520 8484 : END MODULE library_tests
|