Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Performance tests for basic tasks like matrix multiplies, copy, fft.
10 : !> \par History
11 : !> 30-Nov-2000 (JGH) added input
12 : !> 02-Jan-2001 (JGH) Parallel FFT
13 : !> 28-Feb-2002 (JGH) Clebsch-Gordon Coefficients
14 : !> 06-Jun-2003 (JGH) Real space grid test
15 : !> Eigensolver test (29.08.05,MK)
16 : !> \author JGH 6-NOV-2000
17 : ! **************************************************************************************************
18 : MODULE library_tests
19 :
20 : USE ai_coulomb_test, ONLY: eri_test
21 : USE cell_methods, ONLY: cell_create,&
22 : init_cell
23 : USE cell_types, ONLY: cell_release,&
24 : cell_type
25 : USE cg_test, ONLY: clebsch_gordon_test
26 : USE cp_blacs_env, ONLY: cp_blacs_env_create,&
27 : cp_blacs_env_release,&
28 : cp_blacs_env_type
29 : USE cp_eri_mme_interface, ONLY: cp_eri_mme_perf_acc_test
30 : USE cp_files, ONLY: close_file,&
31 : open_file
32 : USE cp_fm_basic_linalg, ONLY: cp_fm_gemm
33 : USE cp_fm_diag, ONLY: cp_fm_syevd,&
34 : cp_fm_syevx
35 : USE cp_fm_struct, ONLY: cp_fm_struct_create,&
36 : cp_fm_struct_get,&
37 : cp_fm_struct_release,&
38 : cp_fm_struct_type
39 : USE cp_fm_types, ONLY: cp_fm_create,&
40 : cp_fm_pilaenv,&
41 : cp_fm_release,&
42 : cp_fm_set_all,&
43 : cp_fm_set_submatrix,&
44 : cp_fm_to_fm,&
45 : cp_fm_type
46 : USE cp_log_handling, ONLY: cp_get_default_logger,&
47 : cp_logger_type
48 : USE cp_output_handling, ONLY: cp_print_key_finished_output,&
49 : cp_print_key_unit_nr
50 : USE cp_realspace_grid_init, ONLY: init_input_type
51 : USE dbcsr_api, ONLY: dbcsr_reset_randmat_seed,&
52 : dbcsr_run_tests
53 : USE dbm_tests, ONLY: dbm_run_tests
54 : USE fft_tools, ONLY: BWFFT,&
55 : FFT_RADIX_CLOSEST,&
56 : FWFFT,&
57 : fft3d,&
58 : fft_radix_operations,&
59 : finalize_fft,&
60 : init_fft
61 : USE global_types, ONLY: global_environment_type
62 : USE input_constants, ONLY: do_diag_syevd,&
63 : do_diag_syevx,&
64 : do_mat_random,&
65 : do_mat_read,&
66 : do_pwgrid_ns_fullspace,&
67 : do_pwgrid_ns_halfspace,&
68 : do_pwgrid_spherical
69 : USE input_section_types, ONLY: section_vals_get,&
70 : section_vals_get_subs_vals,&
71 : section_vals_type,&
72 : section_vals_val_get
73 : USE kinds, ONLY: dp
74 : USE machine, ONLY: m_flush,&
75 : m_walltime
76 : USE message_passing, ONLY: mp_para_env_type
77 : USE minimax_exp, ONLY: validate_exp_minimax
78 : USE mp2_grids, ONLY: test_least_square_ft
79 : USE mp_perf_test, ONLY: mpi_perf_test
80 : USE parallel_gemm_api, ONLY: parallel_gemm
81 : USE parallel_rng_types, ONLY: UNIFORM,&
82 : rng_stream_type
83 : USE pw_grid_types, ONLY: FULLSPACE,&
84 : HALFSPACE,&
85 : pw_grid_type
86 : USE pw_grids, ONLY: pw_grid_create,&
87 : pw_grid_release,&
88 : pw_grid_setup
89 : USE pw_methods, ONLY: pw_transfer,&
90 : pw_zero
91 : USE pw_types, ONLY: pw_c1d_gs_type,&
92 : pw_c3d_rs_type,&
93 : pw_r3d_rs_type
94 : USE realspace_grid_types, ONLY: &
95 : realspace_grid_desc_type, realspace_grid_input_type, realspace_grid_type, rs_grid_create, &
96 : rs_grid_create_descriptor, rs_grid_print, rs_grid_release, rs_grid_release_descriptor, &
97 : rs_grid_zero, transfer_pw2rs, transfer_rs2pw
98 : USE shg_integrals_test, ONLY: shg_integrals_perf_acc_test
99 : #include "./base/base_uses.f90"
100 :
101 : IMPLICIT NONE
102 :
103 : PRIVATE
104 : PUBLIC :: lib_test
105 :
106 : INTEGER :: runtest(100)
107 : REAL(KIND=dp) :: max_memory
108 : REAL(KIND=dp), PARAMETER :: threshold = 1.0E-8_dp
109 : CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'library_tests'
110 :
111 : CONTAINS
112 :
113 : ! **************************************************************************************************
114 : !> \brief Master routine for tests
115 : !> \param root_section ...
116 : !> \param para_env ...
117 : !> \param globenv ...
118 : !> \par History
119 : !> none
120 : !> \author JGH 6-NOV-2000
121 : ! **************************************************************************************************
122 720 : SUBROUTINE lib_test(root_section, para_env, globenv)
123 :
124 : TYPE(section_vals_type), POINTER :: root_section
125 : TYPE(mp_para_env_type), POINTER :: para_env
126 : TYPE(global_environment_type), POINTER :: globenv
127 :
128 : CHARACTER(LEN=*), PARAMETER :: routineN = 'lib_test'
129 :
130 : INTEGER :: handle, iw
131 : LOGICAL :: explicit
132 : TYPE(cp_logger_type), POINTER :: logger
133 : TYPE(section_vals_type), POINTER :: cp_dbcsr_test_section, cp_fm_gemm_test_section, &
134 : dbm_test_section, eigensolver_section, eri_mme_test_section, pw_transfer_section, &
135 : rs_pw_transfer_section, shg_integrals_test_section
136 :
137 80 : CALL timeset(routineN, handle)
138 :
139 80 : logger => cp_get_default_logger()
140 80 : iw = cp_print_key_unit_nr(logger, root_section, "TEST%PROGRAM_RUN_INFO", extension=".log")
141 :
142 80 : IF (iw > 0) THEN
143 40 : WRITE (iw, '(T2,79("*"))')
144 40 : WRITE (iw, '(A,T31,A,T80,A)') ' *', ' PERFORMANCE TESTS ', '*'
145 40 : WRITE (iw, '(T2,79("*"))')
146 : END IF
147 : !
148 80 : CALL test_input(root_section, para_env)
149 : !
150 80 : IF (runtest(1) /= 0) CALL copy_test(para_env, iw)
151 : !
152 80 : IF (runtest(2) /= 0) CALL matmul_test(para_env, test_matmul=.TRUE., test_dgemm=.FALSE., iw=iw)
153 80 : IF (runtest(5) /= 0) CALL matmul_test(para_env, test_matmul=.FALSE., test_dgemm=.TRUE., iw=iw)
154 : !
155 80 : IF (runtest(3) /= 0) CALL fft_test(para_env, iw, globenv%fftw_plan_type, &
156 2 : globenv%fftw_wisdom_file_name)
157 : !
158 80 : IF (runtest(4) /= 0) CALL eri_test(iw)
159 : !
160 80 : IF (runtest(6) /= 0) CALL clebsch_gordon_test()
161 : !
162 : ! runtest 7 has been deleted and can be recycled
163 : !
164 80 : IF (runtest(8) /= 0) CALL mpi_perf_test(para_env, runtest(8), iw)
165 : !
166 80 : IF (runtest(10) /= 0) CALL validate_exp_minimax(runtest(10), iw)
167 : !
168 80 : IF (runtest(11) /= 0) CALL test_least_square_ft(runtest(11), iw)
169 : !
170 :
171 80 : rs_pw_transfer_section => section_vals_get_subs_vals(root_section, "TEST%RS_PW_TRANSFER")
172 80 : CALL section_vals_get(rs_pw_transfer_section, explicit=explicit)
173 80 : IF (explicit) THEN
174 2 : CALL rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
175 : END IF
176 :
177 80 : pw_transfer_section => section_vals_get_subs_vals(root_section, "TEST%PW_TRANSFER")
178 80 : CALL section_vals_get(pw_transfer_section, explicit=explicit)
179 80 : IF (explicit) THEN
180 10 : CALL pw_fft_test(para_env, iw, globenv, pw_transfer_section)
181 : END IF
182 :
183 80 : cp_fm_gemm_test_section => section_vals_get_subs_vals(root_section, "TEST%CP_FM_GEMM")
184 80 : CALL section_vals_get(cp_fm_gemm_test_section, explicit=explicit)
185 80 : IF (explicit) THEN
186 4 : CALL cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
187 : END IF
188 :
189 80 : eigensolver_section => section_vals_get_subs_vals(root_section, "TEST%EIGENSOLVER")
190 80 : CALL section_vals_get(eigensolver_section, explicit=explicit)
191 80 : IF (explicit) THEN
192 2 : CALL eigensolver_test(para_env, iw, eigensolver_section)
193 : END IF
194 :
195 80 : eri_mme_test_section => section_vals_get_subs_vals(root_section, "TEST%ERI_MME_TEST")
196 80 : CALL section_vals_get(eri_mme_test_section, explicit=explicit)
197 80 : IF (explicit) THEN
198 8 : CALL cp_eri_mme_perf_acc_test(para_env, iw, eri_mme_test_section)
199 : END IF
200 :
201 80 : shg_integrals_test_section => section_vals_get_subs_vals(root_section, "TEST%SHG_INTEGRALS_TEST")
202 80 : CALL section_vals_get(shg_integrals_test_section, explicit=explicit)
203 80 : IF (explicit) THEN
204 4 : CALL shg_integrals_perf_acc_test(iw, shg_integrals_test_section)
205 : END IF
206 :
207 : ! DBCSR tests
208 80 : cp_dbcsr_test_section => section_vals_get_subs_vals(root_section, "TEST%CP_DBCSR")
209 80 : CALL section_vals_get(cp_dbcsr_test_section, explicit=explicit)
210 80 : IF (explicit) THEN
211 32 : CALL cp_dbcsr_tests(para_env, iw, cp_dbcsr_test_section)
212 : END IF
213 :
214 : ! DBM tests
215 80 : dbm_test_section => section_vals_get_subs_vals(root_section, "TEST%DBM")
216 80 : CALL section_vals_get(dbm_test_section, explicit=explicit)
217 80 : IF (explicit) THEN
218 14 : CALL run_dbm_tests(para_env, iw, dbm_test_section)
219 : END IF
220 :
221 80 : CALL cp_print_key_finished_output(iw, logger, root_section, "TEST%PROGRAM_RUN_INFO")
222 :
223 80 : CALL timestop(handle)
224 :
225 80 : END SUBROUTINE lib_test
226 :
227 : ! **************************************************************************************************
228 : !> \brief Reads input section &TEST ... &END
229 : !> \param root_section ...
230 : !> \param para_env ...
231 : !> \author JGH 30-NOV-2000
232 : !> \note
233 : !> I---------------------------------------------------------------------------I
234 : !> I SECTION: &TEST ... &END I
235 : !> I I
236 : !> I MEMORY max_memory I
237 : !> I COPY n I
238 : !> I MATMUL n I
239 : !> I FFT n I
240 : !> I ERI n I
241 : !> I PW_FFT n I
242 : !> I Clebsch-Gordon n I
243 : !> I RS_GRIDS n I
244 : !> I MPI n I
245 : !> I RNG n -> Parallel random number generator I
246 : !> I---------------------------------------------------------------------------I
247 : ! **************************************************************************************************
248 80 : SUBROUTINE test_input(root_section, para_env)
249 : TYPE(section_vals_type), POINTER :: root_section
250 : TYPE(mp_para_env_type), POINTER :: para_env
251 :
252 : TYPE(section_vals_type), POINTER :: test_section
253 :
254 : !
255 : !..defaults
256 : ! using this style is not recommended, introduce sections instead (see e.g. cp_fm_gemm)
257 :
258 80 : runtest = 0
259 80 : test_section => section_vals_get_subs_vals(root_section, "TEST")
260 80 : CALL section_vals_val_get(test_section, "MEMORY", r_val=max_memory)
261 80 : CALL section_vals_val_get(test_section, 'COPY', i_val=runtest(1))
262 80 : CALL section_vals_val_get(test_section, 'MATMUL', i_val=runtest(2))
263 80 : CALL section_vals_val_get(test_section, 'DGEMM', i_val=runtest(5))
264 80 : CALL section_vals_val_get(test_section, 'FFT', i_val=runtest(3))
265 80 : CALL section_vals_val_get(test_section, 'ERI', i_val=runtest(4))
266 80 : CALL section_vals_val_get(test_section, 'CLEBSCH_GORDON', i_val=runtest(6))
267 80 : CALL section_vals_val_get(test_section, 'MPI', i_val=runtest(8))
268 80 : CALL section_vals_val_get(test_section, 'MINIMAX', i_val=runtest(10))
269 80 : CALL section_vals_val_get(test_section, 'LEAST_SQ_FT', i_val=runtest(11))
270 :
271 80 : CALL para_env%sync()
272 80 : END SUBROUTINE test_input
273 :
274 : ! **************************************************************************************************
275 : !> \brief Tests the performance to copy two vectors.
276 : !> \param para_env ...
277 : !> \param iw ...
278 : !> \par History
279 : !> none
280 : !> \author JGH 6-NOV-2000
281 : !> \note
282 : !> The results of these tests allow to determine the size of the cache
283 : !> of the CPU. This can be used to optimize the performance of the
284 : !> FFTSG library.
285 : ! **************************************************************************************************
286 2 : SUBROUTINE copy_test(para_env, iw)
287 : TYPE(mp_para_env_type), POINTER :: para_env
288 : INTEGER :: iw
289 :
290 : INTEGER :: i, ierr, j, len, ntim, siz
291 : REAL(KIND=dp) :: perf, t, tend, tstart
292 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: ca, cb
293 :
294 : ! test for copy --> Cache size
295 :
296 2 : siz = ABS(runtest(1))
297 2 : IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of copy ( F95 ) "
298 34 : DO i = 6, 24
299 34 : len = 2**i
300 34 : IF (8.0_dp*REAL(len, KIND=dp) > max_memory*0.5_dp) EXIT
301 96 : ALLOCATE (ca(len), STAT=ierr)
302 32 : IF (ierr /= 0) EXIT
303 96 : ALLOCATE (cb(len), STAT=ierr)
304 32 : IF (ierr /= 0) EXIT
305 :
306 32 : CALL RANDOM_NUMBER(ca)
307 32 : ntim = NINT(1.e7_dp/REAL(len, KIND=dp))
308 32 : ntim = MAX(ntim, 1)
309 32 : ntim = MIN(ntim, siz*10000)
310 :
311 32 : tstart = m_walltime()
312 512524 : DO j = 1, ntim
313 315058412 : cb(:) = ca(:)
314 512524 : ca(1) = REAL(j, KIND=dp)
315 : END DO
316 32 : tend = m_walltime()
317 32 : t = tend - tstart + threshold
318 32 : IF (t > 0.0_dp) THEN
319 32 : perf = REAL(ntim, KIND=dp)*REAL(len, KIND=dp)*1.e-6_dp/t
320 : ELSE
321 0 : perf = 0.0_dp
322 : END IF
323 :
324 32 : IF (para_env%is_source()) THEN
325 16 : WRITE (iw, '(A,i2,i10,A,T59,F14.4,A)') " Copy test: Size = 2^", i, &
326 32 : len/1024, " Kwords", perf, " Mcopy/s"
327 : END IF
328 :
329 32 : DEALLOCATE (ca)
330 34 : DEALLOCATE (cb)
331 : END DO
332 2 : CALL para_env%sync()
333 2 : END SUBROUTINE copy_test
334 :
335 : ! **************************************************************************************************
336 : !> \brief Tests the performance of different kinds of matrix matrix multiply
337 : !> kernels for the BLAS and F95 intrinsic matmul.
338 : !> \param para_env ...
339 : !> \param test_matmul ...
340 : !> \param test_dgemm ...
341 : !> \param iw ...
342 : !> \par History
343 : !> none
344 : !> \author JGH 6-NOV-2000
345 : ! **************************************************************************************************
346 2 : SUBROUTINE matmul_test(para_env, test_matmul, test_dgemm, iw)
347 : TYPE(mp_para_env_type), POINTER :: para_env
348 : LOGICAL :: test_matmul, test_dgemm
349 : INTEGER :: iw
350 :
351 : INTEGER :: i, ierr, j, len, ntim, siz
352 : REAL(KIND=dp) :: perf, t, tend, tstart, xdum
353 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: ma, mb, mc
354 :
355 : ! test for matrix multpies
356 :
357 2 : IF (test_matmul) THEN
358 2 : siz = ABS(runtest(2))
359 2 : IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of matmul ( F95 ) "
360 2 : DO i = 5, siz, 2
361 4 : len = 2**i + 1
362 4 : IF (8.0_dp*REAL(len*len, KIND=dp) > max_memory*0.3_dp) EXIT
363 16 : ALLOCATE (ma(len, len), STAT=ierr)
364 4 : IF (ierr /= 0) EXIT
365 16 : ALLOCATE (mb(len, len), STAT=ierr)
366 4 : IF (ierr /= 0) EXIT
367 16 : ALLOCATE (mc(len, len), STAT=ierr)
368 4 : IF (ierr /= 0) EXIT
369 35788 : mc = 0.0_dp
370 :
371 4 : CALL RANDOM_NUMBER(xdum)
372 35788 : ma = xdum
373 4 : CALL RANDOM_NUMBER(xdum)
374 35788 : mb = xdum
375 4 : ntim = NINT(1.e8_dp/(2.0_dp*REAL(len, KIND=dp)**3))
376 4 : ntim = MAX(ntim, 1)
377 4 : ntim = MIN(ntim, siz*200)
378 4 : tstart = m_walltime()
379 2832 : DO j = 1, ntim
380 2828 : mc(:, :) = MATMUL(ma, mb)
381 2832 : ma(1, 1) = REAL(j, KIND=dp)
382 : END DO
383 4 : tend = m_walltime()
384 4 : t = tend - tstart + threshold
385 4 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
386 4 : IF (para_env%is_source()) THEN
387 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
388 2 : " Matrix multiply test: c = a * b Size = ", len, perf, " Mflop/s"
389 : END IF
390 4 : tstart = m_walltime()
391 2832 : DO j = 1, ntim
392 3895652 : mc(:, :) = mc + MATMUL(ma, mb)
393 2832 : ma(1, 1) = REAL(j, KIND=dp)
394 : END DO
395 4 : tend = m_walltime()
396 4 : t = tend - tstart + threshold
397 4 : IF (t > 0.0_dp) THEN
398 4 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
399 : ELSE
400 0 : perf = 0.0_dp
401 : END IF
402 :
403 4 : IF (para_env%is_source()) THEN
404 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
405 2 : " Matrix multiply test: a = a * b Size = ", len, perf, " Mflop/s"
406 : END IF
407 :
408 4 : tstart = m_walltime()
409 2832 : DO j = 1, ntim
410 3895652 : mc(:, :) = mc + MATMUL(ma, TRANSPOSE(mb))
411 2832 : ma(1, 1) = REAL(j, KIND=dp)
412 : END DO
413 4 : tend = m_walltime()
414 4 : t = tend - tstart + threshold
415 4 : IF (t > 0.0_dp) THEN
416 4 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
417 : ELSE
418 0 : perf = 0.0_dp
419 : END IF
420 :
421 4 : IF (para_env%is_source()) THEN
422 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
423 2 : " Matrix multiply test: c = a * b(T) Size = ", len, perf, " Mflop/s"
424 : END IF
425 :
426 4 : tstart = m_walltime()
427 2832 : DO j = 1, ntim
428 3895652 : mc(:, :) = mc + MATMUL(TRANSPOSE(ma), mb)
429 2832 : ma(1, 1) = REAL(j, KIND=dp)
430 : END DO
431 4 : tend = m_walltime()
432 4 : t = tend - tstart + threshold
433 4 : IF (t > 0.0_dp) THEN
434 4 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
435 : ELSE
436 0 : perf = 0.0_dp
437 : END IF
438 :
439 4 : IF (para_env%is_source()) THEN
440 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
441 2 : " Matrix multiply test: c = a(T) * b Size = ", len, perf, " Mflop/s"
442 : END IF
443 :
444 4 : DEALLOCATE (ma)
445 4 : DEALLOCATE (mb)
446 4 : DEALLOCATE (mc)
447 : END DO
448 : END IF
449 :
450 : ! test for matrix multpies
451 2 : IF (test_dgemm) THEN
452 0 : siz = ABS(runtest(5))
453 0 : IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of matmul ( BLAS ) "
454 0 : DO i = 5, siz, 2
455 0 : len = 2**i + 1
456 0 : IF (8.0_dp*REAL(len*len, KIND=dp) > max_memory*0.3_dp) EXIT
457 0 : ALLOCATE (ma(len, len), STAT=ierr)
458 0 : IF (ierr /= 0) EXIT
459 0 : ALLOCATE (mb(len, len), STAT=ierr)
460 0 : IF (ierr /= 0) EXIT
461 0 : ALLOCATE (mc(len, len), STAT=ierr)
462 0 : IF (ierr /= 0) EXIT
463 0 : mc = 0.0_dp
464 :
465 0 : CALL RANDOM_NUMBER(xdum)
466 0 : ma = xdum
467 0 : CALL RANDOM_NUMBER(xdum)
468 0 : mb = xdum
469 0 : ntim = NINT(1.e8_dp/(2.0_dp*REAL(len, KIND=dp)**3))
470 0 : ntim = MAX(ntim, 1)
471 0 : ntim = MIN(ntim, 1000)
472 :
473 0 : tstart = m_walltime()
474 0 : DO j = 1, ntim
475 0 : CALL dgemm("N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
476 : END DO
477 0 : tend = m_walltime()
478 0 : t = tend - tstart + threshold
479 0 : IF (t > 0.0_dp) THEN
480 0 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
481 : ELSE
482 0 : perf = 0.0_dp
483 : END IF
484 :
485 0 : IF (para_env%is_source()) THEN
486 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
487 0 : " Matrix multiply test: c = a * b Size = ", len, perf, " Mflop/s"
488 : END IF
489 :
490 0 : tstart = m_walltime()
491 0 : DO j = 1, ntim
492 0 : CALL dgemm("N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
493 : END DO
494 0 : tend = m_walltime()
495 0 : t = tend - tstart + threshold
496 0 : IF (t > 0.0_dp) THEN
497 0 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
498 : ELSE
499 0 : perf = 0.0_dp
500 : END IF
501 :
502 0 : IF (para_env%is_source()) THEN
503 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
504 0 : " Matrix multiply test: a = a * b Size = ", len, perf, " Mflop/s"
505 : END IF
506 :
507 0 : tstart = m_walltime()
508 0 : DO j = 1, ntim
509 0 : CALL dgemm("N", "T", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
510 : END DO
511 0 : tend = m_walltime()
512 0 : t = tend - tstart + threshold
513 0 : IF (t > 0.0_dp) THEN
514 0 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
515 : ELSE
516 0 : perf = 0.0_dp
517 : END IF
518 :
519 0 : IF (para_env%is_source()) THEN
520 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
521 0 : " Matrix multiply test: c = a * b(T) Size = ", len, perf, " Mflop/s"
522 : END IF
523 :
524 0 : tstart = m_walltime()
525 0 : DO j = 1, ntim
526 0 : CALL dgemm("T", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
527 : END DO
528 0 : tend = m_walltime()
529 0 : t = tend - tstart + threshold
530 0 : IF (t > 0.0_dp) THEN
531 0 : perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
532 : ELSE
533 0 : perf = 0.0_dp
534 : END IF
535 :
536 0 : IF (para_env%is_source()) THEN
537 : WRITE (iw, '(A,i6,T59,F14.4,A)') &
538 0 : " Matrix multiply test: c = a(T) * b Size = ", len, perf, " Mflop/s"
539 : END IF
540 :
541 0 : DEALLOCATE (ma)
542 0 : DEALLOCATE (mb)
543 0 : DEALLOCATE (mc)
544 : END DO
545 : END IF
546 :
547 2 : CALL para_env%sync()
548 :
549 2 : END SUBROUTINE matmul_test
550 :
551 : ! **************************************************************************************************
552 : !> \brief Tests the performance of all available FFT libraries for 3D FFTs
553 : !> \param para_env ...
554 : !> \param iw ...
555 : !> \param fftw_plan_type ...
556 : !> \param wisdom_file where FFTW3 should look to save/load wisdom
557 : !> \par History
558 : !> none
559 : !> \author JGH 6-NOV-2000
560 : ! **************************************************************************************************
561 2 : SUBROUTINE fft_test(para_env, iw, fftw_plan_type, wisdom_file)
562 :
563 : TYPE(mp_para_env_type), POINTER :: para_env
564 : INTEGER :: iw, fftw_plan_type
565 : CHARACTER(LEN=*), INTENT(IN) :: wisdom_file
566 :
567 : INTEGER, PARAMETER :: ndate(3) = (/12, 48, 96/)
568 :
569 : INTEGER :: iall, ierr, it, j, len, n(3), ntim, &
570 : radix_in, radix_out, siz, stat
571 : COMPLEX(KIND=dp), DIMENSION(4, 4, 4) :: zz
572 2 : COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: ca, cb, cc
573 : CHARACTER(LEN=7) :: method
574 : REAL(KIND=dp) :: flops, perf, scale, t, tdiff, tend, &
575 : tstart
576 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :) :: ra
577 :
578 : ! test for 3d FFT
579 :
580 2 : IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of 3D-FFT "
581 2 : siz = ABS(runtest(3))
582 :
583 8 : DO iall = 1, 100
584 : SELECT CASE (iall)
585 : CASE DEFAULT
586 2 : EXIT
587 : CASE (1)
588 : CALL init_fft("FFTSG", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file=wisdom_file, &
589 2 : pool_limit=10, plan_style=fftw_plan_type)
590 2 : method = "FFTSG "
591 : CASE (2)
592 2 : CYCLE
593 : CASE (3)
594 : CALL init_fft("FFTW3", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file=wisdom_file, &
595 2 : pool_limit=10, plan_style=fftw_plan_type)
596 10 : method = "FFTW3 "
597 : END SELECT
598 16 : n = 4
599 4 : zz = 0.0_dp
600 4 : CALL fft3d(1, n, zz, status=stat)
601 4 : IF (stat == 0) THEN
602 16 : DO it = 1, 3
603 12 : radix_in = ndate(it)
604 12 : CALL fft_radix_operations(radix_in, radix_out, FFT_RADIX_CLOSEST)
605 12 : len = radix_out
606 48 : n = len
607 12 : IF (16.0_dp*REAL(len*len*len, KIND=dp) > max_memory*0.5_dp) EXIT
608 60 : ALLOCATE (ra(len, len, len), STAT=ierr)
609 60 : ALLOCATE (ca(len, len, len), STAT=ierr)
610 12 : CALL RANDOM_NUMBER(ra)
611 4035516 : ca(:, :, :) = ra
612 12 : CALL RANDOM_NUMBER(ra)
613 4035516 : ca(:, :, :) = ca + CMPLX(0.0_dp, 1.0_dp, KIND=dp)*ra
614 12 : flops = REAL(len**3, KIND=dp)*15.0_dp*LOG(REAL(len, KIND=dp))
615 12 : ntim = NINT(siz*1.e7_dp/flops)
616 12 : ntim = MAX(ntim, 1)
617 12 : ntim = MIN(ntim, 200)
618 12 : scale = 1.0_dp/REAL(len**3, KIND=dp)
619 12 : tstart = m_walltime()
620 884 : DO j = 1, ntim
621 872 : CALL fft3d(FWFFT, n, ca)
622 884 : CALL fft3d(BWFFT, n, ca, SCALE=scale)
623 : END DO
624 12 : tend = m_walltime()
625 12 : t = tend - tstart + threshold
626 12 : IF (t > 0.0_dp) THEN
627 12 : perf = REAL(ntim, KIND=dp)*2.0_dp*flops*1.e-6_dp/t
628 : ELSE
629 0 : perf = 0.0_dp
630 : END IF
631 :
632 12 : IF (para_env%is_source()) THEN
633 : WRITE (iw, '(T2,A,A,i6,T59,F14.4,A)') &
634 6 : ADJUSTR(method), " test (in-place) Size = ", len, perf, " Mflop/s"
635 : END IF
636 12 : DEALLOCATE (ca)
637 16 : DEALLOCATE (ra)
638 : END DO
639 4 : IF (para_env%is_source()) WRITE (iw, *)
640 : ! test if input data is preserved
641 4 : len = 24
642 16 : n = len
643 4 : ALLOCATE (ra(len, len, len))
644 4 : ALLOCATE (ca(len, len, len))
645 4 : ALLOCATE (cb(len, len, len))
646 4 : ALLOCATE (cc(len, len, len))
647 4 : CALL RANDOM_NUMBER(ra)
648 57700 : ca(:, :, :) = ra
649 4 : CALL RANDOM_NUMBER(ra)
650 57700 : ca(:, :, :) = ca + CMPLX(0.0_dp, 1.0_dp, KIND=dp)*ra
651 57700 : cc(:, :, :) = ca
652 4 : CALL fft3d(FWFFT, n, ca, cb)
653 57700 : tdiff = MAXVAL(ABS(ca - cc))
654 4 : IF (tdiff > 1.0E-12_dp) THEN
655 0 : IF (para_env%is_source()) &
656 0 : WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), " FWFFT ", &
657 0 : " Input array is changed in out-of-place FFT !"
658 : ELSE
659 4 : IF (para_env%is_source()) &
660 2 : WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), " FWFFT ", &
661 4 : " Input array is not changed in out-of-place FFT !"
662 : END IF
663 57700 : ca(:, :, :) = cc
664 4 : CALL fft3d(BWFFT, n, ca, cb)
665 57700 : tdiff = MAXVAL(ABS(ca - cc))
666 4 : IF (tdiff > 1.0E-12_dp) THEN
667 0 : IF (para_env%is_source()) &
668 0 : WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), " BWFFT ", &
669 0 : " Input array is changed in out-of-place FFT !"
670 : ELSE
671 4 : IF (para_env%is_source()) &
672 2 : WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), " BWFFT ", &
673 4 : " Input array is not changed in out-of-place FFT !"
674 : END IF
675 4 : IF (para_env%is_source()) WRITE (iw, *)
676 :
677 4 : DEALLOCATE (ra)
678 4 : DEALLOCATE (ca)
679 4 : DEALLOCATE (cb)
680 4 : DEALLOCATE (cc)
681 : END IF
682 4 : CALL finalize_fft(para_env, wisdom_file=wisdom_file)
683 : END DO
684 :
685 2 : END SUBROUTINE fft_test
686 :
687 : ! **************************************************************************************************
688 : !> \brief test rs_pw_transfer performance
689 : !> \param para_env ...
690 : !> \param iw ...
691 : !> \param globenv ...
692 : !> \param rs_pw_transfer_section ...
693 : !> \author Joost VandeVondele
694 : !> 9.2008 Randomise rs grid [Iain Bethune]
695 : !> (c) The Numerical Algorithms Group (NAG) Ltd, 2008 on behalf of the HECToR project
696 : ! **************************************************************************************************
697 2 : SUBROUTINE rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
698 :
699 : TYPE(mp_para_env_type), POINTER :: para_env
700 : INTEGER :: iw
701 : TYPE(global_environment_type), POINTER :: globenv
702 : TYPE(section_vals_type), POINTER :: rs_pw_transfer_section
703 :
704 : CHARACTER(LEN=*), PARAMETER :: routineN = 'rs_pw_transfer_test'
705 :
706 : INTEGER :: halo_size, handle, i_loop, n_loop, ns_max
707 : INTEGER, DIMENSION(3) :: no, np
708 2 : INTEGER, DIMENSION(:), POINTER :: i_vals
709 : LOGICAL :: do_rs2pw
710 : REAL(KIND=dp) :: tend, tstart
711 : TYPE(cell_type), POINTER :: box
712 : TYPE(pw_grid_type), POINTER :: grid
713 : TYPE(pw_r3d_rs_type) :: ca
714 : TYPE(realspace_grid_desc_type), POINTER :: rs_desc
715 : TYPE(realspace_grid_input_type) :: input_settings
716 32 : TYPE(realspace_grid_type) :: rs_grid
717 : TYPE(section_vals_type), POINTER :: rs_grid_section
718 :
719 2 : CALL timeset(routineN, handle)
720 :
721 : !..set fft lib
722 : CALL init_fft(globenv%default_fft_library, alltoall=.FALSE., fftsg_sizes=.TRUE., &
723 : pool_limit=globenv%fft_pool_scratch_limit, &
724 : wisdom_file=globenv%fftw_wisdom_file_name, &
725 2 : plan_style=globenv%fftw_plan_type)
726 :
727 : ! .. set cell (should otherwise be irrelevant)
728 2 : NULLIFY (box)
729 2 : CALL cell_create(box)
730 : box%hmat = RESHAPE((/20.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 20.0_dp, 0.0_dp, &
731 26 : 0.0_dp, 0.0_dp, 20.0_dp/), (/3, 3/))
732 2 : CALL init_cell(box)
733 :
734 : ! .. grid type and pw_grid
735 2 : NULLIFY (grid)
736 2 : CALL section_vals_val_get(rs_pw_transfer_section, "GRID", i_vals=i_vals)
737 8 : np = i_vals
738 2 : CALL pw_grid_create(grid, para_env)
739 2 : CALL pw_grid_setup(box%hmat, grid, grid_span=FULLSPACE, npts=np, fft_usage=.TRUE., iounit=iw)
740 8 : no = grid%npts
741 :
742 2 : CALL ca%create(grid)
743 2 : CALL pw_zero(ca)
744 :
745 : ! .. rs input setting type
746 2 : CALL section_vals_val_get(rs_pw_transfer_section, "HALO_SIZE", i_val=halo_size)
747 2 : rs_grid_section => section_vals_get_subs_vals(rs_pw_transfer_section, "RS_GRID")
748 2 : ns_max = 2*halo_size + 1
749 2 : CALL init_input_type(input_settings, ns_max, rs_grid_section, 1, (/-1, -1, -1/))
750 :
751 : ! .. rs type
752 2 : NULLIFY (rs_desc)
753 2 : CALL rs_grid_create_descriptor(rs_desc, pw_grid=grid, input_settings=input_settings)
754 2 : CALL rs_grid_create(rs_grid, rs_desc)
755 2 : CALL rs_grid_print(rs_grid, iw)
756 2 : CALL rs_grid_zero(rs_grid)
757 :
758 : ! Put random values on the grid, so summation check will pick up errors
759 2 : CALL RANDOM_NUMBER(rs_grid%r)
760 :
761 2 : CALL section_vals_val_get(rs_pw_transfer_section, "N_loop", i_val=N_loop)
762 2 : CALL section_vals_val_get(rs_pw_transfer_section, "RS2PW", l_val=do_rs2pw)
763 :
764 : ! go for the real loops, sync to get max timings
765 2 : IF (para_env%is_source()) THEN
766 1 : WRITE (iw, '(T2,A)') ""
767 1 : WRITE (iw, '(T2,A)') "Timing rs_pw_transfer routine"
768 1 : WRITE (iw, '(T2,A)') ""
769 1 : WRITE (iw, '(T2,A)') "iteration time[s]"
770 : END IF
771 8 : DO i_loop = 1, N_loop
772 6 : CALL para_env%sync()
773 6 : tstart = m_walltime()
774 6 : IF (do_rs2pw) THEN
775 6 : CALL transfer_rs2pw(rs_grid, ca)
776 : ELSE
777 0 : CALL transfer_pw2rs(rs_grid, ca)
778 : END IF
779 6 : CALL para_env%sync()
780 6 : tend = m_walltime()
781 8 : IF (para_env%is_source()) THEN
782 3 : WRITE (iw, '(T2,I9,1X,F12.6)') i_loop, tend - tstart
783 : END IF
784 : END DO
785 :
786 : !cleanup
787 2 : CALL rs_grid_release(rs_grid)
788 2 : CALL rs_grid_release_descriptor(rs_desc)
789 2 : CALL ca%release()
790 2 : CALL pw_grid_release(grid)
791 2 : CALL cell_release(box)
792 2 : CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
793 :
794 2 : CALL timestop(handle)
795 :
796 10 : END SUBROUTINE rs_pw_transfer_test
797 :
798 : ! **************************************************************************************************
799 : !> \brief Tests the performance of PW calls to FFT routines
800 : !> \param para_env ...
801 : !> \param iw ...
802 : !> \param globenv ...
803 : !> \param pw_transfer_section ...
804 : !> \par History
805 : !> JGH 6-Feb-2001 : Test and performance code
806 : !> Made input sensitive [Joost VandeVondele]
807 : !> \author JGH 1-JAN-2001
808 : ! **************************************************************************************************
809 10 : SUBROUTINE pw_fft_test(para_env, iw, globenv, pw_transfer_section)
810 :
811 : TYPE(mp_para_env_type), POINTER :: para_env
812 : INTEGER :: iw
813 : TYPE(global_environment_type), POINTER :: globenv
814 : TYPE(section_vals_type), POINTER :: pw_transfer_section
815 :
816 : REAL(KIND=dp), PARAMETER :: toler = 1.e-11_dp
817 :
818 : INTEGER :: blocked_id, grid_span, i_layout, i_rep, &
819 : ig, ip, itmp, n_loop, n_rep, nn, p, q
820 10 : INTEGER, ALLOCATABLE, DIMENSION(:, :) :: layouts
821 : INTEGER, DIMENSION(2) :: distribution_layout
822 : INTEGER, DIMENSION(3) :: no, np
823 10 : INTEGER, DIMENSION(:), POINTER :: i_vals
824 : LOGICAL :: debug, is_fullspace, odd, &
825 : pw_grid_layout_all, spherical
826 : REAL(KIND=dp) :: em, et, flops, gsq, perf, t, t_max, &
827 : t_min, tend, tstart
828 10 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: t_end, t_start
829 : TYPE(cell_type), POINTER :: box
830 : TYPE(pw_c1d_gs_type) :: ca, cc
831 : TYPE(pw_c3d_rs_type) :: cb
832 : TYPE(pw_grid_type), POINTER :: grid
833 :
834 : !..set fft lib
835 :
836 : CALL init_fft(globenv%default_fft_library, alltoall=.FALSE., fftsg_sizes=.TRUE., &
837 : pool_limit=globenv%fft_pool_scratch_limit, &
838 : wisdom_file=globenv%fftw_wisdom_file_name, &
839 10 : plan_style=globenv%fftw_plan_type)
840 :
841 : !..the unit cell (should not really matter, the number of grid points do)
842 10 : NULLIFY (box, grid)
843 10 : CALL cell_create(box)
844 : box%hmat = RESHAPE((/10.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 8.0_dp, 0.0_dp, &
845 130 : 0.0_dp, 0.0_dp, 7.0_dp/), (/3, 3/))
846 10 : CALL init_cell(box)
847 :
848 10 : CALL section_vals_get(pw_transfer_section, n_repetition=n_rep)
849 70 : DO i_rep = 1, n_rep
850 :
851 : ! how often should we do the transfer
852 60 : CALL section_vals_val_get(pw_transfer_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
853 180 : ALLOCATE (t_start(N_loop))
854 180 : ALLOCATE (t_end(N_loop))
855 :
856 : ! setup of the grids
857 60 : CALL section_vals_val_get(pw_transfer_section, "GRID", i_rep_section=i_rep, i_vals=i_vals)
858 240 : np = i_vals
859 :
860 60 : CALL section_vals_val_get(pw_transfer_section, "PW_GRID_BLOCKED", i_rep_section=i_rep, i_val=blocked_id)
861 60 : CALL section_vals_val_get(pw_transfer_section, "DEBUG", i_rep_section=i_rep, l_val=debug)
862 :
863 : CALL section_vals_val_get(pw_transfer_section, "PW_GRID_LAYOUT_ALL", i_rep_section=i_rep, &
864 60 : l_val=pw_grid_layout_all)
865 :
866 : ! prepare to loop over all or a specific layout
867 60 : IF (pw_grid_layout_all) THEN
868 : ! count layouts that fit
869 8 : itmp = 0
870 : ! start from 2, (/1,para_env%num_pe/) is not supported
871 16 : DO p = 2, para_env%num_pe
872 8 : q = para_env%num_pe/p
873 16 : IF (p*q == para_env%num_pe) THEN
874 8 : itmp = itmp + 1
875 : END IF
876 : END DO
877 : ! build list
878 24 : ALLOCATE (layouts(2, itmp))
879 8 : itmp = 0
880 16 : DO p = 2, para_env%num_pe
881 8 : q = para_env%num_pe/p
882 16 : IF (p*q == para_env%num_pe) THEN
883 8 : itmp = itmp + 1
884 24 : layouts(:, itmp) = (/p, q/)
885 : END IF
886 : END DO
887 : ELSE
888 52 : CALL section_vals_val_get(pw_transfer_section, "PW_GRID_LAYOUT", i_rep_section=i_rep, i_vals=i_vals)
889 52 : ALLOCATE (layouts(2, 1))
890 156 : layouts(:, 1) = i_vals
891 : END IF
892 :
893 120 : DO i_layout = 1, SIZE(layouts, 2)
894 :
895 180 : distribution_layout = layouts(:, i_layout)
896 :
897 60 : CALL pw_grid_create(grid, para_env)
898 :
899 60 : CALL section_vals_val_get(pw_transfer_section, "PW_GRID", i_rep_section=i_rep, i_val=itmp)
900 :
901 : ! from cp_control_utils
902 16 : SELECT CASE (itmp)
903 : CASE (do_pwgrid_spherical)
904 16 : spherical = .TRUE.
905 16 : is_fullspace = .FALSE.
906 : CASE (do_pwgrid_ns_fullspace)
907 28 : spherical = .FALSE.
908 28 : is_fullspace = .TRUE.
909 : CASE (do_pwgrid_ns_halfspace)
910 16 : spherical = .FALSE.
911 60 : is_fullspace = .FALSE.
912 : END SELECT
913 :
914 : ! from pw_env_methods
915 60 : IF (spherical) THEN
916 16 : grid_span = HALFSPACE
917 16 : spherical = .TRUE.
918 16 : odd = .TRUE.
919 44 : ELSE IF (is_fullspace) THEN
920 28 : grid_span = FULLSPACE
921 28 : spherical = .FALSE.
922 28 : odd = .FALSE.
923 : ELSE
924 16 : grid_span = HALFSPACE
925 16 : spherical = .FALSE.
926 16 : odd = .TRUE.
927 : END IF
928 :
929 : ! actual setup
930 : CALL pw_grid_setup(box%hmat, grid, grid_span=grid_span, odd=odd, spherical=spherical, &
931 : blocked=blocked_id, npts=np, fft_usage=.TRUE., &
932 60 : rs_dims=distribution_layout, iounit=iw)
933 :
934 60 : IF (iw > 0) CALL m_flush(iw)
935 :
936 : ! note that the number of grid points might be different from what the user requested (fft-able needed)
937 240 : no = grid%npts
938 :
939 60 : CALL ca%create(grid)
940 60 : CALL cb%create(grid)
941 60 : CALL cc%create(grid)
942 :
943 : ! initialize data
944 60 : CALL pw_zero(ca)
945 60 : CALL pw_zero(cb)
946 60 : CALL pw_zero(cc)
947 60 : nn = SIZE(ca%array)
948 142042 : DO ig = 1, nn
949 141982 : gsq = grid%gsq(ig)
950 142042 : ca%array(ig) = EXP(-gsq)
951 : END DO
952 :
953 420 : flops = PRODUCT(no)*30.0_dp*LOG(REAL(MAXVAL(no), KIND=dp))
954 60 : tstart = m_walltime()
955 596 : DO ip = 1, n_loop
956 536 : CALL para_env%sync()
957 536 : t_start(ip) = m_walltime()
958 536 : CALL pw_transfer(ca, cb, debug)
959 536 : CALL pw_transfer(cb, cc, debug)
960 536 : CALL para_env%sync()
961 596 : t_end(ip) = m_walltime()
962 : END DO
963 60 : tend = m_walltime()
964 60 : t = tend - tstart + threshold
965 60 : IF (t > 0.0_dp) THEN
966 60 : perf = REAL(n_loop, KIND=dp)*2.0_dp*flops*1.e-6_dp/t
967 : ELSE
968 0 : perf = 0.0_dp
969 : END IF
970 :
971 142102 : em = MAXVAL(ABS(ca%array(:) - cc%array(:)))
972 60 : CALL para_env%max(em)
973 142042 : et = SUM(ABS(ca%array(:) - cc%array(:)))
974 60 : CALL para_env%sum(et)
975 656 : t_min = MINVAL(t_end - t_start)
976 656 : t_max = MAXVAL(t_end - t_start)
977 :
978 60 : IF (para_env%is_source()) THEN
979 30 : WRITE (iw, *)
980 30 : WRITE (iw, '(A,T67,E14.6)') " Parallel FFT Tests: Maximal Error ", em
981 30 : WRITE (iw, '(A,T67,E14.6)') " Parallel FFT Tests: Total Error ", et
982 : WRITE (iw, '(A,T67,F14.0)') &
983 30 : " Parallel FFT Tests: Performance [Mflops] ", perf
984 30 : WRITE (iw, '(A,T67,F14.6)') " Best time : ", t_min
985 30 : WRITE (iw, '(A,T67,F14.6)') " Worst time: ", t_max
986 30 : IF (iw > 0) CALL m_flush(iw)
987 : END IF
988 :
989 : ! need debugging ???
990 60 : IF (em > toler .OR. et > toler) THEN
991 0 : CPWARN("The FFT results are not accurate ... starting debug pw_transfer")
992 0 : CALL pw_transfer(ca, cb, .TRUE.)
993 0 : CALL pw_transfer(cb, cc, .TRUE.)
994 : END IF
995 :
996 : ! done with these grids
997 60 : CALL ca%release()
998 60 : CALL cb%release()
999 60 : CALL cc%release()
1000 180 : CALL pw_grid_release(grid)
1001 :
1002 : END DO
1003 :
1004 : ! local arrays
1005 60 : DEALLOCATE (layouts)
1006 60 : DEALLOCATE (t_start)
1007 190 : DEALLOCATE (t_end)
1008 :
1009 : END DO
1010 :
1011 : ! cleanup
1012 10 : CALL cell_release(box)
1013 10 : CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
1014 :
1015 20 : END SUBROUTINE pw_fft_test
1016 :
1017 : ! **************************************************************************************************
1018 : !> \brief Tests the eigensolver library routines
1019 : !> \param para_env ...
1020 : !> \param iw ...
1021 : !> \param eigensolver_section ...
1022 : !> \par History
1023 : !> JGH 6-Feb-2001 : Test and performance code
1024 : !> \author JGH 1-JAN-2001
1025 : ! **************************************************************************************************
1026 2 : SUBROUTINE eigensolver_test(para_env, iw, eigensolver_section)
1027 :
1028 : TYPE(mp_para_env_type), POINTER :: para_env
1029 : INTEGER :: iw
1030 : TYPE(section_vals_type), POINTER :: eigensolver_section
1031 :
1032 : INTEGER :: diag_method, i, i_loop, i_rep, &
1033 : init_method, j, n, n_loop, n_rep, &
1034 : neig, unit_number
1035 : REAL(KIND=dp) :: t1, t2
1036 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: eigenvalues
1037 2 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :) :: buffer
1038 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
1039 : TYPE(cp_fm_struct_type), POINTER :: fmstruct
1040 : TYPE(cp_fm_type) :: eigenvectors, matrix, work
1041 2 : TYPE(rng_stream_type), ALLOCATABLE :: rng_stream
1042 :
1043 2 : IF (iw > 0) THEN
1044 1 : WRITE (UNIT=iw, FMT="(/,/,T2,A,/)") "EIGENSOLVER TEST"
1045 : END IF
1046 :
1047 : ! create blacs env corresponding to para_env
1048 2 : NULLIFY (blacs_env)
1049 : CALL cp_blacs_env_create(blacs_env=blacs_env, &
1050 2 : para_env=para_env)
1051 :
1052 : ! loop over all tests
1053 2 : CALL section_vals_get(eigensolver_section, n_repetition=n_rep)
1054 10 : DO i_rep = 1, n_rep
1055 :
1056 : ! parse section
1057 8 : CALL section_vals_val_get(eigensolver_section, "N", i_rep_section=i_rep, i_val=n)
1058 8 : CALL section_vals_val_get(eigensolver_section, "EIGENVALUES", i_rep_section=i_rep, i_val=neig)
1059 8 : CALL section_vals_val_get(eigensolver_section, "DIAG_METHOD", i_rep_section=i_rep, i_val=diag_method)
1060 8 : CALL section_vals_val_get(eigensolver_section, "INIT_METHOD", i_rep_section=i_rep, i_val=init_method)
1061 8 : CALL section_vals_val_get(eigensolver_section, "N_loop", i_rep_section=i_rep, i_val=n_loop)
1062 :
1063 : ! proper number of eigs
1064 8 : IF (neig < 0) neig = n
1065 8 : neig = MIN(neig, n)
1066 :
1067 : ! report
1068 8 : IF (iw > 0) THEN
1069 4 : WRITE (iw, *) "Matrix size", n
1070 4 : WRITE (iw, *) "Number of eigenvalues", neig
1071 4 : WRITE (iw, *) "Timing loops", n_loop
1072 2 : SELECT CASE (diag_method)
1073 : CASE (do_diag_syevd)
1074 2 : WRITE (iw, *) "Diag using syevd"
1075 : CASE (do_diag_syevx)
1076 4 : WRITE (iw, *) "Diag using syevx"
1077 : CASE DEFAULT
1078 : ! stop
1079 : END SELECT
1080 :
1081 4 : SELECT CASE (init_method)
1082 : CASE (do_mat_random)
1083 4 : WRITE (iw, *) "using random matrix"
1084 : CASE (do_mat_read)
1085 4 : WRITE (iw, *) "reading from file"
1086 : CASE DEFAULT
1087 : ! stop
1088 : END SELECT
1089 : END IF
1090 :
1091 : ! create matrix struct type
1092 8 : NULLIFY (fmstruct)
1093 : CALL cp_fm_struct_create(fmstruct=fmstruct, &
1094 : para_env=para_env, &
1095 : context=blacs_env, &
1096 : nrow_global=n, &
1097 8 : ncol_global=n)
1098 :
1099 : ! create all needed matrices, and buffers for the eigenvalues
1100 : CALL cp_fm_create(matrix=matrix, &
1101 : matrix_struct=fmstruct, &
1102 8 : name="MATRIX")
1103 8 : CALL cp_fm_set_all(matrix, 0.0_dp)
1104 :
1105 : CALL cp_fm_create(matrix=eigenvectors, &
1106 : matrix_struct=fmstruct, &
1107 8 : name="EIGENVECTORS")
1108 8 : CALL cp_fm_set_all(eigenvectors, 0.0_dp)
1109 :
1110 : CALL cp_fm_create(matrix=work, &
1111 : matrix_struct=fmstruct, &
1112 8 : name="WORK")
1113 8 : CALL cp_fm_set_all(matrix, 0.0_dp)
1114 :
1115 24 : ALLOCATE (eigenvalues(n))
1116 100 : eigenvalues = 0.0_dp
1117 24 : ALLOCATE (buffer(1, n))
1118 :
1119 : ! generate initial matrix, either by reading a file, or using random numbers
1120 8 : IF (para_env%is_source()) THEN
1121 4 : SELECT CASE (init_method)
1122 : CASE (do_mat_random)
1123 : rng_stream = rng_stream_type( &
1124 : name="rng_stream", &
1125 : distribution_type=UNIFORM, &
1126 4 : extended_precision=.TRUE.)
1127 : CASE (do_mat_read)
1128 : CALL open_file(file_name="MATRIX", &
1129 : file_action="READ", &
1130 : file_form="FORMATTED", &
1131 : file_status="OLD", &
1132 4 : unit_number=unit_number)
1133 : END SELECT
1134 : END IF
1135 :
1136 100 : DO i = 1, n
1137 92 : IF (para_env%is_source()) THEN
1138 : SELECT CASE (init_method)
1139 : CASE (do_mat_random)
1140 347 : DO j = i, n
1141 347 : buffer(1, j) = rng_stream%next() - 0.5_dp
1142 : END DO
1143 : !MK activate/modify for a diagonal dominant symmetric matrix:
1144 : !MK buffer(1,i) = 10.0_dp*buffer(1,i)
1145 : CASE (do_mat_read)
1146 46 : READ (UNIT=unit_number, FMT=*) buffer(1, 1:n)
1147 : END SELECT
1148 : END IF
1149 92 : CALL para_env%bcast(buffer)
1150 8 : SELECT CASE (init_method)
1151 : CASE (do_mat_random)
1152 : CALL cp_fm_set_submatrix(fm=matrix, &
1153 : new_values=buffer, &
1154 : start_row=i, &
1155 : start_col=i, &
1156 : n_rows=1, &
1157 : n_cols=n - i + 1, &
1158 : alpha=1.0_dp, &
1159 : beta=0.0_dp, &
1160 92 : transpose=.FALSE.)
1161 : CALL cp_fm_set_submatrix(fm=matrix, &
1162 : new_values=buffer, &
1163 : start_row=i, &
1164 : start_col=i, &
1165 : n_rows=n - i + 1, &
1166 : n_cols=1, &
1167 : alpha=1.0_dp, &
1168 : beta=0.0_dp, &
1169 92 : transpose=.TRUE.)
1170 : CASE (do_mat_read)
1171 : CALL cp_fm_set_submatrix(fm=matrix, &
1172 : new_values=buffer, &
1173 : start_row=i, &
1174 : start_col=1, &
1175 : n_rows=1, &
1176 : n_cols=n, &
1177 : alpha=1.0_dp, &
1178 : beta=0.0_dp, &
1179 92 : transpose=.FALSE.)
1180 : END SELECT
1181 : END DO
1182 :
1183 8 : DEALLOCATE (buffer)
1184 :
1185 8 : IF (para_env%is_source()) THEN
1186 0 : SELECT CASE (init_method)
1187 : CASE (do_mat_read)
1188 4 : CALL close_file(unit_number=unit_number)
1189 : END SELECT
1190 : END IF
1191 :
1192 88 : DO i_loop = 1, n_loop
1193 1000 : eigenvalues = 0.0_dp
1194 80 : CALL cp_fm_set_all(eigenvectors, 0.0_dp)
1195 : CALL cp_fm_to_fm(source=matrix, &
1196 80 : destination=work)
1197 :
1198 : ! DONE, now testing
1199 80 : t1 = m_walltime()
1200 40 : SELECT CASE (diag_method)
1201 : CASE (do_diag_syevd)
1202 : CALL cp_fm_syevd(matrix=work, &
1203 : eigenvectors=eigenvectors, &
1204 40 : eigenvalues=eigenvalues)
1205 : CASE (do_diag_syevx)
1206 : CALL cp_fm_syevx(matrix=work, &
1207 : eigenvectors=eigenvectors, &
1208 : eigenvalues=eigenvalues, &
1209 : neig=neig, &
1210 80 : work_syevx=1.0_dp)
1211 : END SELECT
1212 80 : t2 = m_walltime()
1213 88 : IF (iw > 0) WRITE (iw, *) "Timing for loop ", i_loop, " : ", t2 - t1
1214 : END DO
1215 :
1216 8 : IF (iw > 0) THEN
1217 4 : WRITE (iw, *) "Eigenvalues: "
1218 4 : WRITE (UNIT=iw, FMT="(T3,5F14.6)") eigenvalues(1:neig)
1219 42 : WRITE (UNIT=iw, FMT="(T3,A4,F16.6)") "Sum:", SUM(eigenvalues(1:neig))
1220 4 : WRITE (iw, *) ""
1221 : END IF
1222 :
1223 : ! Clean up
1224 8 : DEALLOCATE (eigenvalues)
1225 8 : CALL cp_fm_release(matrix=work)
1226 8 : CALL cp_fm_release(matrix=eigenvectors)
1227 8 : CALL cp_fm_release(matrix=matrix)
1228 18 : CALL cp_fm_struct_release(fmstruct=fmstruct)
1229 :
1230 : END DO
1231 :
1232 2 : CALL cp_blacs_env_release(blacs_env=blacs_env)
1233 :
1234 4 : END SUBROUTINE eigensolver_test
1235 :
1236 : ! **************************************************************************************************
1237 : !> \brief Tests the parallel matrix multiply
1238 : !> \param para_env ...
1239 : !> \param iw ...
1240 : !> \param cp_fm_gemm_test_section ...
1241 : ! **************************************************************************************************
1242 4 : SUBROUTINE cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
1243 :
1244 : TYPE(mp_para_env_type), POINTER :: para_env
1245 : INTEGER :: iw
1246 : TYPE(section_vals_type), POINTER :: cp_fm_gemm_test_section
1247 :
1248 : CHARACTER(LEN=1) :: transa, transb
1249 : INTEGER :: i_loop, i_rep, k, m, n, N_loop, n_rep, ncol_block, ncol_block_actual, &
1250 : ncol_global, np, nrow_block, nrow_block_actual, nrow_global
1251 4 : INTEGER, DIMENSION(:), POINTER :: grid_2d
1252 : LOGICAL :: force_blocksize, row_major, transa_p, &
1253 : transb_p
1254 : REAL(KIND=dp) :: t1, t2, t3, t4
1255 : TYPE(cp_blacs_env_type), POINTER :: blacs_env
1256 : TYPE(cp_fm_struct_type), POINTER :: fmstruct_a, fmstruct_b, fmstruct_c
1257 : TYPE(cp_fm_type) :: matrix_a, matrix_b, matrix_c
1258 :
1259 4 : CALL section_vals_get(cp_fm_gemm_test_section, n_repetition=n_rep)
1260 24 : DO i_rep = 1, n_rep
1261 :
1262 : ! how often should we do the multiply
1263 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
1264 :
1265 : ! matrices def.
1266 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "K", i_rep_section=i_rep, i_val=k)
1267 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "N", i_rep_section=i_rep, i_val=n)
1268 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "M", i_rep_section=i_rep, i_val=m)
1269 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "transa", i_rep_section=i_rep, l_val=transa_p)
1270 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "transb", i_rep_section=i_rep, l_val=transb_p)
1271 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "nrow_block", i_rep_section=i_rep, i_val=nrow_block)
1272 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "ncol_block", i_rep_section=i_rep, i_val=ncol_block)
1273 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "ROW_MAJOR", i_rep_section=i_rep, l_val=row_major)
1274 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "GRID_2D", i_rep_section=i_rep, i_vals=grid_2d)
1275 20 : CALL section_vals_val_get(cp_fm_gemm_test_section, "FORCE_BLOCKSIZE", i_rep_section=i_rep, l_val=force_blocksize)
1276 20 : transa = "N"
1277 20 : transb = "N"
1278 20 : IF (transa_p) transa = "T"
1279 20 : IF (transb_p) transb = "T"
1280 :
1281 20 : IF (iw > 0) THEN
1282 10 : WRITE (iw, '(T2,A)') "----------- TESTING PARALLEL MATRIX MULTIPLY -------------"
1283 10 : WRITE (iw, '(T2,A)', ADVANCE="NO") "C = "
1284 10 : IF (transa_p) THEN
1285 2 : WRITE (iw, '(A)', ADVANCE="NO") "TRANSPOSE(A) x"
1286 : ELSE
1287 8 : WRITE (iw, '(A)', ADVANCE="NO") "A x "
1288 : END IF
1289 10 : IF (transb_p) THEN
1290 2 : WRITE (iw, '(A)') "TRANSPOSE(B) "
1291 : ELSE
1292 8 : WRITE (iw, '(A)') "B "
1293 : END IF
1294 10 : WRITE (iw, '(T2,A,T50,I5,A,I5)') 'requested block size', nrow_block, ' by ', ncol_block
1295 10 : WRITE (iw, '(T2,A,T50,I5)') 'number of repetitions of cp_fm_gemm ', n_loop
1296 10 : WRITE (iw, '(T2,A,T50,L5)') 'Row Major', row_major
1297 30 : WRITE (iw, '(T2,A,T50,2I7)') 'GRID_2D ', grid_2d
1298 10 : WRITE (iw, '(T2,A,T50,L5)') 'Force blocksize ', force_blocksize
1299 : ! check the return value of pilaenv, too small values limit the performance (assuming pdgemm is the vanilla variant)
1300 10 : np = cp_fm_pilaenv(0, 'D')
1301 10 : IF (np > 0) THEN
1302 10 : WRITE (iw, '(T2,A,T50,I5)') 'PILAENV blocksize', np
1303 : END IF
1304 : END IF
1305 :
1306 20 : NULLIFY (blacs_env)
1307 : CALL cp_blacs_env_create(blacs_env=blacs_env, &
1308 : para_env=para_env, &
1309 : row_major=row_major, &
1310 20 : grid_2d=grid_2d)
1311 :
1312 20 : NULLIFY (fmstruct_a)
1313 20 : IF (transa_p) THEN
1314 4 : nrow_global = m; ncol_global = k
1315 : ELSE
1316 16 : nrow_global = k; ncol_global = m
1317 : END IF
1318 : CALL cp_fm_struct_create(fmstruct=fmstruct_a, para_env=para_env, context=blacs_env, &
1319 : nrow_global=nrow_global, ncol_global=ncol_global, &
1320 20 : nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1321 20 : CALL cp_fm_struct_get(fmstruct_a, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1322 30 : IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix A ', nrow_global, " by ", ncol_global, &
1323 20 : ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
1324 :
1325 20 : IF (transb_p) THEN
1326 4 : nrow_global = n; ncol_global = m
1327 : ELSE
1328 16 : nrow_global = m; ncol_global = n
1329 : END IF
1330 20 : NULLIFY (fmstruct_b)
1331 : CALL cp_fm_struct_create(fmstruct=fmstruct_b, para_env=para_env, context=blacs_env, &
1332 : nrow_global=nrow_global, ncol_global=ncol_global, &
1333 20 : nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1334 20 : CALL cp_fm_struct_get(fmstruct_b, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1335 30 : IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix B ', nrow_global, " by ", ncol_global, &
1336 20 : ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
1337 :
1338 20 : NULLIFY (fmstruct_c)
1339 20 : nrow_global = k
1340 20 : ncol_global = n
1341 : CALL cp_fm_struct_create(fmstruct=fmstruct_c, para_env=para_env, context=blacs_env, &
1342 : nrow_global=nrow_global, ncol_global=ncol_global, &
1343 20 : nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
1344 20 : CALL cp_fm_struct_get(fmstruct_c, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
1345 30 : IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix C ', nrow_global, " by ", ncol_global, &
1346 20 : ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
1347 :
1348 20 : CALL cp_fm_create(matrix=matrix_a, matrix_struct=fmstruct_a, name="MATRIX A")
1349 20 : CALL cp_fm_create(matrix=matrix_b, matrix_struct=fmstruct_b, name="MATRIX B")
1350 20 : CALL cp_fm_create(matrix=matrix_c, matrix_struct=fmstruct_c, name="MATRIX C")
1351 :
1352 20 : CALL RANDOM_NUMBER(matrix_a%local_data)
1353 20 : CALL RANDOM_NUMBER(matrix_b%local_data)
1354 20 : CALL RANDOM_NUMBER(matrix_c%local_data)
1355 :
1356 20 : IF (iw > 0) CALL m_flush(iw)
1357 :
1358 20 : t1 = m_walltime()
1359 932 : DO i_loop = 1, N_loop
1360 912 : t3 = m_walltime()
1361 912 : CALL parallel_gemm(transa, transb, k, n, m, 1.0_dp, matrix_a, matrix_b, 0.0_dp, matrix_c)
1362 912 : t4 = m_walltime()
1363 932 : IF (iw > 0) THEN
1364 456 : WRITE (iw, '(T2,A,T50,F12.6)') "cp_fm_gemm timing: ", (t4 - t3)
1365 456 : CALL m_flush(iw)
1366 : END IF
1367 : END DO
1368 20 : t2 = m_walltime()
1369 :
1370 20 : IF (iw > 0) THEN
1371 10 : WRITE (iw, '(T2,A,T50,F12.6)') "average cp_fm_gemm timing: ", (t2 - t1)/N_loop
1372 10 : IF (t2 > t1) THEN
1373 10 : WRITE (iw, '(T2,A,T50,F12.6)') "cp_fm_gemm Gflops per MPI task: ", &
1374 20 : 2*REAL(m, kind=dp)*REAL(n, kind=dp)*REAL(k, kind=dp)*N_loop/MAX(0.001_dp, t2 - t1)/1.0E9_dp/para_env%num_pe
1375 : END IF
1376 : END IF
1377 :
1378 20 : CALL cp_fm_release(matrix=matrix_a)
1379 20 : CALL cp_fm_release(matrix=matrix_b)
1380 20 : CALL cp_fm_release(matrix=matrix_c)
1381 20 : CALL cp_fm_struct_release(fmstruct=fmstruct_a)
1382 20 : CALL cp_fm_struct_release(fmstruct=fmstruct_b)
1383 20 : CALL cp_fm_struct_release(fmstruct=fmstruct_c)
1384 124 : CALL cp_blacs_env_release(blacs_env=blacs_env)
1385 :
1386 : END DO
1387 :
1388 4 : END SUBROUTINE cp_fm_gemm_test
1389 :
1390 : ! **************************************************************************************************
1391 : !> \brief Tests the DBCSR interface.
1392 : !> \param para_env ...
1393 : !> \param iw ...
1394 : !> \param input_section ...
1395 : ! **************************************************************************************************
1396 32 : SUBROUTINE cp_dbcsr_tests(para_env, iw, input_section)
1397 :
1398 : TYPE(mp_para_env_type), POINTER :: para_env
1399 : INTEGER :: iw
1400 : TYPE(section_vals_type), POINTER :: input_section
1401 :
1402 : CHARACTER, DIMENSION(3) :: types
1403 : INTEGER :: data_type, i_rep, k, m, n, N_loop, &
1404 : n_rep, test_type
1405 32 : INTEGER, DIMENSION(:), POINTER :: bs_k, bs_m, bs_n, nproc
1406 : LOGICAL :: always_checksum, retain_sparsity, &
1407 : transa_p, transb_p
1408 : REAL(KIND=dp) :: alpha, beta, filter_eps, s_a, s_b, s_c
1409 :
1410 : ! ---------------------------------------------------------------------------
1411 :
1412 32 : NULLIFY (bs_m, bs_n, bs_k)
1413 32 : CALL section_vals_get(input_section, n_repetition=n_rep)
1414 32 : CALL dbcsr_reset_randmat_seed()
1415 78 : DO i_rep = 1, n_rep
1416 : ! how often should we do the multiply
1417 46 : CALL section_vals_val_get(input_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
1418 :
1419 : ! matrices def.
1420 46 : CALL section_vals_val_get(input_section, "TEST_TYPE", i_rep_section=i_rep, i_val=test_type)
1421 46 : CALL section_vals_val_get(input_section, "DATA_TYPE", i_rep_section=i_rep, i_val=data_type)
1422 46 : CALL section_vals_val_get(input_section, "K", i_rep_section=i_rep, i_val=k)
1423 46 : CALL section_vals_val_get(input_section, "N", i_rep_section=i_rep, i_val=n)
1424 46 : CALL section_vals_val_get(input_section, "M", i_rep_section=i_rep, i_val=m)
1425 46 : CALL section_vals_val_get(input_section, "transa", i_rep_section=i_rep, l_val=transa_p)
1426 46 : CALL section_vals_val_get(input_section, "transb", i_rep_section=i_rep, l_val=transb_p)
1427 : CALL section_vals_val_get(input_section, "bs_m", i_rep_section=i_rep, &
1428 46 : i_vals=bs_m)
1429 : CALL section_vals_val_get(input_section, "bs_n", i_rep_section=i_rep, &
1430 46 : i_vals=bs_n)
1431 : CALL section_vals_val_get(input_section, "bs_k", i_rep_section=i_rep, &
1432 46 : i_vals=bs_k)
1433 46 : CALL section_vals_val_get(input_section, "keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
1434 46 : CALL section_vals_val_get(input_section, "asparsity", i_rep_section=i_rep, r_val=s_a)
1435 46 : CALL section_vals_val_get(input_section, "bsparsity", i_rep_section=i_rep, r_val=s_b)
1436 46 : CALL section_vals_val_get(input_section, "csparsity", i_rep_section=i_rep, r_val=s_c)
1437 46 : CALL section_vals_val_get(input_section, "alpha", i_rep_section=i_rep, r_val=alpha)
1438 46 : CALL section_vals_val_get(input_section, "beta", i_rep_section=i_rep, r_val=beta)
1439 : CALL section_vals_val_get(input_section, "nproc", i_rep_section=i_rep, &
1440 46 : i_vals=nproc)
1441 : CALL section_vals_val_get(input_section, "atype", i_rep_section=i_rep, &
1442 46 : c_val=types(1))
1443 : CALL section_vals_val_get(input_section, "btype", i_rep_section=i_rep, &
1444 46 : c_val=types(2))
1445 : CALL section_vals_val_get(input_section, "ctype", i_rep_section=i_rep, &
1446 46 : c_val=types(3))
1447 : CALL section_vals_val_get(input_section, "filter_eps", &
1448 46 : i_rep_section=i_rep, r_val=filter_eps)
1449 46 : CALL section_vals_val_get(input_section, "ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
1450 :
1451 : CALL dbcsr_run_tests(para_env%get_handle(), iw, nproc, &
1452 : (/m, n, k/), &
1453 : (/transa_p, transb_p/), &
1454 : bs_m, bs_n, bs_k, &
1455 : (/s_a, s_b, s_c/), &
1456 : alpha, beta, &
1457 : data_type=data_type, &
1458 : test_type=test_type, &
1459 : n_loops=n_loop, eps=filter_eps, retain_sparsity=retain_sparsity, &
1460 538 : always_checksum=always_checksum)
1461 : END DO
1462 32 : END SUBROUTINE cp_dbcsr_tests
1463 :
1464 : ! **************************************************************************************************
1465 : !> \brief Tests the DBM library.
1466 : !> \param para_env ...
1467 : !> \param iw ...
1468 : !> \param input_section ...
1469 : ! **************************************************************************************************
1470 14 : SUBROUTINE run_dbm_tests(para_env, iw, input_section)
1471 :
1472 : TYPE(mp_para_env_type), POINTER :: para_env
1473 : INTEGER :: iw
1474 : TYPE(section_vals_type), POINTER :: input_section
1475 :
1476 : INTEGER :: i_rep, k, m, n, N_loop, n_rep
1477 14 : INTEGER, DIMENSION(:), POINTER :: bs_k, bs_m, bs_n
1478 : LOGICAL :: always_checksum, retain_sparsity, &
1479 : transa_p, transb_p
1480 : REAL(KIND=dp) :: alpha, beta, filter_eps, s_a, s_b, s_c
1481 :
1482 : ! ---------------------------------------------------------------------------
1483 :
1484 14 : NULLIFY (bs_m, bs_n, bs_k)
1485 14 : CALL section_vals_get(input_section, n_repetition=n_rep)
1486 14 : CALL dbcsr_reset_randmat_seed()
1487 28 : DO i_rep = 1, n_rep
1488 14 : CALL section_vals_val_get(input_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
1489 14 : CALL section_vals_val_get(input_section, "K", i_rep_section=i_rep, i_val=k)
1490 14 : CALL section_vals_val_get(input_section, "N", i_rep_section=i_rep, i_val=n)
1491 14 : CALL section_vals_val_get(input_section, "M", i_rep_section=i_rep, i_val=m)
1492 14 : CALL section_vals_val_get(input_section, "transa", i_rep_section=i_rep, l_val=transa_p)
1493 14 : CALL section_vals_val_get(input_section, "transb", i_rep_section=i_rep, l_val=transb_p)
1494 14 : CALL section_vals_val_get(input_section, "bs_m", i_rep_section=i_rep, i_vals=bs_m)
1495 14 : CALL section_vals_val_get(input_section, "bs_n", i_rep_section=i_rep, i_vals=bs_n)
1496 14 : CALL section_vals_val_get(input_section, "bs_k", i_rep_section=i_rep, i_vals=bs_k)
1497 14 : CALL section_vals_val_get(input_section, "keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
1498 14 : CALL section_vals_val_get(input_section, "asparsity", i_rep_section=i_rep, r_val=s_a)
1499 14 : CALL section_vals_val_get(input_section, "bsparsity", i_rep_section=i_rep, r_val=s_b)
1500 14 : CALL section_vals_val_get(input_section, "csparsity", i_rep_section=i_rep, r_val=s_c)
1501 14 : CALL section_vals_val_get(input_section, "alpha", i_rep_section=i_rep, r_val=alpha)
1502 14 : CALL section_vals_val_get(input_section, "beta", i_rep_section=i_rep, r_val=beta)
1503 14 : CALL section_vals_val_get(input_section, "filter_eps", i_rep_section=i_rep, r_val=filter_eps)
1504 14 : CALL section_vals_val_get(input_section, "ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
1505 :
1506 : CALL dbm_run_tests(mp_group=para_env, &
1507 : io_unit=iw, &
1508 : matrix_sizes=(/m, n, k/), &
1509 : trs=(/transa_p, transb_p/), &
1510 : bs_m=bs_m, &
1511 : bs_n=bs_n, &
1512 : bs_k=bs_k, &
1513 : sparsities=(/s_a, s_b, s_c/), &
1514 : alpha=alpha, &
1515 : beta=beta, &
1516 : n_loops=n_loop, &
1517 : eps=filter_eps, &
1518 : retain_sparsity=retain_sparsity, &
1519 154 : always_checksum=always_checksum)
1520 : END DO
1521 14 : END SUBROUTINE run_dbm_tests
1522 :
1523 8484 : END MODULE library_tests
|