Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 :
8 : #include <assert.h>
9 : #include <math.h>
10 : #include <stddef.h>
11 : #include <stdio.h>
12 : #include <stdlib.h>
13 : #include <string.h>
14 :
15 : #include "common/grid_common.h"
16 : #include "common/grid_constants.h"
17 : #include "common/grid_library.h"
18 : #include "grid_task_list.h"
19 :
20 : /*******************************************************************************
21 : * \brief Allocates a task list which can be passed to grid_collocate_task_list.
22 : * See grid_task_list.h for details.
23 : * \author Ole Schuett
24 : ******************************************************************************/
25 13262 : void grid_create_task_list(
26 : const bool orthorhombic, const int ntasks, const int nlevels,
27 : const int natoms, const int nkinds, const int nblocks,
28 : const int block_offsets[nblocks], const double atom_positions[natoms][3],
29 : const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds],
30 : const int level_list[ntasks], const int iatom_list[ntasks],
31 : const int jatom_list[ntasks], const int iset_list[ntasks],
32 : const int jset_list[ntasks], const int ipgf_list[ntasks],
33 : const int jpgf_list[ntasks], const int border_mask_list[ntasks],
34 : const int block_num_list[ntasks], const double radius_list[ntasks],
35 : const double rab_list[ntasks][3], const int npts_global[nlevels][3],
36 : const int npts_local[nlevels][3], const int shift_local[nlevels][3],
37 : const int border_width[nlevels][3], const double dh[nlevels][3][3],
38 : const double dh_inv[nlevels][3][3], grid_task_list **task_list_out) {
39 :
40 13262 : const grid_library_config config = grid_library_get_config();
41 :
42 13262 : grid_task_list *task_list = NULL;
43 :
44 13262 : if (*task_list_out == NULL) {
45 7916 : task_list = malloc(sizeof(grid_task_list));
46 7916 : memset(task_list, 0, sizeof(grid_task_list));
47 :
48 : // Resolve AUTO to a concrete backend.
49 7916 : if (config.backend == GRID_BACKEND_AUTO) {
50 :
51 : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
52 : task_list->backend = GRID_BACKEND_HIP;
53 : #elif defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
54 : task_list->backend = GRID_BACKEND_GPU;
55 : #else
56 7896 : task_list->backend = GRID_BACKEND_CPU;
57 : #endif
58 : } else {
59 20 : task_list->backend = config.backend;
60 : }
61 : } else {
62 : // Reuse existing task list.
63 5346 : task_list = *task_list_out;
64 5346 : free(task_list->npts_local);
65 : }
66 :
67 : // Store npts_local for bounds checking and validation.
68 13262 : task_list->nlevels = nlevels;
69 13262 : size_t size = nlevels * 3 * sizeof(int);
70 13262 : task_list->npts_local = malloc(size);
71 13262 : memcpy(task_list->npts_local, npts_local, size);
72 :
73 : // Always create reference backend because it might be needed for validation.
74 13262 : grid_ref_create_task_list(
75 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
76 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
77 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
78 : block_num_list, radius_list, rab_list, npts_global, npts_local,
79 : shift_local, border_width, dh, dh_inv, &task_list->ref);
80 :
81 : // Create other backend, if selected.
82 13262 : switch (task_list->backend) {
83 : case GRID_BACKEND_REF:
84 : break; // was already created above
85 13238 : case GRID_BACKEND_CPU:
86 13238 : grid_cpu_create_task_list(
87 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
88 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
89 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
90 : border_mask_list, block_num_list, radius_list, rab_list, npts_global,
91 : npts_local, shift_local, border_width, dh, dh_inv, &task_list->cpu);
92 13238 : break;
93 20 : case GRID_BACKEND_DGEMM:
94 20 : grid_dgemm_create_task_list(
95 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
96 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
97 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
98 : border_mask_list, block_num_list, radius_list, rab_list, npts_global,
99 : npts_local, shift_local, border_width, dh, dh_inv, &task_list->dgemm);
100 20 : break;
101 :
102 0 : case GRID_BACKEND_GPU:
103 : #if (defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID))
104 : grid_gpu_create_task_list(
105 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
106 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
107 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
108 : border_mask_list, block_num_list, radius_list, rab_list, npts_global,
109 : npts_local, shift_local, border_width, dh, dh_inv, &task_list->gpu);
110 : #else
111 0 : fprintf(stderr,
112 : "Error: The GPU grid backend is not available. "
113 : "Please re-compile with -D__OFFLOAD_CUDA or -D__OFFLOAD_HIP");
114 0 : abort();
115 : #endif
116 0 : break;
117 :
118 0 : case GRID_BACKEND_HIP:
119 : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
120 : grid_hip_create_task_list(
121 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
122 : &atom_positions[0][0], atom_kinds, basis_sets, level_list, iatom_list,
123 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
124 : border_mask_list, block_num_list, radius_list, &rab_list[0][0],
125 : &npts_global[0][0], &npts_local[0][0], &shift_local[0][0],
126 : &border_width[0][0], &dh[0][0][0], &dh_inv[0][0][0], &task_list->hip);
127 : #else
128 0 : fprintf(stderr, "Error: The HIP grid backend is not available. "
129 : "Please re-compile with -D__OFFLOAD_HIP");
130 0 : abort();
131 : #endif
132 0 : break;
133 :
134 0 : default:
135 0 : printf("Error: Unknown grid backend: %i.\n", config.backend);
136 0 : abort();
137 13262 : break;
138 : }
139 :
140 13262 : *task_list_out = task_list;
141 13262 : }
142 :
143 : /*******************************************************************************
144 : * \brief Deallocates given task list, basis_sets have to be freed separately.
145 : * \author Ole Schuett
146 : ******************************************************************************/
147 7916 : void grid_free_task_list(grid_task_list *task_list) {
148 :
149 7916 : if (task_list->ref != NULL) {
150 7916 : grid_ref_free_task_list(task_list->ref);
151 7916 : task_list->ref = NULL;
152 : }
153 7916 : if (task_list->cpu != NULL) {
154 7904 : grid_cpu_free_task_list(task_list->cpu);
155 7904 : task_list->cpu = NULL;
156 : }
157 7916 : if (task_list->dgemm != NULL) {
158 8 : grid_dgemm_free_task_list(task_list->dgemm);
159 8 : task_list->dgemm = NULL;
160 : }
161 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
162 : if (task_list->gpu != NULL) {
163 : grid_gpu_free_task_list(task_list->gpu);
164 : task_list->gpu = NULL;
165 : }
166 : #endif
167 : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
168 : if (task_list->hip != NULL) {
169 : grid_hip_free_task_list(task_list->hip);
170 : task_list->hip = NULL;
171 : }
172 : #endif
173 :
174 7916 : free(task_list->npts_local);
175 7916 : free(task_list);
176 7916 : }
177 :
178 : /*******************************************************************************
179 : * \brief Collocate all tasks of in given list onto given grids.
180 : * See grid_task_list.h for details.
181 : * \author Ole Schuett
182 : ******************************************************************************/
183 191371 : void grid_collocate_task_list(const grid_task_list *task_list,
184 : const enum grid_func func, const int nlevels,
185 : const int npts_local[nlevels][3],
186 : const offload_buffer *pab_blocks,
187 : offload_buffer *grids[nlevels]) {
188 :
189 : // Bounds check.
190 191371 : assert(task_list->nlevels == nlevels);
191 951429 : for (int ilevel = 0; ilevel < nlevels; ilevel++) {
192 760058 : assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
193 760058 : assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
194 760058 : assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
195 : }
196 :
197 191371 : switch (task_list->backend) {
198 26 : case GRID_BACKEND_REF:
199 26 : grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
200 : grids);
201 26 : break;
202 191187 : case GRID_BACKEND_CPU:
203 191187 : grid_cpu_collocate_task_list(task_list->cpu, func, nlevels, pab_blocks,
204 : grids);
205 191187 : break;
206 158 : case GRID_BACKEND_DGEMM:
207 158 : grid_dgemm_collocate_task_list(task_list->dgemm, func, nlevels, pab_blocks,
208 : grids);
209 158 : break;
210 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
211 : case GRID_BACKEND_GPU:
212 : grid_gpu_collocate_task_list(task_list->gpu, func, nlevels, pab_blocks,
213 : grids);
214 : break;
215 : #endif
216 : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
217 : case GRID_BACKEND_HIP:
218 : grid_hip_collocate_task_list(task_list->hip, func, nlevels, pab_blocks,
219 : grids);
220 : break;
221 : #endif
222 0 : default:
223 0 : printf("Error: Unknown grid backend: %i.\n", task_list->backend);
224 0 : abort();
225 191371 : break;
226 : }
227 :
228 : // Perform validation if enabled.
229 191371 : if (grid_library_get_config().validate) {
230 : // Allocate space for reference results.
231 26 : offload_buffer *grids_ref[nlevels];
232 130 : for (int level = 0; level < nlevels; level++) {
233 104 : const int npts_local_total =
234 104 : npts_local[level][0] * npts_local[level][1] * npts_local[level][2];
235 104 : grids_ref[level] = NULL;
236 104 : offload_create_buffer(npts_local_total, &grids_ref[level]);
237 : }
238 :
239 : // Call reference implementation.
240 26 : grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
241 : grids_ref);
242 :
243 : // Compare results.
244 26 : const double tolerance = 1e-12;
245 26 : double max_rel_diff = 0.0;
246 130 : for (int level = 0; level < nlevels; level++) {
247 2374 : for (int i = 0; i < npts_local[level][0]; i++) {
248 81112 : for (int j = 0; j < npts_local[level][1]; j++) {
249 3619188 : for (int k = 0; k < npts_local[level][2]; k++) {
250 3540346 : const int idx = k * npts_local[level][1] * npts_local[level][0] +
251 3540346 : j * npts_local[level][0] + i;
252 3540346 : const double ref_value = grids_ref[level]->host_buffer[idx];
253 3540346 : const double test_value = grids[level]->host_buffer[idx];
254 3540346 : const double diff = fabs(test_value - ref_value);
255 3540346 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
256 3540346 : max_rel_diff = fmax(max_rel_diff, rel_diff);
257 3540346 : if (rel_diff > tolerance) {
258 0 : fprintf(stderr, "Error: Validation failure in grid collocate\n");
259 0 : fprintf(stderr, " diff: %le\n", diff);
260 0 : fprintf(stderr, " rel_diff: %le\n", rel_diff);
261 0 : fprintf(stderr, " value: %le\n", ref_value);
262 0 : fprintf(stderr, " level: %i\n", level);
263 0 : fprintf(stderr, " ijk: %i %i %i\n", i, j, k);
264 0 : abort();
265 : }
266 : }
267 : }
268 : }
269 104 : offload_free_buffer(grids_ref[level]);
270 104 : printf("Validated grid collocate, max rel. diff: %le\n", max_rel_diff);
271 : }
272 : }
273 191371 : }
274 :
275 : /*******************************************************************************
276 : * \brief Integrate all tasks of in given list from given grids.
277 : * See grid_task_list.h for details.
278 : * \author Ole Schuett
279 : ******************************************************************************/
280 178825 : void grid_integrate_task_list(
281 : const grid_task_list *task_list, const bool compute_tau, const int natoms,
282 : const int nlevels, const int npts_local[nlevels][3],
283 : const offload_buffer *pab_blocks, const offload_buffer *grids[nlevels],
284 : offload_buffer *hab_blocks, double forces[natoms][3], double virial[3][3]) {
285 :
286 : // Bounds check.
287 178825 : assert(task_list->nlevels == nlevels);
288 888331 : for (int ilevel = 0; ilevel < nlevels; ilevel++) {
289 709506 : assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
290 709506 : assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
291 709506 : assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
292 : }
293 :
294 178825 : assert(forces == NULL || pab_blocks != NULL);
295 178825 : assert(virial == NULL || pab_blocks != NULL);
296 :
297 178825 : switch (task_list->backend) {
298 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
299 : case GRID_BACKEND_GPU:
300 : grid_gpu_integrate_task_list(task_list->gpu, compute_tau, natoms, nlevels,
301 : pab_blocks, grids, hab_blocks, forces, virial);
302 : break;
303 : #endif
304 : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
305 : case GRID_BACKEND_HIP:
306 : grid_hip_integrate_task_list(task_list->hip, compute_tau, nlevels,
307 : pab_blocks, grids, hab_blocks, &forces[0][0],
308 : &virial[0][0]);
309 : break;
310 : #endif
311 156 : case GRID_BACKEND_DGEMM:
312 156 : grid_dgemm_integrate_task_list(task_list->dgemm, compute_tau, natoms,
313 : nlevels, pab_blocks, grids, hab_blocks,
314 : forces, virial);
315 156 : break;
316 178645 : case GRID_BACKEND_CPU:
317 178645 : grid_cpu_integrate_task_list(task_list->cpu, compute_tau, natoms, nlevels,
318 : pab_blocks, grids, hab_blocks, forces, virial);
319 178645 : break;
320 24 : case GRID_BACKEND_REF:
321 24 : grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
322 : pab_blocks, grids, hab_blocks, forces, virial);
323 24 : break;
324 0 : default:
325 0 : printf("Error: Unknown grid backend: %i.\n", task_list->backend);
326 0 : abort();
327 178825 : break;
328 : }
329 :
330 : // Perform validation if enabled.
331 178825 : if (grid_library_get_config().validate) {
332 : // Allocate space for reference results.
333 24 : const int hab_length = hab_blocks->size / sizeof(double);
334 24 : offload_buffer *hab_blocks_ref = NULL;
335 24 : offload_create_buffer(hab_length, &hab_blocks_ref);
336 24 : double forces_ref[natoms][3], virial_ref[3][3];
337 :
338 : // Call reference implementation.
339 48 : grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
340 : pab_blocks, grids, hab_blocks_ref,
341 : (forces != NULL) ? forces_ref : NULL,
342 : (virial != NULL) ? virial_ref : NULL);
343 :
344 : // Compare hab.
345 24 : const double hab_tolerance = 1e-12;
346 24 : double hab_max_rel_diff = 0.0;
347 2025 : for (int i = 0; i < hab_length; i++) {
348 2001 : const double ref_value = hab_blocks_ref->host_buffer[i];
349 2001 : const double test_value = hab_blocks->host_buffer[i];
350 2001 : const double diff = fabs(test_value - ref_value);
351 2001 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
352 2001 : hab_max_rel_diff = fmax(hab_max_rel_diff, rel_diff);
353 2001 : if (rel_diff > hab_tolerance) {
354 0 : fprintf(stderr, "Error: Validation failure in grid integrate\n");
355 0 : fprintf(stderr, " hab diff: %le\n", diff);
356 0 : fprintf(stderr, " hab rel_diff: %le\n", rel_diff);
357 0 : fprintf(stderr, " hab value: %le\n", ref_value);
358 0 : fprintf(stderr, " hab i: %i\n", i);
359 0 : abort();
360 : }
361 : }
362 :
363 : // Compare forces.
364 24 : const double forces_tolerance = 1e-8; // account for higher numeric noise
365 24 : double forces_max_rel_diff = 0.0;
366 24 : if (forces != NULL) {
367 14 : for (int iatom = 0; iatom < natoms; iatom++) {
368 40 : for (int idir = 0; idir < 3; idir++) {
369 30 : const double ref_value = forces_ref[iatom][idir];
370 30 : const double test_value = forces[iatom][idir];
371 30 : const double diff = fabs(test_value - ref_value);
372 30 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
373 30 : forces_max_rel_diff = fmax(forces_max_rel_diff, rel_diff);
374 30 : if (rel_diff > forces_tolerance) {
375 0 : fprintf(stderr, "Error: Validation failure in grid integrate\n");
376 0 : fprintf(stderr, " forces diff: %le\n", diff);
377 0 : fprintf(stderr, " forces rel_diff: %le\n", rel_diff);
378 0 : fprintf(stderr, " forces value: %le\n", ref_value);
379 0 : fprintf(stderr, " forces atom: %i\n", iatom);
380 0 : fprintf(stderr, " forces dir: %i\n", idir);
381 0 : abort();
382 : }
383 : }
384 : }
385 : }
386 :
387 : // Compare virial.
388 24 : const double virial_tolerance = 1e-8; // account for higher numeric noise
389 24 : double virial_max_rel_diff = 0.0;
390 24 : if (virial != NULL) {
391 0 : for (int i = 0; i < 3; i++) {
392 0 : for (int j = 0; j < 3; j++) {
393 0 : const double ref_value = virial_ref[i][j];
394 0 : const double test_value = virial[i][j];
395 0 : const double diff = fabs(test_value - ref_value);
396 0 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
397 0 : virial_max_rel_diff = fmax(virial_max_rel_diff, rel_diff);
398 0 : if (rel_diff > virial_tolerance) {
399 0 : fprintf(stderr, "Error: Validation failure in grid integrate\n");
400 0 : fprintf(stderr, " virial diff: %le\n", diff);
401 0 : fprintf(stderr, " virial rel_diff: %le\n", rel_diff);
402 0 : fprintf(stderr, " virial value: %le\n", ref_value);
403 0 : fprintf(stderr, " virial ij: %i %i\n", i, j);
404 0 : abort();
405 : }
406 : }
407 : }
408 : }
409 :
410 24 : printf("Validated grid_integrate, max rel. diff: %le %le %le\n",
411 : hab_max_rel_diff, forces_max_rel_diff, virial_max_rel_diff);
412 24 : offload_free_buffer(hab_blocks_ref);
413 : }
414 178825 : }
415 :
416 : // EOF
|