Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2026 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 :
8 : #include <assert.h>
9 : #include <math.h>
10 : #include <stddef.h>
11 : #include <stdio.h>
12 : #include <stdlib.h>
13 : #include <string.h>
14 :
15 : #include "grid_task_list.h"
16 : #include "grid_task_list_internal.h"
17 :
18 : /*******************************************************************************
19 : * \brief Allocates a task list which can be passed to grid_collocate_task_list.
20 : * See grid_task_list.h for details.
21 : * \author Ole Schuett
22 : ******************************************************************************/
23 16158 : void grid_create_task_list(
24 : const bool orthorhombic, const int ntasks, const int nlevels,
25 : const int natoms, const int nkinds, const int nblocks,
26 : const int block_offsets[nblocks], const double atom_positions[natoms][3],
27 : const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds],
28 : const int level_list[ntasks], const int iatom_list[ntasks],
29 : const int jatom_list[ntasks], const int iset_list[ntasks],
30 : const int jset_list[ntasks], const int ipgf_list[ntasks],
31 : const int jpgf_list[ntasks], const int border_mask_list[ntasks],
32 : const int block_num_list[ntasks], const double radius_list[ntasks],
33 : const double rab_list[ntasks][3], const int npts_global[nlevels][3],
34 : const int npts_local[nlevels][3], const int shift_local[nlevels][3],
35 : const int border_width[nlevels][3], const double dh[nlevels][3][3],
36 : const double dh_inv[nlevels][3][3], grid_task_list **task_list_out) {
37 :
38 16158 : const grid_library_config config = grid_library_get_config();
39 :
40 16158 : grid_task_list_internal *task_list = NULL;
41 :
42 16158 : if (*task_list_out == NULL) {
43 9802 : task_list = malloc(sizeof(grid_task_list_internal));
44 : // not a proper handling of errors. assert is a debug tool
45 9802 : assert(task_list != NULL);
46 9802 : memset(task_list, 0, sizeof(grid_task_list_internal));
47 9802 : *task_list_out = task_list;
48 :
49 : // Resolve AUTO to a concrete backend.
50 9802 : if (config.backend == GRID_BACKEND_AUTO) {
51 : #if (defined(__OFFLOAD_CUDA) || defined(__OFFLOAD_HIP)) && \
52 : !defined(__NO_OFFLOAD_GRID)
53 : task_list->backend = GRID_BACKEND_GPU;
54 : #else
55 9782 : task_list->backend = GRID_BACKEND_CPU;
56 : #endif
57 : } else {
58 20 : task_list->backend = config.backend;
59 : }
60 : } else {
61 : // Reuse existing task list.
62 : task_list = (grid_task_list_internal *)*task_list_out;
63 : }
64 :
65 16158 : if ((nblocks == 0) || (ntasks == 0) || (nlevels == 0)) {
66 245 : task_list->empty = true;
67 245 : return;
68 : } else {
69 15913 : task_list->empty = false;
70 : }
71 :
72 15913 : size_t size = (size_t)nlevels * 3 * sizeof(int);
73 :
74 15913 : if (task_list->nlevels < nlevels) {
75 9601 : free(task_list->npts_local);
76 9601 : task_list->npts_local = malloc(size);
77 : }
78 :
79 : // Store npts_local for bounds checking and validation.
80 15913 : task_list->nlevels = nlevels;
81 15913 : assert(task_list->npts_local != NULL);
82 15913 : memcpy(task_list->npts_local, npts_local, size);
83 :
84 : // Always create reference backend because it might be needed for validation.
85 15913 : grid_ref_create_task_list(
86 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
87 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
88 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
89 : block_num_list, radius_list, rab_list, npts_global, npts_local,
90 : shift_local, border_width, dh, dh_inv, &task_list->ref);
91 :
92 : // Create other backend, if selected.
93 15913 : switch (task_list->backend) {
94 : case GRID_BACKEND_REF:
95 : break; // was already created above
96 15891 : case GRID_BACKEND_CPU:
97 15891 : grid_cpu_create_task_list(
98 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
99 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
100 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
101 : border_mask_list, block_num_list, radius_list, rab_list, npts_global,
102 : npts_local, shift_local, border_width, dh, dh_inv, &task_list->cpu);
103 15891 : break;
104 19 : case GRID_BACKEND_DGEMM:
105 19 : grid_dgemm_create_task_list(
106 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
107 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
108 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
109 : border_mask_list, block_num_list, radius_list, rab_list, npts_global,
110 : npts_local, shift_local, border_width, dh, dh_inv, &task_list->dgemm);
111 19 : break;
112 :
113 0 : case GRID_BACKEND_GPU:
114 : #if (defined(__OFFLOAD_CUDA) || defined(__OFFLOAD_HIP)) && \
115 : !defined(__NO_OFFLOAD_GRID)
116 : grid_gpu_create_task_list(
117 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
118 : &atom_positions[0][0], atom_kinds, basis_sets, level_list, iatom_list,
119 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
120 : border_mask_list, block_num_list, radius_list, &rab_list[0][0],
121 : &npts_global[0][0], &npts_local[0][0], &shift_local[0][0],
122 : &border_width[0][0], &dh[0][0][0], &dh_inv[0][0][0], &task_list->gpu);
123 : #else
124 0 : fprintf(stderr, "Error: The GPU grid backend is not available. "
125 : "Please re-compile with -D__OFFLOAD");
126 0 : abort();
127 : #endif
128 0 : break;
129 :
130 0 : default:
131 0 : printf("Error: Unknown grid backend: %i.\n", config.backend);
132 0 : abort();
133 15913 : break;
134 : }
135 : }
136 :
137 : /*******************************************************************************
138 : * \brief Deallocates given task list, basis_sets have to be freed separately.
139 : * \author Ole Schuett
140 : ******************************************************************************/
141 9802 : void grid_free_task_list(grid_task_list *ptr) {
142 9802 : if (ptr == NULL)
143 : return;
144 :
145 9802 : grid_task_list_internal *task_list = (grid_task_list_internal *)ptr;
146 :
147 9802 : if (task_list->ref != NULL) {
148 9601 : grid_ref_free_task_list(task_list->ref);
149 9601 : task_list->ref = NULL;
150 : }
151 9802 : if (task_list->cpu != NULL) {
152 9591 : grid_cpu_free_task_list(task_list->cpu);
153 9591 : task_list->cpu = NULL;
154 : }
155 9802 : if (task_list->dgemm != NULL) {
156 7 : grid_dgemm_free_task_list(task_list->dgemm);
157 7 : task_list->dgemm = NULL;
158 : }
159 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
160 : if (task_list->gpu != NULL) {
161 : grid_gpu_free_task_list(task_list->gpu);
162 : task_list->gpu = NULL;
163 : }
164 : #endif
165 :
166 9802 : free(task_list->npts_local);
167 9802 : free(task_list);
168 : }
169 :
170 : /*******************************************************************************
171 : * \brief Collocate all tasks of in given list onto given grids.
172 : * See grid_task_list.h for details.
173 : * \author Ole Schuett
174 : ******************************************************************************/
175 234996 : void grid_collocate_task_list(const grid_task_list *ptr,
176 : const enum grid_func func, const int nlevels,
177 : const int npts_local[nlevels][3],
178 : const offload_buffer *pab_blocks,
179 : offload_buffer *grids[nlevels]) {
180 234996 : if (ptr == NULL)
181 : return;
182 :
183 234996 : grid_task_list_internal *task_list = (grid_task_list_internal *)ptr;
184 :
185 234996 : if (task_list->empty) {
186 13277 : for (int level = 0; level < nlevels; level++)
187 10626 : memset(grids[level]->host_buffer, 0, grids[level]->size);
188 : return;
189 : }
190 :
191 : // Bounds check.
192 232345 : assert(task_list->nlevels == nlevels);
193 1150393 : for (int ilevel = 0; ilevel < nlevels; ilevel++) {
194 918048 : assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
195 918048 : assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
196 918048 : assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
197 : }
198 :
199 232345 : switch (task_list->backend) {
200 21 : case GRID_BACKEND_REF:
201 21 : grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
202 : grids);
203 21 : break;
204 232171 : case GRID_BACKEND_CPU:
205 232171 : grid_cpu_collocate_task_list(task_list->cpu, func, nlevels, pab_blocks,
206 : grids);
207 232171 : break;
208 153 : case GRID_BACKEND_DGEMM:
209 153 : grid_dgemm_collocate_task_list(task_list->dgemm, func, nlevels, pab_blocks,
210 : grids);
211 153 : break;
212 : #if (defined(__OFFLOAD_CUDA) || defined(__OFFLOAD_HIP)) && \
213 : !defined(__NO_OFFLOAD_GRID)
214 : case GRID_BACKEND_GPU:
215 : grid_gpu_collocate_task_list(task_list->gpu, func, nlevels, pab_blocks,
216 : grids);
217 : break;
218 : #endif
219 0 : default:
220 0 : printf("Error: Unknown grid backend: %i.\n", task_list->backend);
221 0 : abort();
222 232345 : break;
223 : }
224 :
225 : // Perform validation if enabled.
226 232345 : if (grid_library_get_config().validate) {
227 : // Allocate space for reference results.
228 21 : offload_buffer *grids_ref[nlevels];
229 105 : for (int level = 0; level < nlevels; level++) {
230 84 : const int npts_local_total =
231 84 : npts_local[level][0] * npts_local[level][1] * npts_local[level][2];
232 84 : grids_ref[level] = NULL;
233 84 : offload_create_buffer(npts_local_total, &grids_ref[level]);
234 : }
235 :
236 : // Call reference implementation.
237 21 : grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
238 : grids_ref);
239 :
240 : // Compare results.
241 21 : const double tolerance = 1e-12;
242 21 : double max_rel_diff = 0.0;
243 105 : for (int level = 0; level < nlevels; level++) {
244 1491 : for (int i = 0; i < npts_local[level][0]; i++) {
245 43692 : for (int j = 0; j < npts_local[level][1]; j++) {
246 1914354 : for (int k = 0; k < npts_local[level][2]; k++) {
247 1872069 : const int idx = k * npts_local[level][1] * npts_local[level][0] +
248 1872069 : j * npts_local[level][0] + i;
249 1872069 : const double ref_value = grids_ref[level]->host_buffer[idx];
250 1872069 : const double test_value = grids[level]->host_buffer[idx];
251 1872069 : const double diff = fabs(test_value - ref_value);
252 1872069 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
253 1872069 : max_rel_diff = fmax(max_rel_diff, rel_diff);
254 1872069 : if (rel_diff > tolerance) {
255 0 : fprintf(stderr, "Error: Validation failure in grid collocate\n");
256 0 : fprintf(stderr, " diff: %le\n", diff);
257 0 : fprintf(stderr, " rel_diff: %le\n", rel_diff);
258 0 : fprintf(stderr, " value: %le\n", ref_value);
259 0 : fprintf(stderr, " level: %i\n", level);
260 0 : fprintf(stderr, " ijk: %i %i %i\n", i, j, k);
261 0 : abort();
262 : }
263 : }
264 : }
265 : }
266 84 : offload_free_buffer(grids_ref[level]);
267 84 : printf("Validated grid collocate, max rel. diff: %le\n", max_rel_diff);
268 : }
269 : }
270 : }
271 :
272 : /*******************************************************************************
273 : * \brief Integrate all tasks of in given list from given grids.
274 : * See grid_task_list.h for details.
275 : * \author Ole Schuett
276 : ******************************************************************************/
277 214244 : void grid_integrate_task_list(const grid_task_list *ptr, const bool compute_tau,
278 : const int natoms, const int nlevels,
279 : const int npts_local[nlevels][3],
280 : const offload_buffer *pab_blocks,
281 : const offload_buffer *grids[nlevels],
282 : offload_buffer *hab_blocks,
283 : double forces[natoms][3], double virial[3][3]) {
284 :
285 214244 : if (ptr == NULL)
286 : return;
287 :
288 214244 : grid_task_list_internal *task_list = (grid_task_list_internal *)ptr;
289 :
290 214244 : if (task_list->empty) {
291 2077 : memset(hab_blocks->host_buffer, 0, hab_blocks->size);
292 2077 : if (virial) {
293 24 : virial[0][0] = 0.0;
294 24 : virial[0][1] = 0.0;
295 24 : virial[0][2] = 0.0;
296 24 : virial[1][0] = 0.0;
297 24 : virial[1][1] = 0.0;
298 24 : virial[1][2] = 0.0;
299 24 : virial[2][0] = 0.0;
300 24 : virial[2][1] = 0.0;
301 24 : virial[2][2] = 0.0;
302 : }
303 2077 : if (forces) {
304 312 : for (int atom = 0; atom < natoms; atom++) {
305 198 : forces[atom][0] = 0.0;
306 198 : forces[atom][1] = 0.0;
307 198 : forces[atom][2] = 0.0;
308 : }
309 : }
310 2077 : return;
311 : }
312 :
313 : // Bounds check.
314 212167 : assert(task_list->nlevels == nlevels);
315 1051603 : for (int ilevel = 0; ilevel < nlevels; ilevel++) {
316 839436 : assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
317 839436 : assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
318 839436 : assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
319 : }
320 :
321 212167 : assert(forces == NULL || pab_blocks != NULL);
322 212167 : assert(virial == NULL || pab_blocks != NULL);
323 :
324 212167 : switch (task_list->backend) {
325 : #if (defined(__OFFLOAD_CUDA) || defined(__OFFLOAD_HIP)) && \
326 : !defined(__NO_OFFLOAD_GRID)
327 : case GRID_BACKEND_GPU:
328 : grid_gpu_integrate_task_list(task_list->gpu, compute_tau, nlevels,
329 : pab_blocks, grids, hab_blocks, &forces[0][0],
330 : &virial[0][0]);
331 : break;
332 : #endif
333 149 : case GRID_BACKEND_DGEMM:
334 149 : grid_dgemm_integrate_task_list(task_list->dgemm, compute_tau, natoms,
335 : nlevels, pab_blocks, grids, hab_blocks,
336 : forces, virial);
337 149 : break;
338 212001 : case GRID_BACKEND_CPU:
339 212001 : grid_cpu_integrate_task_list(task_list->cpu, compute_tau, natoms, nlevels,
340 : pab_blocks, grids, hab_blocks, forces, virial);
341 212001 : break;
342 17 : case GRID_BACKEND_REF:
343 17 : grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
344 : pab_blocks, grids, hab_blocks, forces, virial);
345 17 : break;
346 0 : default:
347 0 : printf("Error: Unknown grid backend: %i.\n", task_list->backend);
348 0 : abort();
349 212167 : break;
350 : }
351 :
352 : // Perform validation if enabled.
353 212167 : if (grid_library_get_config().validate) {
354 : // Allocate space for reference results.
355 17 : const int hab_length = hab_blocks->size / sizeof(double);
356 17 : offload_buffer *hab_blocks_ref = NULL;
357 17 : offload_create_buffer(hab_length, &hab_blocks_ref);
358 17 : double forces_ref[natoms][3], virial_ref[3][3];
359 :
360 : // Call reference implementation.
361 34 : grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
362 : pab_blocks, grids, hab_blocks_ref,
363 : (forces != NULL) ? forces_ref : NULL,
364 : (virial != NULL) ? virial_ref : NULL);
365 :
366 : // Compare hab.
367 17 : const double hab_tolerance = 1e-12;
368 17 : double hab_max_rel_diff = 0.0;
369 2178 : for (int i = 0; i < hab_length; i++) {
370 2161 : const double ref_value = hab_blocks_ref->host_buffer[i];
371 2161 : const double test_value = hab_blocks->host_buffer[i];
372 2161 : const double diff = fabs(test_value - ref_value);
373 2161 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
374 2161 : hab_max_rel_diff = fmax(hab_max_rel_diff, rel_diff);
375 2161 : if (rel_diff > hab_tolerance) {
376 0 : fprintf(stderr, "Error: Validation failure in grid integrate\n");
377 0 : fprintf(stderr, " hab diff: %le\n", diff);
378 0 : fprintf(stderr, " hab rel_diff: %le\n", rel_diff);
379 0 : fprintf(stderr, " hab value: %le\n", ref_value);
380 0 : fprintf(stderr, " hab i: %i\n", i);
381 0 : abort();
382 : }
383 : }
384 :
385 : // Compare forces.
386 17 : const double forces_tolerance = 1e-8; // account for higher numeric noise
387 17 : double forces_max_rel_diff = 0.0;
388 17 : if (forces != NULL) {
389 12 : for (int iatom = 0; iatom < natoms; iatom++) {
390 36 : for (int idir = 0; idir < 3; idir++) {
391 27 : const double ref_value = forces_ref[iatom][idir];
392 27 : const double test_value = forces[iatom][idir];
393 27 : const double diff = fabs(test_value - ref_value);
394 27 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
395 27 : forces_max_rel_diff = fmax(forces_max_rel_diff, rel_diff);
396 27 : if (rel_diff > forces_tolerance) {
397 0 : fprintf(stderr, "Error: Validation failure in grid integrate\n");
398 0 : fprintf(stderr, " forces diff: %le\n", diff);
399 0 : fprintf(stderr, " forces rel_diff: %le\n", rel_diff);
400 0 : fprintf(stderr, " forces value: %le\n", ref_value);
401 0 : fprintf(stderr, " forces atom: %i\n", iatom);
402 0 : fprintf(stderr, " forces dir: %i\n", idir);
403 0 : abort();
404 : }
405 : }
406 : }
407 : }
408 :
409 : // Compare virial.
410 17 : const double virial_tolerance = 1e-8; // account for higher numeric noise
411 17 : double virial_max_rel_diff = 0.0;
412 17 : if (virial != NULL) {
413 0 : for (int i = 0; i < 3; i++) {
414 0 : for (int j = 0; j < 3; j++) {
415 0 : const double ref_value = virial_ref[i][j];
416 0 : const double test_value = virial[i][j];
417 0 : const double diff = fabs(test_value - ref_value);
418 0 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
419 0 : virial_max_rel_diff = fmax(virial_max_rel_diff, rel_diff);
420 0 : if (rel_diff > virial_tolerance) {
421 0 : fprintf(stderr, "Error: Validation failure in grid integrate\n");
422 0 : fprintf(stderr, " virial diff: %le\n", diff);
423 0 : fprintf(stderr, " virial rel_diff: %le\n", rel_diff);
424 0 : fprintf(stderr, " virial value: %le\n", ref_value);
425 0 : fprintf(stderr, " virial ij: %i %i\n", i, j);
426 0 : abort();
427 : }
428 : }
429 : }
430 : }
431 :
432 17 : printf("Validated grid_integrate, max rel. diff: %le %le %le\n",
433 : hab_max_rel_diff, forces_max_rel_diff, virial_max_rel_diff);
434 17 : offload_free_buffer(hab_blocks_ref);
435 : }
436 : }
437 :
438 : // EOF
|