Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2026 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 :
8 : #include <assert.h>
9 : #include <math.h>
10 : #include <stddef.h>
11 : #include <stdio.h>
12 : #include <stdlib.h>
13 : #include <string.h>
14 :
15 : #include "common/grid_common.h"
16 : #include "common/grid_constants.h"
17 : #include "common/grid_library.h"
18 : #include "grid_task_list.h"
19 :
20 : /*******************************************************************************
21 : * \brief Allocates a task list which can be passed to grid_collocate_task_list.
22 : * See grid_task_list.h for details.
23 : * \author Ole Schuett
24 : ******************************************************************************/
25 15802 : void grid_create_task_list(
26 : const bool orthorhombic, const int ntasks, const int nlevels,
27 : const int natoms, const int nkinds, const int nblocks,
28 : const int block_offsets[nblocks], const double atom_positions[natoms][3],
29 : const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds],
30 : const int level_list[ntasks], const int iatom_list[ntasks],
31 : const int jatom_list[ntasks], const int iset_list[ntasks],
32 : const int jset_list[ntasks], const int ipgf_list[ntasks],
33 : const int jpgf_list[ntasks], const int border_mask_list[ntasks],
34 : const int block_num_list[ntasks], const double radius_list[ntasks],
35 : const double rab_list[ntasks][3], const int npts_global[nlevels][3],
36 : const int npts_local[nlevels][3], const int shift_local[nlevels][3],
37 : const int border_width[nlevels][3], const double dh[nlevels][3][3],
38 : const double dh_inv[nlevels][3][3], grid_task_list **task_list_out) {
39 :
40 15802 : const grid_library_config config = grid_library_get_config();
41 :
42 15802 : grid_task_list *task_list = NULL;
43 :
44 15802 : if (*task_list_out == NULL) {
45 9584 : task_list = malloc(sizeof(grid_task_list));
46 9584 : assert(task_list != NULL);
47 9584 : memset(task_list, 0, sizeof(grid_task_list));
48 :
49 : // Resolve AUTO to a concrete backend.
50 9584 : if (config.backend == GRID_BACKEND_AUTO) {
51 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
52 : task_list->backend = GRID_BACKEND_GPU;
53 : #else
54 9564 : task_list->backend = GRID_BACKEND_CPU;
55 : #endif
56 : } else {
57 20 : task_list->backend = config.backend;
58 : }
59 : } else {
60 : // Reuse existing task list.
61 6218 : task_list = *task_list_out;
62 6218 : free(task_list->npts_local);
63 : }
64 :
65 : // Store npts_local for bounds checking and validation.
66 15802 : task_list->nlevels = nlevels;
67 15802 : size_t size = nlevels * 3 * sizeof(int);
68 15802 : task_list->npts_local = malloc(size);
69 15802 : assert(task_list->npts_local != NULL);
70 15802 : memcpy(task_list->npts_local, npts_local, size);
71 :
72 : // Always create reference backend because it might be needed for validation.
73 15802 : grid_ref_create_task_list(
74 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
75 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
76 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
77 : block_num_list, radius_list, rab_list, npts_global, npts_local,
78 : shift_local, border_width, dh, dh_inv, &task_list->ref);
79 :
80 : // Create other backend, if selected.
81 15802 : switch (task_list->backend) {
82 : case GRID_BACKEND_REF:
83 : break; // was already created above
84 15778 : case GRID_BACKEND_CPU:
85 15778 : grid_cpu_create_task_list(
86 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
87 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
88 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
89 : border_mask_list, block_num_list, radius_list, rab_list, npts_global,
90 : npts_local, shift_local, border_width, dh, dh_inv, &task_list->cpu);
91 15778 : break;
92 20 : case GRID_BACKEND_DGEMM:
93 20 : grid_dgemm_create_task_list(
94 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
95 : atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
96 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
97 : border_mask_list, block_num_list, radius_list, rab_list, npts_global,
98 : npts_local, shift_local, border_width, dh, dh_inv, &task_list->dgemm);
99 20 : break;
100 :
101 0 : case GRID_BACKEND_GPU:
102 : #if (defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID))
103 : grid_gpu_create_task_list(
104 : orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
105 : &atom_positions[0][0], atom_kinds, basis_sets, level_list, iatom_list,
106 : jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
107 : border_mask_list, block_num_list, radius_list, &rab_list[0][0],
108 : &npts_global[0][0], &npts_local[0][0], &shift_local[0][0],
109 : &border_width[0][0], &dh[0][0][0], &dh_inv[0][0][0], &task_list->gpu);
110 : #else
111 0 : fprintf(stderr, "Error: The GPU grid backend is not available. "
112 : "Please re-compile with -D__OFFLOAD");
113 0 : abort();
114 : #endif
115 0 : break;
116 :
117 0 : default:
118 0 : printf("Error: Unknown grid backend: %i.\n", config.backend);
119 0 : abort();
120 15802 : break;
121 : }
122 :
123 15802 : *task_list_out = task_list;
124 15802 : }
125 :
126 : /*******************************************************************************
127 : * \brief Deallocates given task list, basis_sets have to be freed separately.
128 : * \author Ole Schuett
129 : ******************************************************************************/
130 9584 : void grid_free_task_list(grid_task_list *task_list) {
131 :
132 9584 : if (task_list->ref != NULL) {
133 9584 : grid_ref_free_task_list(task_list->ref);
134 9584 : task_list->ref = NULL;
135 : }
136 9584 : if (task_list->cpu != NULL) {
137 9572 : grid_cpu_free_task_list(task_list->cpu);
138 9572 : task_list->cpu = NULL;
139 : }
140 9584 : if (task_list->dgemm != NULL) {
141 8 : grid_dgemm_free_task_list(task_list->dgemm);
142 8 : task_list->dgemm = NULL;
143 : }
144 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
145 : if (task_list->gpu != NULL) {
146 : grid_gpu_free_task_list(task_list->gpu);
147 : task_list->gpu = NULL;
148 : }
149 : #endif
150 :
151 9584 : free(task_list->npts_local);
152 9584 : free(task_list);
153 9584 : }
154 :
155 : /*******************************************************************************
156 : * \brief Collocate all tasks of in given list onto given grids.
157 : * See grid_task_list.h for details.
158 : * \author Ole Schuett
159 : ******************************************************************************/
160 230860 : void grid_collocate_task_list(const grid_task_list *task_list,
161 : const enum grid_func func, const int nlevels,
162 : const int npts_local[nlevels][3],
163 : const offload_buffer *pab_blocks,
164 : offload_buffer *grids[nlevels]) {
165 :
166 : // Bounds check.
167 230860 : assert(task_list->nlevels == nlevels);
168 1145540 : for (int ilevel = 0; ilevel < nlevels; ilevel++) {
169 914680 : assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
170 914680 : assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
171 914680 : assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
172 : }
173 :
174 230860 : switch (task_list->backend) {
175 30 : case GRID_BACKEND_REF:
176 30 : grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
177 : grids);
178 30 : break;
179 230668 : case GRID_BACKEND_CPU:
180 230668 : grid_cpu_collocate_task_list(task_list->cpu, func, nlevels, pab_blocks,
181 : grids);
182 230668 : break;
183 162 : case GRID_BACKEND_DGEMM:
184 162 : grid_dgemm_collocate_task_list(task_list->dgemm, func, nlevels, pab_blocks,
185 : grids);
186 162 : break;
187 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
188 : case GRID_BACKEND_GPU:
189 : grid_gpu_collocate_task_list(task_list->gpu, func, nlevels, pab_blocks,
190 : grids);
191 : break;
192 : #endif
193 0 : default:
194 0 : printf("Error: Unknown grid backend: %i.\n", task_list->backend);
195 0 : abort();
196 230860 : break;
197 : }
198 :
199 : // Perform validation if enabled.
200 230860 : if (grid_library_get_config().validate) {
201 : // Allocate space for reference results.
202 30 : offload_buffer *grids_ref[nlevels];
203 150 : for (int level = 0; level < nlevels; level++) {
204 120 : const int npts_local_total =
205 120 : npts_local[level][0] * npts_local[level][1] * npts_local[level][2];
206 120 : grids_ref[level] = NULL;
207 120 : offload_create_buffer(npts_local_total, &grids_ref[level]);
208 : }
209 :
210 : // Call reference implementation.
211 30 : grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
212 : grids_ref);
213 :
214 : // Compare results.
215 30 : const double tolerance = 1e-12;
216 30 : double max_rel_diff = 0.0;
217 150 : for (int level = 0; level < nlevels; level++) {
218 2526 : for (int i = 0; i < npts_local[level][0]; i++) {
219 82680 : for (int j = 0; j < npts_local[level][1]; j++) {
220 3671568 : for (int k = 0; k < npts_local[level][2]; k++) {
221 3591294 : const int idx = k * npts_local[level][1] * npts_local[level][0] +
222 3591294 : j * npts_local[level][0] + i;
223 3591294 : const double ref_value = grids_ref[level]->host_buffer[idx];
224 3591294 : const double test_value = grids[level]->host_buffer[idx];
225 3591294 : const double diff = fabs(test_value - ref_value);
226 3591294 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
227 3591294 : max_rel_diff = fmax(max_rel_diff, rel_diff);
228 3591294 : if (rel_diff > tolerance) {
229 0 : fprintf(stderr, "Error: Validation failure in grid collocate\n");
230 0 : fprintf(stderr, " diff: %le\n", diff);
231 0 : fprintf(stderr, " rel_diff: %le\n", rel_diff);
232 0 : fprintf(stderr, " value: %le\n", ref_value);
233 0 : fprintf(stderr, " level: %i\n", level);
234 0 : fprintf(stderr, " ijk: %i %i %i\n", i, j, k);
235 0 : abort();
236 : }
237 : }
238 : }
239 : }
240 120 : offload_free_buffer(grids_ref[level]);
241 120 : printf("Validated grid collocate, max rel. diff: %le\n", max_rel_diff);
242 : }
243 : }
244 230860 : }
245 :
246 : /*******************************************************************************
247 : * \brief Integrate all tasks of in given list from given grids.
248 : * See grid_task_list.h for details.
249 : * \author Ole Schuett
250 : ******************************************************************************/
251 210640 : void grid_integrate_task_list(
252 : const grid_task_list *task_list, const bool compute_tau, const int natoms,
253 : const int nlevels, const int npts_local[nlevels][3],
254 : const offload_buffer *pab_blocks, const offload_buffer *grids[nlevels],
255 : offload_buffer *hab_blocks, double forces[natoms][3], double virial[3][3]) {
256 :
257 : // Bounds check.
258 210640 : assert(task_list->nlevels == nlevels);
259 1045316 : for (int ilevel = 0; ilevel < nlevels; ilevel++) {
260 834676 : assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
261 834676 : assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
262 834676 : assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
263 : }
264 :
265 210640 : assert(forces == NULL || pab_blocks != NULL);
266 210640 : assert(virial == NULL || pab_blocks != NULL);
267 :
268 210640 : switch (task_list->backend) {
269 : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
270 : case GRID_BACKEND_GPU:
271 : grid_gpu_integrate_task_list(task_list->gpu, compute_tau, nlevels,
272 : pab_blocks, grids, hab_blocks, &forces[0][0],
273 : &virial[0][0]);
274 : break;
275 : #endif
276 158 : case GRID_BACKEND_DGEMM:
277 158 : grid_dgemm_integrate_task_list(task_list->dgemm, compute_tau, natoms,
278 : nlevels, pab_blocks, grids, hab_blocks,
279 : forces, virial);
280 158 : break;
281 210456 : case GRID_BACKEND_CPU:
282 210456 : grid_cpu_integrate_task_list(task_list->cpu, compute_tau, natoms, nlevels,
283 : pab_blocks, grids, hab_blocks, forces, virial);
284 210456 : break;
285 26 : case GRID_BACKEND_REF:
286 26 : grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
287 : pab_blocks, grids, hab_blocks, forces, virial);
288 26 : break;
289 0 : default:
290 0 : printf("Error: Unknown grid backend: %i.\n", task_list->backend);
291 0 : abort();
292 210640 : break;
293 : }
294 :
295 : // Perform validation if enabled.
296 210640 : if (grid_library_get_config().validate) {
297 : // Allocate space for reference results.
298 26 : const int hab_length = hab_blocks->size / sizeof(double);
299 26 : offload_buffer *hab_blocks_ref = NULL;
300 26 : offload_create_buffer(hab_length, &hab_blocks_ref);
301 26 : double forces_ref[natoms][3], virial_ref[3][3];
302 :
303 : // Call reference implementation.
304 52 : grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
305 : pab_blocks, grids, hab_blocks_ref,
306 : (forces != NULL) ? forces_ref : NULL,
307 : (virial != NULL) ? virial_ref : NULL);
308 :
309 : // Compare hab.
310 26 : const double hab_tolerance = 1e-12;
311 26 : double hab_max_rel_diff = 0.0;
312 2187 : for (int i = 0; i < hab_length; i++) {
313 2161 : const double ref_value = hab_blocks_ref->host_buffer[i];
314 2161 : const double test_value = hab_blocks->host_buffer[i];
315 2161 : const double diff = fabs(test_value - ref_value);
316 2161 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
317 2161 : hab_max_rel_diff = fmax(hab_max_rel_diff, rel_diff);
318 2161 : if (rel_diff > hab_tolerance) {
319 0 : fprintf(stderr, "Error: Validation failure in grid integrate\n");
320 0 : fprintf(stderr, " hab diff: %le\n", diff);
321 0 : fprintf(stderr, " hab rel_diff: %le\n", rel_diff);
322 0 : fprintf(stderr, " hab value: %le\n", ref_value);
323 0 : fprintf(stderr, " hab i: %i\n", i);
324 0 : abort();
325 : }
326 : }
327 :
328 : // Compare forces.
329 26 : const double forces_tolerance = 1e-8; // account for higher numeric noise
330 26 : double forces_max_rel_diff = 0.0;
331 26 : if (forces != NULL) {
332 14 : for (int iatom = 0; iatom < natoms; iatom++) {
333 40 : for (int idir = 0; idir < 3; idir++) {
334 30 : const double ref_value = forces_ref[iatom][idir];
335 30 : const double test_value = forces[iatom][idir];
336 30 : const double diff = fabs(test_value - ref_value);
337 30 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
338 30 : forces_max_rel_diff = fmax(forces_max_rel_diff, rel_diff);
339 30 : if (rel_diff > forces_tolerance) {
340 0 : fprintf(stderr, "Error: Validation failure in grid integrate\n");
341 0 : fprintf(stderr, " forces diff: %le\n", diff);
342 0 : fprintf(stderr, " forces rel_diff: %le\n", rel_diff);
343 0 : fprintf(stderr, " forces value: %le\n", ref_value);
344 0 : fprintf(stderr, " forces atom: %i\n", iatom);
345 0 : fprintf(stderr, " forces dir: %i\n", idir);
346 0 : abort();
347 : }
348 : }
349 : }
350 : }
351 :
352 : // Compare virial.
353 26 : const double virial_tolerance = 1e-8; // account for higher numeric noise
354 26 : double virial_max_rel_diff = 0.0;
355 26 : if (virial != NULL) {
356 0 : for (int i = 0; i < 3; i++) {
357 0 : for (int j = 0; j < 3; j++) {
358 0 : const double ref_value = virial_ref[i][j];
359 0 : const double test_value = virial[i][j];
360 0 : const double diff = fabs(test_value - ref_value);
361 0 : const double rel_diff = diff / fmax(1.0, fabs(ref_value));
362 0 : virial_max_rel_diff = fmax(virial_max_rel_diff, rel_diff);
363 0 : if (rel_diff > virial_tolerance) {
364 0 : fprintf(stderr, "Error: Validation failure in grid integrate\n");
365 0 : fprintf(stderr, " virial diff: %le\n", diff);
366 0 : fprintf(stderr, " virial rel_diff: %le\n", rel_diff);
367 0 : fprintf(stderr, " virial value: %le\n", ref_value);
368 0 : fprintf(stderr, " virial ij: %i %i\n", i, j);
369 0 : abort();
370 : }
371 : }
372 : }
373 : }
374 :
375 26 : printf("Validated grid_integrate, max rel. diff: %le %le %le\n",
376 : hab_max_rel_diff, forces_max_rel_diff, virial_max_rel_diff);
377 26 : offload_free_buffer(hab_blocks_ref);
378 : }
379 210640 : }
380 :
381 : // EOF
|