Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2026 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 :
8 : #include "grid_dgemm_coefficients.h"
9 : #include "../common/grid_common.h"
10 : #include "grid_dgemm_private_header.h"
11 : #include "grid_dgemm_tensor_local.h"
12 : #include <assert.h>
13 : #include <stdio.h>
14 : #include <stdlib.h>
15 : #include <string.h>
16 :
17 0 : void transform_xyz_to_triangular(const tensor *const coef,
18 : double *const coef_xyz) {
19 0 : assert(coef != NULL);
20 0 : assert(coef_xyz != NULL);
21 :
22 0 : int lxyz = 0;
23 0 : const int lp = (coef->size[0] - 1);
24 0 : for (int lzp = 0; lzp <= lp; lzp++) {
25 0 : for (int lyp = 0; lyp <= lp - lzp; lyp++) {
26 0 : for (int lxp = 0; lxp <= lp - lzp - lyp; lxp++, lxyz++) {
27 0 : coef_xyz[lxyz] = idx3(coef[0], lzp, lyp, lxp);
28 : }
29 : }
30 : }
31 0 : }
32 :
33 0 : void transform_yxz_to_triangular(const tensor *const coef,
34 : double *const coef_xyz) {
35 0 : assert(coef != NULL);
36 0 : assert(coef_xyz != NULL);
37 0 : int lxyz = 0;
38 0 : const int lp = (coef->size[0] - 1);
39 0 : for (int lzp = 0; lzp <= lp; lzp++) {
40 0 : for (int lyp = 0; lyp <= lp - lzp; lyp++) {
41 0 : for (int lxp = 0; lxp <= lp - lzp - lyp; lxp++, lxyz++) {
42 0 : coef_xyz[lxyz] = idx3(coef[0], lyp, lxp, lzp);
43 : }
44 : }
45 : }
46 0 : }
47 :
48 0 : void transform_triangular_to_xyz(const double *const coef_xyz,
49 : tensor *const coef) {
50 0 : assert(coef != NULL);
51 0 : assert(coef_xyz != NULL);
52 0 : int lxyz = 0;
53 0 : const int lp = coef->size[0] - 1;
54 0 : for (int lzp = 0; lzp <= lp; lzp++) {
55 0 : for (int lyp = 0; lyp <= lp - lzp; lyp++) {
56 0 : for (int lxp = 0; lxp <= lp - lzp - lyp; lxp++, lxyz++) {
57 0 : idx3(coef[0], lzp, lyp, lxp) = coef_xyz[lxyz];
58 : }
59 : /* initialize the remaining coefficients to zero */
60 0 : for (int lxp = lp - lzp - lyp + 1; lxp <= lp; lxp++)
61 0 : idx3(coef[0], lzp, lyp, lxp) = 0.0;
62 : }
63 : }
64 0 : }
65 :
66 : /* Rotate from the (x - x_1) ^ alpha_1 (x - x_2) ^ \alpha_2 to (x - x_{12}) ^ k
67 : * in all three directions */
68 :
69 45945 : void grid_compute_coefficients_dgemm(
70 : const int *lmin, const int *lmax, const int lp, const double prefactor,
71 : const tensor *const alpha, // [3][lb_max+1][la_max+1][lp+1]
72 : const tensor *const pab,
73 : tensor *coef_xyz) //[lp+1][lp+1][lp+1]
74 : {
75 : /* can be done with dgemms as well, since it is a change of basis from (x -
76 : * x1) (x - x2) to (x - x12)^alpha */
77 :
78 45945 : assert(alpha != NULL);
79 45945 : assert(coef_xyz != NULL);
80 45945 : assert(coef_xyz->data != NULL);
81 45945 : memset(coef_xyz->data, 0, coef_xyz->alloc_size_ * sizeof(double));
82 : // we need a proper fix for that. We can use the tensor structure for this
83 :
84 121995 : for (int lzb = 0; lzb <= lmax[1]; lzb++) {
85 185670 : for (int lyb = 0; lyb <= lmax[1] - lzb; lyb++) {
86 109620 : const int lxb_min = imax(lmin[1] - lzb - lyb, 0);
87 236145 : for (int lxb = lxb_min; lxb <= lmax[1] - lzb - lyb; lxb++) {
88 126525 : const int jco = coset(lxb, lyb, lzb);
89 345141 : for (int lza = 0; lza <= lmax[0]; lza++) {
90 539571 : for (int lya = 0; lya <= lmax[0] - lza; lya++) {
91 320955 : const int lxa_min = imax(lmin[0] - lza - lya, 0);
92 698721 : for (int lxa = lxa_min; lxa <= lmax[0] - lza - lya; lxa++) {
93 377766 : const int ico = coset(lxa, lya, lza);
94 377766 : const double pab_ = idx2(pab[0], jco, ico);
95 980706 : for (int lxp = 0; lxp <= lxa + lxb; lxp++) {
96 602940 : const double p1 =
97 602940 : idx4(alpha[0], 0, lxb, lxa, lxp) * pab_ * prefactor;
98 1980126 : for (int lzp = 0; lzp <= lp - lxa - lxb; lzp++) {
99 3852510 : for (int lyp = 0; lyp <= lp - lxa - lxb - lzp; lyp++) {
100 2475324 : const double p2 = idx4(alpha[0], 1, lyb, lya, lyp) *
101 2475324 : idx4(alpha[0], 2, lzb, lza, lzp) * p1;
102 2475324 : idx3(coef_xyz[0], lxp, lzp, lyp) += p2;
103 : }
104 : }
105 : }
106 : }
107 : }
108 : }
109 : }
110 : }
111 : }
112 45945 : }
113 :
114 : /* Rotate from (x - x_{12}) ^ k to (x - x_1) ^ alpha_1 (x - x_2) ^ \alpha_2 in
115 : * all three directions */
116 :
117 44389 : void grid_compute_vab(
118 : const int *const lmin, const int *const lmax, const int lp,
119 : const double prefactor,
120 : const tensor *const alpha, // transformation parameters (x - x_1)^n (x -
121 : // x_2)^m -> (x - x_12) ^ l
122 : const tensor *const coef_xyz, tensor *vab) {
123 : /* can be done with dgemms as well, since it is a change of basis from (x -
124 : * x1) (x - x2) to (x - x12)^alpha */
125 :
126 44389 : assert(alpha != NULL);
127 44389 : assert(coef_xyz != NULL);
128 44389 : assert(coef_xyz->data != NULL);
129 :
130 44389 : memset(vab->data, 0, sizeof(double) * vab->alloc_size_);
131 : // we need a proper fix for that. We can use the tensor structure for this
132 :
133 122231 : for (int lzb = 0; lzb <= lmax[1]; lzb++) {
134 195574 : for (int lyb = 0; lyb <= lmax[1] - lzb; lyb++) {
135 117732 : const int lxb_min = imax(lmin[1] - lzb - lyb, 0);
136 264086 : for (int lxb = lxb_min; lxb <= lmax[1] - lzb - lyb; lxb++) {
137 146354 : const int jco = coset(lxb, lyb, lzb);
138 476098 : for (int lza = 0; lza <= lmax[0]; lza++) {
139 947725 : for (int lya = 0; lya <= lmax[0] - lza; lya++) {
140 617981 : const int lxa_min = imax(lmin[0] - lza - lya, 0);
141 1608510 : for (int lxa = lxa_min; lxa <= lmax[0] - lza - lya; lxa++) {
142 990529 : const int ico = coset(lxa, lya, lza);
143 990529 : double pab_ = 0.0;
144 :
145 : /* this can be done with 3 dgemms actually but need to
146 : * set coef accordingly (triangular along the second
147 : * diagonal) */
148 :
149 2998946 : for (int lxp = 0; lxp <= lxa + lxb; lxp++) {
150 9418557 : for (int lzp = 0; lzp <= lp - lxa - lxb; lzp++) {
151 26966257 : for (int lyp = 0; lyp <= lp - lxa - lxb - lzp; lyp++) {
152 19556117 : const double p2 = idx4(alpha[0], 1, lyb, lya, lyp) *
153 19556117 : idx4(alpha[0], 2, lzb, lza, lzp) *
154 19556117 : idx4(alpha[0], 0, lxb, lxa, lxp) *
155 : prefactor;
156 19556117 : pab_ += idx3(coef_xyz[0], lyp, lxp, lzp) * p2;
157 : }
158 : }
159 : }
160 990529 : idx2(vab[0], jco, ico) += pab_;
161 : }
162 : }
163 : }
164 : }
165 : }
166 : }
167 44389 : }
168 :
169 : // *****************************************************************************
170 90334 : void grid_prepare_alpha_dgemm(const double ra[3], const double rb[3],
171 : const double rp[3], const int *lmax,
172 : tensor *alpha) {
173 90334 : assert(alpha != NULL);
174 : // Initialize with zeros.
175 90334 : memset(alpha->data, 0, alpha->alloc_size_ * sizeof(double));
176 :
177 : //
178 : // compute polynomial expansion coefs -> (x-a)**lxa (x-b)**lxb -> sum_{ls}
179 : // alpha(ls,lxa,lxb,1)*(x-p)**ls
180 : //
181 :
182 361336 : for (int iaxis = 0; iaxis < 3; iaxis++) {
183 271002 : const double drpa = rp[iaxis] - ra[iaxis];
184 271002 : const double drpb = rp[iaxis] - rb[iaxis];
185 749997 : for (int lxa = 0; lxa <= lmax[0]; lxa++) {
186 1338348 : for (int lxb = 0; lxb <= lmax[1]; lxb++) {
187 : double binomial_k_lxa = 1.0;
188 : double a = 1.0;
189 2244909 : for (int k = 0; k <= lxa; k++) {
190 : double binomial_l_lxb = 1.0;
191 : double b = 1.0;
192 3552828 : for (int l = 0; l <= lxb; l++) {
193 2167272 : idx4(alpha[0], iaxis, lxb, lxa, lxa - l + lxb - k) +=
194 2167272 : binomial_k_lxa * binomial_l_lxb * a * b;
195 2167272 : binomial_l_lxb *= ((double)(lxb - l)) / ((double)(l + 1));
196 2167272 : b *= drpb;
197 : }
198 1385556 : binomial_k_lxa *= ((double)(lxa - k)) / ((double)(k + 1));
199 1385556 : a *= drpa;
200 : }
201 : }
202 : }
203 : }
204 90334 : }
205 :
206 : /* this function computes the coefficients initially expressed in the cartesian
207 : * space to the grid space. It is inplane and can also be done with
208 : * matrix-matrix multiplication. It is in fact a tensor reduction. */
209 :
210 24831 : void grid_transform_coef_xzy_to_ikj(const double dh[3][3],
211 : const tensor *coef_xyz) {
212 24831 : assert(coef_xyz != NULL);
213 24831 : const int lp = coef_xyz->size[0] - 1;
214 24831 : tensor coef_ijk;
215 :
216 : /* this tensor corresponds to the term
217 : * $v_{11}^{k_{11}}v_{12}^{k_{12}}v_{13}^{k_{13}}
218 : * v_{21}^{k_{21}}v_{22}^{k_{22}}v_{23}^{k_{23}}
219 : * v_{31}^{k_{31}}v_{32}^{k_{32}} v_{33}^{k_{33}}$ in Eq.26 found section
220 : * III.A of the notes */
221 24831 : tensor hmatgridp;
222 :
223 49662 : initialize_tensor_3(&coef_ijk, coef_xyz->size[0], coef_xyz->size[1],
224 24831 : coef_xyz->size[2]);
225 :
226 24831 : coef_ijk.data = grid_allocate_scratch(sizeof(double) * coef_ijk.alloc_size_);
227 :
228 24831 : if (coef_ijk.data == NULL)
229 0 : abort();
230 :
231 24831 : memset(coef_ijk.data, 0, sizeof(double) * coef_ijk.alloc_size_);
232 24831 : initialize_tensor_3(&hmatgridp, coef_xyz->size[0], 3, 3);
233 :
234 24831 : hmatgridp.data =
235 24831 : grid_allocate_scratch(sizeof(double) * hmatgridp.alloc_size_);
236 :
237 : // transform using multinomials
238 99324 : for (int i = 0; i < 3; i++) {
239 297972 : for (int j = 0; j < 3; j++) {
240 223479 : idx3(hmatgridp, 0, j, i) = 1.0;
241 527877 : for (int k = 1; k <= lp; k++) {
242 304398 : idx3(hmatgridp, k, j, i) = idx3(hmatgridp, k - 1, j, i) * dh[j][i];
243 : }
244 : }
245 : }
246 :
247 : const int lpx = lp;
248 83484 : for (int klx = 0; klx <= lpx; klx++) {
249 167610 : for (int jlx = 0; jlx <= lpx - klx; jlx++) {
250 287076 : for (int ilx = 0; ilx <= lpx - klx - jlx; ilx++) {
251 178119 : const int lx = ilx + jlx + klx;
252 178119 : const int lpy = lp - lx;
253 178119 : const double tx = idx3(hmatgridp, ilx, 0, 0) *
254 178119 : idx3(hmatgridp, jlx, 1, 0) *
255 178119 : idx3(hmatgridp, klx, 2, 0) * fac(lx) * inv_fac[klx] *
256 178119 : inv_fac[jlx] * inv_fac[ilx];
257 :
258 446766 : for (int kly = 0; kly <= lpy; kly++) {
259 651828 : for (int jly = 0; jly <= lpy - kly; jly++) {
260 907674 : for (int ily = 0; ily <= lpy - kly - jly; ily++) {
261 524493 : const int ly = ily + jly + kly;
262 524493 : const int lpz = lp - lx - ly;
263 524493 : const double ty = tx * idx3(hmatgridp, ily, 0, 1) *
264 524493 : idx3(hmatgridp, jly, 1, 1) *
265 524493 : idx3(hmatgridp, kly, 2, 1) * fac(ly) *
266 524493 : inv_fac[kly] * inv_fac[jly] * inv_fac[ily];
267 1219980 : for (int klz = 0; klz <= lpz; klz++) {
268 1594686 : for (int jlz = 0; jlz <= lpz - klz; jlz++) {
269 2037996 : for (int ilz = 0; ilz <= lpz - klz - jlz; ilz++) {
270 1138797 : const int lz = ilz + jlz + klz;
271 1138797 : const int il = ilx + ily + ilz;
272 1138797 : const int jl = jlx + jly + jlz;
273 1138797 : const int kl = klx + kly + klz;
274 : // const int lijk= coef_map[kl][jl][il];
275 : /* the fac table is the factorial. It
276 : * would be better to use the
277 : * multinomials. */
278 1138797 : idx3(coef_ijk, il, kl, jl) +=
279 1138797 : idx3(coef_xyz[0], lx, lz, ly) * ty *
280 1138797 : idx3(hmatgridp, ilz, 0, 2) *
281 1138797 : idx3(hmatgridp, jlz, 1, 2) *
282 1138797 : idx3(hmatgridp, klz, 2, 2) * fac(lz) * inv_fac[klz] *
283 1138797 : inv_fac[jlz] * inv_fac[ilz];
284 : }
285 : }
286 : }
287 : }
288 : }
289 : }
290 : }
291 : }
292 : }
293 :
294 24831 : memcpy(coef_xyz->data, coef_ijk.data, sizeof(double) * coef_ijk.alloc_size_);
295 24831 : grid_free_scratch(coef_ijk.data);
296 24831 : grid_free_scratch(hmatgridp.data);
297 24831 : }
298 :
299 : /* Rotate the coefficients computed in the local grid coordinates to the
300 : * cartesians coorinates. The order of the indices indicates how the
301 : * coefficients are stored */
302 23275 : void grid_transform_coef_jik_to_yxz(const double dh[3][3],
303 : const tensor *coef_xyz) {
304 23275 : assert(coef_xyz);
305 23275 : const int lp = coef_xyz->size[0] - 1;
306 23275 : tensor coef_ijk;
307 :
308 : /* this tensor corresponds to the term
309 : * $v_{11}^{k_{11}}v_{12}^{k_{12}}v_{13}^{k_{13}}
310 : * v_{21}^{k_{21}}v_{22}^{k_{22}}v_{23}^{k_{23}}
311 : * v_{31}^{k_{31}}v_{32}^{k_{32}} v_{33}^{k_{33}}$ in Eq.26 found section
312 : * III.A of the notes */
313 23275 : tensor hmatgridp;
314 :
315 46550 : initialize_tensor_3(&coef_ijk, coef_xyz->size[0], coef_xyz->size[1],
316 23275 : coef_xyz->size[2]);
317 :
318 23275 : coef_ijk.data = grid_allocate_scratch(sizeof(double) * coef_ijk.alloc_size_);
319 23275 : if (coef_ijk.data == NULL)
320 0 : abort();
321 :
322 23275 : memset(coef_ijk.data, 0, sizeof(double) * coef_ijk.alloc_size_);
323 23275 : initialize_tensor_3(&hmatgridp, coef_xyz->size[0], 3, 3);
324 :
325 23275 : hmatgridp.data =
326 23275 : grid_allocate_scratch(sizeof(double) * hmatgridp.alloc_size_);
327 :
328 : // transform using multinomials
329 93100 : for (int i = 0; i < 3; i++) {
330 279300 : for (int j = 0; j < 3; j++) {
331 209475 : idx3(hmatgridp, 0, j, i) = 1.0;
332 558855 : for (int k = 1; k <= lp; k++) {
333 349380 : idx3(hmatgridp, k, j, i) = idx3(hmatgridp, k - 1, j, i) * dh[j][i];
334 : }
335 : }
336 : }
337 :
338 : const int lpx = lp;
339 85370 : for (int klx = 0; klx <= lpx; klx++) {
340 195728 : for (int jlx = 0; jlx <= lpx - klx; jlx++) {
341 390320 : for (int ilx = 0; ilx <= lpx - klx - jlx; ilx++) {
342 256687 : const int lx = ilx + jlx + klx;
343 256687 : const int lpy = lp - lx;
344 715002 : for (int kly = 0; kly <= lpy; kly++) {
345 1234494 : for (int jly = 0; jly <= lpy - kly; jly++) {
346 2037404 : for (int ily = 0; ily <= lpy - kly - jly; ily++) {
347 1261225 : const int ly = ily + jly + kly;
348 1261225 : const int lpz = lp - lx - ly;
349 3241940 : for (int klz = 0; klz <= lpz; klz++) {
350 5002342 : for (int jlz = 0; jlz <= lpz - klz; jlz++) {
351 7516066 : for (int ilz = 0; ilz <= lpz - klz - jlz; ilz++) {
352 4494439 : const int lz = ilz + jlz + klz;
353 4494439 : const int il = ilx + ily + ilz;
354 4494439 : const int jl = jlx + jly + jlz;
355 4494439 : const int kl = klx + kly + klz;
356 : // const int lijk= coef_map[kl][jl][il];
357 : /* the fac table is the factorial. It
358 : * would be better to use the
359 : * multinomials. */
360 4494439 : idx3(coef_ijk, ly, lx, lz) +=
361 4494439 : idx3(coef_xyz[0], jl, il, kl) *
362 4494439 : idx3(hmatgridp, ilx, 0, 0) *
363 4494439 : idx3(hmatgridp, jlx, 1, 0) *
364 4494439 : idx3(hmatgridp, klx, 2, 0) *
365 4494439 : idx3(hmatgridp, ily, 0, 1) *
366 4494439 : idx3(hmatgridp, jly, 1, 1) *
367 4494439 : idx3(hmatgridp, kly, 2, 1) *
368 4494439 : idx3(hmatgridp, ilz, 0, 2) *
369 4494439 : idx3(hmatgridp, jlz, 1, 2) *
370 4494439 : idx3(hmatgridp, klz, 2, 2) * fac(lx) * fac(ly) *
371 4494439 : fac(lz) /
372 4494439 : (fac(ilx) * fac(ily) * fac(ilz) * fac(jlx) * fac(jly) *
373 4494439 : fac(jlz) * fac(klx) * fac(kly) * fac(klz));
374 : }
375 : }
376 : }
377 : }
378 : }
379 : }
380 : }
381 : }
382 : }
383 23275 : memcpy(coef_xyz->data, coef_ijk.data, sizeof(double) * coef_ijk.alloc_size_);
384 23275 : grid_free_scratch(coef_ijk.data);
385 23275 : grid_free_scratch(hmatgridp.data);
386 23275 : }
|