LCOV - code coverage report
Current view: top level - src/grid/dgemm - grid_dgemm_coefficients.c (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:85b8a9b) Lines: 83.6 % 207 173
Test Date: 2026-06-14 06:48:14 Functions: 62.5 % 8 5

            Line data    Source code
       1              : /*----------------------------------------------------------------------------*/
       2              : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3              : /*  Copyright 2000-2026 CP2K developers group <https://cp2k.org>              */
       4              : /*                                                                            */
       5              : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6              : /*----------------------------------------------------------------------------*/
       7              : 
       8              : #include "grid_dgemm_coefficients.h"
       9              : #include "../common/grid_common.h"
      10              : #include "grid_dgemm_private_header.h"
      11              : #include "grid_dgemm_tensor_local.h"
      12              : #include <assert.h>
      13              : #include <stdio.h>
      14              : #include <stdlib.h>
      15              : #include <string.h>
      16              : 
      17            0 : void transform_xyz_to_triangular(const tensor *const coef,
      18              :                                  double *const coef_xyz) {
      19            0 :   assert(coef != NULL);
      20            0 :   assert(coef_xyz != NULL);
      21              : 
      22            0 :   int lxyz = 0;
      23            0 :   const int lp = (coef->size[0] - 1);
      24            0 :   for (int lzp = 0; lzp <= lp; lzp++) {
      25            0 :     for (int lyp = 0; lyp <= lp - lzp; lyp++) {
      26            0 :       for (int lxp = 0; lxp <= lp - lzp - lyp; lxp++, lxyz++) {
      27            0 :         coef_xyz[lxyz] = idx3(coef[0], lzp, lyp, lxp);
      28              :       }
      29              :     }
      30              :   }
      31            0 : }
      32              : 
      33            0 : void transform_yxz_to_triangular(const tensor *const coef,
      34              :                                  double *const coef_xyz) {
      35            0 :   assert(coef != NULL);
      36            0 :   assert(coef_xyz != NULL);
      37            0 :   int lxyz = 0;
      38            0 :   const int lp = (coef->size[0] - 1);
      39            0 :   for (int lzp = 0; lzp <= lp; lzp++) {
      40            0 :     for (int lyp = 0; lyp <= lp - lzp; lyp++) {
      41            0 :       for (int lxp = 0; lxp <= lp - lzp - lyp; lxp++, lxyz++) {
      42            0 :         coef_xyz[lxyz] = idx3(coef[0], lyp, lxp, lzp);
      43              :       }
      44              :     }
      45              :   }
      46            0 : }
      47              : 
      48            0 : void transform_triangular_to_xyz(const double *const coef_xyz,
      49              :                                  tensor *const coef) {
      50            0 :   assert(coef != NULL);
      51            0 :   assert(coef_xyz != NULL);
      52            0 :   int lxyz = 0;
      53            0 :   const int lp = coef->size[0] - 1;
      54            0 :   for (int lzp = 0; lzp <= lp; lzp++) {
      55            0 :     for (int lyp = 0; lyp <= lp - lzp; lyp++) {
      56            0 :       for (int lxp = 0; lxp <= lp - lzp - lyp; lxp++, lxyz++) {
      57            0 :         idx3(coef[0], lzp, lyp, lxp) = coef_xyz[lxyz];
      58              :       }
      59              :       /* initialize the remaining coefficients to zero */
      60            0 :       for (int lxp = lp - lzp - lyp + 1; lxp <= lp; lxp++)
      61            0 :         idx3(coef[0], lzp, lyp, lxp) = 0.0;
      62              :     }
      63              :   }
      64            0 : }
      65              : 
      66              : /* Rotate from the (x - x_1) ^ alpha_1 (x - x_2) ^ \alpha_2 to (x - x_{12}) ^ k
      67              :  * in all three directions */
      68              : 
      69        45945 : void grid_compute_coefficients_dgemm(
      70              :     const int *lmin, const int *lmax, const int lp, const double prefactor,
      71              :     const tensor *const alpha, // [3][lb_max+1][la_max+1][lp+1]
      72              :     const tensor *const pab,
      73              :     tensor *coef_xyz) //[lp+1][lp+1][lp+1]
      74              : {
      75              :   /* can be done with dgemms as well, since it is a change of basis from (x -
      76              :    * x1) (x - x2) to (x - x12)^alpha */
      77              : 
      78        45945 :   assert(alpha != NULL);
      79        45945 :   assert(coef_xyz != NULL);
      80        45945 :   assert(coef_xyz->data != NULL);
      81        45945 :   memset(coef_xyz->data, 0, coef_xyz->alloc_size_ * sizeof(double));
      82              :   // we need a proper fix for that. We can use the tensor structure for this
      83              : 
      84       121995 :   for (int lzb = 0; lzb <= lmax[1]; lzb++) {
      85       185670 :     for (int lyb = 0; lyb <= lmax[1] - lzb; lyb++) {
      86       109620 :       const int lxb_min = imax(lmin[1] - lzb - lyb, 0);
      87       236145 :       for (int lxb = lxb_min; lxb <= lmax[1] - lzb - lyb; lxb++) {
      88       126525 :         const int jco = coset(lxb, lyb, lzb);
      89       345141 :         for (int lza = 0; lza <= lmax[0]; lza++) {
      90       539571 :           for (int lya = 0; lya <= lmax[0] - lza; lya++) {
      91       320955 :             const int lxa_min = imax(lmin[0] - lza - lya, 0);
      92       698721 :             for (int lxa = lxa_min; lxa <= lmax[0] - lza - lya; lxa++) {
      93       377766 :               const int ico = coset(lxa, lya, lza);
      94       377766 :               const double pab_ = idx2(pab[0], jco, ico);
      95       980706 :               for (int lxp = 0; lxp <= lxa + lxb; lxp++) {
      96       602940 :                 const double p1 =
      97       602940 :                     idx4(alpha[0], 0, lxb, lxa, lxp) * pab_ * prefactor;
      98      1980126 :                 for (int lzp = 0; lzp <= lp - lxa - lxb; lzp++) {
      99      3852510 :                   for (int lyp = 0; lyp <= lp - lxa - lxb - lzp; lyp++) {
     100      2475324 :                     const double p2 = idx4(alpha[0], 1, lyb, lya, lyp) *
     101      2475324 :                                       idx4(alpha[0], 2, lzb, lza, lzp) * p1;
     102      2475324 :                     idx3(coef_xyz[0], lxp, lzp, lyp) += p2;
     103              :                   }
     104              :                 }
     105              :               }
     106              :             }
     107              :           }
     108              :         }
     109              :       }
     110              :     }
     111              :   }
     112        45945 : }
     113              : 
     114              : /* Rotate from (x - x_{12}) ^ k to (x - x_1) ^ alpha_1 (x - x_2) ^ \alpha_2 in
     115              :  * all three directions */
     116              : 
     117        44389 : void grid_compute_vab(
     118              :     const int *const lmin, const int *const lmax, const int lp,
     119              :     const double prefactor,
     120              :     const tensor *const alpha, // transformation parameters (x - x_1)^n (x -
     121              :                                // x_2)^m -> (x - x_12) ^ l
     122              :     const tensor *const coef_xyz, tensor *vab) {
     123              :   /* can be done with dgemms as well, since it is a change of basis from (x -
     124              :    * x1) (x - x2) to (x - x12)^alpha */
     125              : 
     126        44389 :   assert(alpha != NULL);
     127        44389 :   assert(coef_xyz != NULL);
     128        44389 :   assert(coef_xyz->data != NULL);
     129              : 
     130        44389 :   memset(vab->data, 0, sizeof(double) * vab->alloc_size_);
     131              :   // we need a proper fix for that. We can use the tensor structure for this
     132              : 
     133       122231 :   for (int lzb = 0; lzb <= lmax[1]; lzb++) {
     134       195574 :     for (int lyb = 0; lyb <= lmax[1] - lzb; lyb++) {
     135       117732 :       const int lxb_min = imax(lmin[1] - lzb - lyb, 0);
     136       264086 :       for (int lxb = lxb_min; lxb <= lmax[1] - lzb - lyb; lxb++) {
     137       146354 :         const int jco = coset(lxb, lyb, lzb);
     138       476098 :         for (int lza = 0; lza <= lmax[0]; lza++) {
     139       947725 :           for (int lya = 0; lya <= lmax[0] - lza; lya++) {
     140       617981 :             const int lxa_min = imax(lmin[0] - lza - lya, 0);
     141      1608510 :             for (int lxa = lxa_min; lxa <= lmax[0] - lza - lya; lxa++) {
     142       990529 :               const int ico = coset(lxa, lya, lza);
     143       990529 :               double pab_ = 0.0;
     144              : 
     145              :               /* this can be done with 3 dgemms actually but need to
     146              :                * set coef accordingly (triangular along the second
     147              :                * diagonal) */
     148              : 
     149      2998946 :               for (int lxp = 0; lxp <= lxa + lxb; lxp++) {
     150      9418557 :                 for (int lzp = 0; lzp <= lp - lxa - lxb; lzp++) {
     151     26966257 :                   for (int lyp = 0; lyp <= lp - lxa - lxb - lzp; lyp++) {
     152     19556117 :                     const double p2 = idx4(alpha[0], 1, lyb, lya, lyp) *
     153     19556117 :                                       idx4(alpha[0], 2, lzb, lza, lzp) *
     154     19556117 :                                       idx4(alpha[0], 0, lxb, lxa, lxp) *
     155              :                                       prefactor;
     156     19556117 :                     pab_ += idx3(coef_xyz[0], lyp, lxp, lzp) * p2;
     157              :                   }
     158              :                 }
     159              :               }
     160       990529 :               idx2(vab[0], jco, ico) += pab_;
     161              :             }
     162              :           }
     163              :         }
     164              :       }
     165              :     }
     166              :   }
     167        44389 : }
     168              : 
     169              : // *****************************************************************************
     170        90334 : void grid_prepare_alpha_dgemm(const double ra[3], const double rb[3],
     171              :                               const double rp[3], const int *lmax,
     172              :                               tensor *alpha) {
     173        90334 :   assert(alpha != NULL);
     174              :   // Initialize with zeros.
     175        90334 :   memset(alpha->data, 0, alpha->alloc_size_ * sizeof(double));
     176              : 
     177              :   //
     178              :   //   compute polynomial expansion coefs -> (x-a)**lxa (x-b)**lxb -> sum_{ls}
     179              :   //   alpha(ls,lxa,lxb,1)*(x-p)**ls
     180              :   //
     181              : 
     182       361336 :   for (int iaxis = 0; iaxis < 3; iaxis++) {
     183       271002 :     const double drpa = rp[iaxis] - ra[iaxis];
     184       271002 :     const double drpb = rp[iaxis] - rb[iaxis];
     185       749997 :     for (int lxa = 0; lxa <= lmax[0]; lxa++) {
     186      1338348 :       for (int lxb = 0; lxb <= lmax[1]; lxb++) {
     187              :         double binomial_k_lxa = 1.0;
     188              :         double a = 1.0;
     189      2244909 :         for (int k = 0; k <= lxa; k++) {
     190              :           double binomial_l_lxb = 1.0;
     191              :           double b = 1.0;
     192      3552828 :           for (int l = 0; l <= lxb; l++) {
     193      2167272 :             idx4(alpha[0], iaxis, lxb, lxa, lxa - l + lxb - k) +=
     194      2167272 :                 binomial_k_lxa * binomial_l_lxb * a * b;
     195      2167272 :             binomial_l_lxb *= ((double)(lxb - l)) / ((double)(l + 1));
     196      2167272 :             b *= drpb;
     197              :           }
     198      1385556 :           binomial_k_lxa *= ((double)(lxa - k)) / ((double)(k + 1));
     199      1385556 :           a *= drpa;
     200              :         }
     201              :       }
     202              :     }
     203              :   }
     204        90334 : }
     205              : 
     206              : /* this function computes the coefficients initially expressed in the cartesian
     207              :  * space to the grid space. It is inplane and can also be done with
     208              :  * matrix-matrix multiplication. It is in fact a tensor reduction. */
     209              : 
     210        24831 : void grid_transform_coef_xzy_to_ikj(const double dh[3][3],
     211              :                                     const tensor *coef_xyz) {
     212        24831 :   assert(coef_xyz != NULL);
     213        24831 :   const int lp = coef_xyz->size[0] - 1;
     214        24831 :   tensor coef_ijk;
     215              : 
     216              :   /* this tensor corresponds to the term
     217              :    * $v_{11}^{k_{11}}v_{12}^{k_{12}}v_{13}^{k_{13}}
     218              :    * v_{21}^{k_{21}}v_{22}^{k_{22}}v_{23}^{k_{23}}
     219              :    * v_{31}^{k_{31}}v_{32}^{k_{32}} v_{33}^{k_{33}}$ in Eq.26 found section
     220              :    * III.A of the notes */
     221        24831 :   tensor hmatgridp;
     222              : 
     223        49662 :   initialize_tensor_3(&coef_ijk, coef_xyz->size[0], coef_xyz->size[1],
     224        24831 :                       coef_xyz->size[2]);
     225              : 
     226        24831 :   coef_ijk.data = grid_allocate_scratch(sizeof(double) * coef_ijk.alloc_size_);
     227              : 
     228        24831 :   if (coef_ijk.data == NULL)
     229            0 :     abort();
     230              : 
     231        24831 :   memset(coef_ijk.data, 0, sizeof(double) * coef_ijk.alloc_size_);
     232        24831 :   initialize_tensor_3(&hmatgridp, coef_xyz->size[0], 3, 3);
     233              : 
     234        24831 :   hmatgridp.data =
     235        24831 :       grid_allocate_scratch(sizeof(double) * hmatgridp.alloc_size_);
     236              : 
     237              :   // transform using multinomials
     238        99324 :   for (int i = 0; i < 3; i++) {
     239       297972 :     for (int j = 0; j < 3; j++) {
     240       223479 :       idx3(hmatgridp, 0, j, i) = 1.0;
     241       527877 :       for (int k = 1; k <= lp; k++) {
     242       304398 :         idx3(hmatgridp, k, j, i) = idx3(hmatgridp, k - 1, j, i) * dh[j][i];
     243              :       }
     244              :     }
     245              :   }
     246              : 
     247              :   const int lpx = lp;
     248        83484 :   for (int klx = 0; klx <= lpx; klx++) {
     249       167610 :     for (int jlx = 0; jlx <= lpx - klx; jlx++) {
     250       287076 :       for (int ilx = 0; ilx <= lpx - klx - jlx; ilx++) {
     251       178119 :         const int lx = ilx + jlx + klx;
     252       178119 :         const int lpy = lp - lx;
     253       178119 :         const double tx = idx3(hmatgridp, ilx, 0, 0) *
     254       178119 :                           idx3(hmatgridp, jlx, 1, 0) *
     255       178119 :                           idx3(hmatgridp, klx, 2, 0) * fac(lx) * inv_fac[klx] *
     256       178119 :                           inv_fac[jlx] * inv_fac[ilx];
     257              : 
     258       446766 :         for (int kly = 0; kly <= lpy; kly++) {
     259       651828 :           for (int jly = 0; jly <= lpy - kly; jly++) {
     260       907674 :             for (int ily = 0; ily <= lpy - kly - jly; ily++) {
     261       524493 :               const int ly = ily + jly + kly;
     262       524493 :               const int lpz = lp - lx - ly;
     263       524493 :               const double ty = tx * idx3(hmatgridp, ily, 0, 1) *
     264       524493 :                                 idx3(hmatgridp, jly, 1, 1) *
     265       524493 :                                 idx3(hmatgridp, kly, 2, 1) * fac(ly) *
     266       524493 :                                 inv_fac[kly] * inv_fac[jly] * inv_fac[ily];
     267      1219980 :               for (int klz = 0; klz <= lpz; klz++) {
     268      1594686 :                 for (int jlz = 0; jlz <= lpz - klz; jlz++) {
     269      2037996 :                   for (int ilz = 0; ilz <= lpz - klz - jlz; ilz++) {
     270      1138797 :                     const int lz = ilz + jlz + klz;
     271      1138797 :                     const int il = ilx + ily + ilz;
     272      1138797 :                     const int jl = jlx + jly + jlz;
     273      1138797 :                     const int kl = klx + kly + klz;
     274              :                     // const int lijk= coef_map[kl][jl][il];
     275              :                     /* the fac table is the factorial. It
     276              :                      * would be better to use the
     277              :                      * multinomials. */
     278      1138797 :                     idx3(coef_ijk, il, kl, jl) +=
     279      1138797 :                         idx3(coef_xyz[0], lx, lz, ly) * ty *
     280      1138797 :                         idx3(hmatgridp, ilz, 0, 2) *
     281      1138797 :                         idx3(hmatgridp, jlz, 1, 2) *
     282      1138797 :                         idx3(hmatgridp, klz, 2, 2) * fac(lz) * inv_fac[klz] *
     283      1138797 :                         inv_fac[jlz] * inv_fac[ilz];
     284              :                   }
     285              :                 }
     286              :               }
     287              :             }
     288              :           }
     289              :         }
     290              :       }
     291              :     }
     292              :   }
     293              : 
     294        24831 :   memcpy(coef_xyz->data, coef_ijk.data, sizeof(double) * coef_ijk.alloc_size_);
     295        24831 :   grid_free_scratch(coef_ijk.data);
     296        24831 :   grid_free_scratch(hmatgridp.data);
     297        24831 : }
     298              : 
     299              : /* Rotate the coefficients computed in the local grid coordinates to the
     300              :  * cartesians coorinates. The order of the indices indicates how the
     301              :  * coefficients are stored */
     302        23275 : void grid_transform_coef_jik_to_yxz(const double dh[3][3],
     303              :                                     const tensor *coef_xyz) {
     304        23275 :   assert(coef_xyz);
     305        23275 :   const int lp = coef_xyz->size[0] - 1;
     306        23275 :   tensor coef_ijk;
     307              : 
     308              :   /* this tensor corresponds to the term
     309              :    * $v_{11}^{k_{11}}v_{12}^{k_{12}}v_{13}^{k_{13}}
     310              :    * v_{21}^{k_{21}}v_{22}^{k_{22}}v_{23}^{k_{23}}
     311              :    * v_{31}^{k_{31}}v_{32}^{k_{32}} v_{33}^{k_{33}}$ in Eq.26 found section
     312              :    * III.A of the notes */
     313        23275 :   tensor hmatgridp;
     314              : 
     315        46550 :   initialize_tensor_3(&coef_ijk, coef_xyz->size[0], coef_xyz->size[1],
     316        23275 :                       coef_xyz->size[2]);
     317              : 
     318        23275 :   coef_ijk.data = grid_allocate_scratch(sizeof(double) * coef_ijk.alloc_size_);
     319        23275 :   if (coef_ijk.data == NULL)
     320            0 :     abort();
     321              : 
     322        23275 :   memset(coef_ijk.data, 0, sizeof(double) * coef_ijk.alloc_size_);
     323        23275 :   initialize_tensor_3(&hmatgridp, coef_xyz->size[0], 3, 3);
     324              : 
     325        23275 :   hmatgridp.data =
     326        23275 :       grid_allocate_scratch(sizeof(double) * hmatgridp.alloc_size_);
     327              : 
     328              :   // transform using multinomials
     329        93100 :   for (int i = 0; i < 3; i++) {
     330       279300 :     for (int j = 0; j < 3; j++) {
     331       209475 :       idx3(hmatgridp, 0, j, i) = 1.0;
     332       558855 :       for (int k = 1; k <= lp; k++) {
     333       349380 :         idx3(hmatgridp, k, j, i) = idx3(hmatgridp, k - 1, j, i) * dh[j][i];
     334              :       }
     335              :     }
     336              :   }
     337              : 
     338              :   const int lpx = lp;
     339        85370 :   for (int klx = 0; klx <= lpx; klx++) {
     340       195728 :     for (int jlx = 0; jlx <= lpx - klx; jlx++) {
     341       390320 :       for (int ilx = 0; ilx <= lpx - klx - jlx; ilx++) {
     342       256687 :         const int lx = ilx + jlx + klx;
     343       256687 :         const int lpy = lp - lx;
     344       715002 :         for (int kly = 0; kly <= lpy; kly++) {
     345      1234494 :           for (int jly = 0; jly <= lpy - kly; jly++) {
     346      2037404 :             for (int ily = 0; ily <= lpy - kly - jly; ily++) {
     347      1261225 :               const int ly = ily + jly + kly;
     348      1261225 :               const int lpz = lp - lx - ly;
     349      3241940 :               for (int klz = 0; klz <= lpz; klz++) {
     350      5002342 :                 for (int jlz = 0; jlz <= lpz - klz; jlz++) {
     351      7516066 :                   for (int ilz = 0; ilz <= lpz - klz - jlz; ilz++) {
     352      4494439 :                     const int lz = ilz + jlz + klz;
     353      4494439 :                     const int il = ilx + ily + ilz;
     354      4494439 :                     const int jl = jlx + jly + jlz;
     355      4494439 :                     const int kl = klx + kly + klz;
     356              :                     // const int lijk= coef_map[kl][jl][il];
     357              :                     /* the fac table is the factorial. It
     358              :                      * would be better to use the
     359              :                      * multinomials. */
     360      4494439 :                     idx3(coef_ijk, ly, lx, lz) +=
     361      4494439 :                         idx3(coef_xyz[0], jl, il, kl) *
     362      4494439 :                         idx3(hmatgridp, ilx, 0, 0) *
     363      4494439 :                         idx3(hmatgridp, jlx, 1, 0) *
     364      4494439 :                         idx3(hmatgridp, klx, 2, 0) *
     365      4494439 :                         idx3(hmatgridp, ily, 0, 1) *
     366      4494439 :                         idx3(hmatgridp, jly, 1, 1) *
     367      4494439 :                         idx3(hmatgridp, kly, 2, 1) *
     368      4494439 :                         idx3(hmatgridp, ilz, 0, 2) *
     369      4494439 :                         idx3(hmatgridp, jlz, 1, 2) *
     370      4494439 :                         idx3(hmatgridp, klz, 2, 2) * fac(lx) * fac(ly) *
     371      4494439 :                         fac(lz) /
     372      4494439 :                         (fac(ilx) * fac(ily) * fac(ilz) * fac(jlx) * fac(jly) *
     373      4494439 :                          fac(jlz) * fac(klx) * fac(kly) * fac(klz));
     374              :                   }
     375              :                 }
     376              :               }
     377              :             }
     378              :           }
     379              :         }
     380              :       }
     381              :     }
     382              :   }
     383        23275 :   memcpy(coef_xyz->data, coef_ijk.data, sizeof(double) * coef_ijk.alloc_size_);
     384        23275 :   grid_free_scratch(coef_ijk.data);
     385        23275 :   grid_free_scratch(hmatgridp.data);
     386        23275 : }
        

Generated by: LCOV version 2.0-1