LCOV - code coverage report
Current view: top level - src/grid/dgemm - grid_dgemm_coefficients.c (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:a2cdc02) Lines: 173 207 83.6 %
Date: 2025-04-17 08:15:26 Functions: 5 8 62.5 %

          Line data    Source code
       1             : /*----------------------------------------------------------------------------*/
       2             : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3             : /*  Copyright 2000-2025 CP2K developers group <https://cp2k.org>              */
       4             : /*                                                                            */
       5             : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6             : /*----------------------------------------------------------------------------*/
       7             : 
       8             : #include <assert.h>
       9             : #include <stdio.h>
      10             : #include <stdlib.h>
      11             : #include <string.h>
      12             : #if defined(__LIBXSMM)
      13             : #include <libxsmm.h>
      14             : #endif
      15             : #include "../common/grid_common.h"
      16             : #include "grid_dgemm_coefficients.h"
      17             : #include "grid_dgemm_private_header.h"
      18             : #include "grid_dgemm_tensor_local.h"
      19             : 
      20           0 : void transform_xyz_to_triangular(const tensor *const coef,
      21             :                                  double *const coef_xyz) {
      22           0 :   assert(coef != NULL);
      23           0 :   assert(coef_xyz != NULL);
      24             : 
      25           0 :   int lxyz = 0;
      26           0 :   const int lp = (coef->size[0] - 1);
      27           0 :   for (int lzp = 0; lzp <= lp; lzp++) {
      28           0 :     for (int lyp = 0; lyp <= lp - lzp; lyp++) {
      29           0 :       for (int lxp = 0; lxp <= lp - lzp - lyp; lxp++, lxyz++) {
      30           0 :         coef_xyz[lxyz] = idx3(coef[0], lzp, lyp, lxp);
      31             :       }
      32             :     }
      33             :   }
      34           0 : }
      35             : 
      36           0 : void transform_yxz_to_triangular(const tensor *const coef,
      37             :                                  double *const coef_xyz) {
      38           0 :   assert(coef != NULL);
      39           0 :   assert(coef_xyz != NULL);
      40           0 :   int lxyz = 0;
      41           0 :   const int lp = (coef->size[0] - 1);
      42           0 :   for (int lzp = 0; lzp <= lp; lzp++) {
      43           0 :     for (int lyp = 0; lyp <= lp - lzp; lyp++) {
      44           0 :       for (int lxp = 0; lxp <= lp - lzp - lyp; lxp++, lxyz++) {
      45           0 :         coef_xyz[lxyz] = idx3(coef[0], lyp, lxp, lzp);
      46             :       }
      47             :     }
      48             :   }
      49           0 : }
      50             : 
      51           0 : void transform_triangular_to_xyz(const double *const coef_xyz,
      52             :                                  tensor *const coef) {
      53           0 :   assert(coef != NULL);
      54           0 :   assert(coef_xyz != NULL);
      55           0 :   int lxyz = 0;
      56           0 :   const int lp = coef->size[0] - 1;
      57           0 :   for (int lzp = 0; lzp <= lp; lzp++) {
      58           0 :     for (int lyp = 0; lyp <= lp - lzp; lyp++) {
      59           0 :       for (int lxp = 0; lxp <= lp - lzp - lyp; lxp++, lxyz++) {
      60           0 :         idx3(coef[0], lzp, lyp, lxp) = coef_xyz[lxyz];
      61             :       }
      62             :       /* initialize the remaining coefficients to zero */
      63           0 :       for (int lxp = lp - lzp - lyp + 1; lxp <= lp; lxp++)
      64           0 :         idx3(coef[0], lzp, lyp, lxp) = 0.0;
      65             :     }
      66             :   }
      67           0 : }
      68             : 
      69             : /* Rotate from the (x - x_1) ^ alpha_1 (x - x_2) ^ \alpha_2 to (x - x_{12}) ^ k
      70             :  * in all three directions */
      71             : 
      72       45945 : void grid_compute_coefficients_dgemm(
      73             :     const int *lmin, const int *lmax, const int lp, const double prefactor,
      74             :     const tensor *const alpha, // [3][lb_max+1][la_max+1][lp+1]
      75             :     const tensor *const pab,
      76             :     tensor *coef_xyz) //[lp+1][lp+1][lp+1]
      77             : {
      78             :   /* can be done with dgemms as well, since it is a change of basis from (x -
      79             :    * x1) (x - x2) to (x - x12)^alpha */
      80             : 
      81       45945 :   assert(alpha != NULL);
      82       45945 :   assert(coef_xyz != NULL);
      83       45945 :   assert(coef_xyz->data != NULL);
      84       45945 :   memset(coef_xyz->data, 0, coef_xyz->alloc_size_ * sizeof(double));
      85             :   // we need a proper fix for that. We can use the tensor structure for this
      86             : 
      87      121995 :   for (int lzb = 0; lzb <= lmax[1]; lzb++) {
      88      185670 :     for (int lyb = 0; lyb <= lmax[1] - lzb; lyb++) {
      89      109620 :       const int lxb_min = imax(lmin[1] - lzb - lyb, 0);
      90      236145 :       for (int lxb = lxb_min; lxb <= lmax[1] - lzb - lyb; lxb++) {
      91      126525 :         const int jco = coset(lxb, lyb, lzb);
      92      345141 :         for (int lza = 0; lza <= lmax[0]; lza++) {
      93      539571 :           for (int lya = 0; lya <= lmax[0] - lza; lya++) {
      94      320955 :             const int lxa_min = imax(lmin[0] - lza - lya, 0);
      95      698721 :             for (int lxa = lxa_min; lxa <= lmax[0] - lza - lya; lxa++) {
      96      377766 :               const int ico = coset(lxa, lya, lza);
      97      377766 :               const double pab_ = idx2(pab[0], jco, ico);
      98      980706 :               for (int lxp = 0; lxp <= lxa + lxb; lxp++) {
      99      602940 :                 const double p1 =
     100      602940 :                     idx4(alpha[0], 0, lxb, lxa, lxp) * pab_ * prefactor;
     101     1980126 :                 for (int lzp = 0; lzp <= lp - lxa - lxb; lzp++) {
     102     3852510 :                   for (int lyp = 0; lyp <= lp - lxa - lxb - lzp; lyp++) {
     103     2475324 :                     const double p2 = idx4(alpha[0], 1, lyb, lya, lyp) *
     104     2475324 :                                       idx4(alpha[0], 2, lzb, lza, lzp) * p1;
     105     2475324 :                     idx3(coef_xyz[0], lxp, lzp, lyp) += p2;
     106             :                   }
     107             :                 }
     108             :               }
     109             :             }
     110             :           }
     111             :         }
     112             :       }
     113             :     }
     114             :   }
     115       45945 : }
     116             : 
     117             : /* Rotate from (x - x_{12}) ^ k to (x - x_1) ^ alpha_1 (x - x_2) ^ \alpha_2 in
     118             :  * all three directions */
     119             : 
     120       44389 : void grid_compute_vab(
     121             :     const int *const lmin, const int *const lmax, const int lp,
     122             :     const double prefactor,
     123             :     const tensor *const alpha, // transformation parameters (x - x_1)^n (x -
     124             :                                // x_2)^m -> (x - x_12) ^ l
     125             :     const tensor *const coef_xyz, tensor *vab) {
     126             :   /* can be done with dgemms as well, since it is a change of basis from (x -
     127             :    * x1) (x - x2) to (x - x12)^alpha */
     128             : 
     129       44389 :   assert(alpha != NULL);
     130       44389 :   assert(coef_xyz != NULL);
     131       44389 :   assert(coef_xyz->data != NULL);
     132             : 
     133       44389 :   memset(vab->data, 0, sizeof(double) * vab->alloc_size_);
     134             :   // we need a proper fix for that. We can use the tensor structure for this
     135             : 
     136      122231 :   for (int lzb = 0; lzb <= lmax[1]; lzb++) {
     137      195574 :     for (int lyb = 0; lyb <= lmax[1] - lzb; lyb++) {
     138      117732 :       const int lxb_min = imax(lmin[1] - lzb - lyb, 0);
     139      264086 :       for (int lxb = lxb_min; lxb <= lmax[1] - lzb - lyb; lxb++) {
     140      146354 :         const int jco = coset(lxb, lyb, lzb);
     141      476098 :         for (int lza = 0; lza <= lmax[0]; lza++) {
     142      947725 :           for (int lya = 0; lya <= lmax[0] - lza; lya++) {
     143      617981 :             const int lxa_min = imax(lmin[0] - lza - lya, 0);
     144     1608510 :             for (int lxa = lxa_min; lxa <= lmax[0] - lza - lya; lxa++) {
     145      990529 :               const int ico = coset(lxa, lya, lza);
     146      990529 :               double pab_ = 0.0;
     147             : 
     148             :               /* this can be done with 3 dgemms actually but need to
     149             :                * set coef accordingly (triangular along the second
     150             :                * diagonal) */
     151             : 
     152     2998946 :               for (int lxp = 0; lxp <= lxa + lxb; lxp++) {
     153     9418557 :                 for (int lzp = 0; lzp <= lp - lxa - lxb; lzp++) {
     154    26966257 :                   for (int lyp = 0; lyp <= lp - lxa - lxb - lzp; lyp++) {
     155    19556117 :                     const double p2 = idx4(alpha[0], 1, lyb, lya, lyp) *
     156    19556117 :                                       idx4(alpha[0], 2, lzb, lza, lzp) *
     157    19556117 :                                       idx4(alpha[0], 0, lxb, lxa, lxp) *
     158             :                                       prefactor;
     159    19556117 :                     pab_ += idx3(coef_xyz[0], lyp, lxp, lzp) * p2;
     160             :                   }
     161             :                 }
     162             :               }
     163      990529 :               idx2(vab[0], jco, ico) += pab_;
     164             :             }
     165             :           }
     166             :         }
     167             :       }
     168             :     }
     169             :   }
     170       44389 : }
     171             : 
     172             : // *****************************************************************************
     173       90334 : void grid_prepare_alpha_dgemm(const double ra[3], const double rb[3],
     174             :                               const double rp[3], const int *lmax,
     175             :                               tensor *alpha) {
     176       90334 :   assert(alpha != NULL);
     177             :   // Initialize with zeros.
     178       90334 :   memset(alpha->data, 0, alpha->alloc_size_ * sizeof(double));
     179             : 
     180             :   //
     181             :   //   compute polynomial expansion coefs -> (x-a)**lxa (x-b)**lxb -> sum_{ls}
     182             :   //   alpha(ls,lxa,lxb,1)*(x-p)**ls
     183             :   //
     184             : 
     185      361336 :   for (int iaxis = 0; iaxis < 3; iaxis++) {
     186      271002 :     const double drpa = rp[iaxis] - ra[iaxis];
     187      271002 :     const double drpb = rp[iaxis] - rb[iaxis];
     188      749997 :     for (int lxa = 0; lxa <= lmax[0]; lxa++) {
     189     1338348 :       for (int lxb = 0; lxb <= lmax[1]; lxb++) {
     190             :         double binomial_k_lxa = 1.0;
     191             :         double a = 1.0;
     192     2244909 :         for (int k = 0; k <= lxa; k++) {
     193             :           double binomial_l_lxb = 1.0;
     194             :           double b = 1.0;
     195     3552828 :           for (int l = 0; l <= lxb; l++) {
     196     2167272 :             idx4(alpha[0], iaxis, lxb, lxa, lxa - l + lxb - k) +=
     197     2167272 :                 binomial_k_lxa * binomial_l_lxb * a * b;
     198     2167272 :             binomial_l_lxb *= ((double)(lxb - l)) / ((double)(l + 1));
     199     2167272 :             b *= drpb;
     200             :           }
     201     1385556 :           binomial_k_lxa *= ((double)(lxa - k)) / ((double)(k + 1));
     202     1385556 :           a *= drpa;
     203             :         }
     204             :       }
     205             :     }
     206             :   }
     207       90334 : }
     208             : 
     209             : /* this function computes the coefficients initially expressed in the cartesian
     210             :  * space to the grid space. It is inplane and can also be done with
     211             :  * matrix-matrix multiplication. It is in fact a tensor reduction. */
     212             : 
     213       24831 : void grid_transform_coef_xzy_to_ikj(const double dh[3][3],
     214             :                                     const tensor *coef_xyz) {
     215       24831 :   assert(coef_xyz != NULL);
     216       24831 :   const int lp = coef_xyz->size[0] - 1;
     217       24831 :   tensor coef_ijk;
     218             : 
     219             :   /* this tensor corresponds to the term
     220             :    * $v_{11}^{k_{11}}v_{12}^{k_{12}}v_{13}^{k_{13}}
     221             :    * v_{21}^{k_{21}}v_{22}^{k_{22}}v_{23}^{k_{23}}
     222             :    * v_{31}^{k_{31}}v_{32}^{k_{32}} v_{33}^{k_{33}}$ in Eq.26 found section
     223             :    * III.A of the notes */
     224       24831 :   tensor hmatgridp;
     225             : 
     226       49662 :   initialize_tensor_3(&coef_ijk, coef_xyz->size[0], coef_xyz->size[1],
     227       24831 :                       coef_xyz->size[2]);
     228             : 
     229       24831 :   coef_ijk.data = grid_allocate_scratch(sizeof(double) * coef_ijk.alloc_size_);
     230             : 
     231       24831 :   if (coef_ijk.data == NULL)
     232           0 :     abort();
     233             : 
     234       24831 :   memset(coef_ijk.data, 0, sizeof(double) * coef_ijk.alloc_size_);
     235       24831 :   initialize_tensor_3(&hmatgridp, coef_xyz->size[0], 3, 3);
     236             : 
     237       49662 :   hmatgridp.data =
     238       24831 :       grid_allocate_scratch(sizeof(double) * hmatgridp.alloc_size_);
     239             : 
     240             :   // transform using multinomials
     241       99324 :   for (int i = 0; i < 3; i++) {
     242      297972 :     for (int j = 0; j < 3; j++) {
     243      223479 :       idx3(hmatgridp, 0, j, i) = 1.0;
     244      527877 :       for (int k = 1; k <= lp; k++) {
     245      304398 :         idx3(hmatgridp, k, j, i) = idx3(hmatgridp, k - 1, j, i) * dh[j][i];
     246             :       }
     247             :     }
     248             :   }
     249             : 
     250             :   const int lpx = lp;
     251       83484 :   for (int klx = 0; klx <= lpx; klx++) {
     252      167610 :     for (int jlx = 0; jlx <= lpx - klx; jlx++) {
     253      287076 :       for (int ilx = 0; ilx <= lpx - klx - jlx; ilx++) {
     254      178119 :         const int lx = ilx + jlx + klx;
     255      178119 :         const int lpy = lp - lx;
     256      178119 :         const double tx = idx3(hmatgridp, ilx, 0, 0) *
     257      178119 :                           idx3(hmatgridp, jlx, 1, 0) *
     258      178119 :                           idx3(hmatgridp, klx, 2, 0) * fac(lx) * inv_fac[klx] *
     259      178119 :                           inv_fac[jlx] * inv_fac[ilx];
     260             : 
     261      446766 :         for (int kly = 0; kly <= lpy; kly++) {
     262      651828 :           for (int jly = 0; jly <= lpy - kly; jly++) {
     263      907674 :             for (int ily = 0; ily <= lpy - kly - jly; ily++) {
     264      524493 :               const int ly = ily + jly + kly;
     265      524493 :               const int lpz = lp - lx - ly;
     266      524493 :               const double ty = tx * idx3(hmatgridp, ily, 0, 1) *
     267      524493 :                                 idx3(hmatgridp, jly, 1, 1) *
     268      524493 :                                 idx3(hmatgridp, kly, 2, 1) * fac(ly) *
     269      524493 :                                 inv_fac[kly] * inv_fac[jly] * inv_fac[ily];
     270     1219980 :               for (int klz = 0; klz <= lpz; klz++) {
     271     1594686 :                 for (int jlz = 0; jlz <= lpz - klz; jlz++) {
     272     2037996 :                   for (int ilz = 0; ilz <= lpz - klz - jlz; ilz++) {
     273     1138797 :                     const int lz = ilz + jlz + klz;
     274     1138797 :                     const int il = ilx + ily + ilz;
     275     1138797 :                     const int jl = jlx + jly + jlz;
     276     1138797 :                     const int kl = klx + kly + klz;
     277             :                     // const int lijk= coef_map[kl][jl][il];
     278             :                     /* the fac table is the factorial. It
     279             :                      * would be better to use the
     280             :                      * multinomials. */
     281     1138797 :                     idx3(coef_ijk, il, kl, jl) +=
     282     1138797 :                         idx3(coef_xyz[0], lx, lz, ly) * ty *
     283     1138797 :                         idx3(hmatgridp, ilz, 0, 2) *
     284     1138797 :                         idx3(hmatgridp, jlz, 1, 2) *
     285     1138797 :                         idx3(hmatgridp, klz, 2, 2) * fac(lz) * inv_fac[klz] *
     286     1138797 :                         inv_fac[jlz] * inv_fac[ilz];
     287             :                   }
     288             :                 }
     289             :               }
     290             :             }
     291             :           }
     292             :         }
     293             :       }
     294             :     }
     295             :   }
     296             : 
     297       24831 :   memcpy(coef_xyz->data, coef_ijk.data, sizeof(double) * coef_ijk.alloc_size_);
     298       24831 :   grid_free_scratch(coef_ijk.data);
     299       24831 :   grid_free_scratch(hmatgridp.data);
     300       24831 : }
     301             : 
     302             : /* Rotate the coefficients computed in the local grid coordinates to the
     303             :  * cartesians coorinates. The order of the indices indicates how the
     304             :  * coefficients are stored */
     305       23275 : void grid_transform_coef_jik_to_yxz(const double dh[3][3],
     306             :                                     const tensor *coef_xyz) {
     307       23275 :   assert(coef_xyz);
     308       23275 :   const int lp = coef_xyz->size[0] - 1;
     309       23275 :   tensor coef_ijk;
     310             : 
     311             :   /* this tensor corresponds to the term
     312             :    * $v_{11}^{k_{11}}v_{12}^{k_{12}}v_{13}^{k_{13}}
     313             :    * v_{21}^{k_{21}}v_{22}^{k_{22}}v_{23}^{k_{23}}
     314             :    * v_{31}^{k_{31}}v_{32}^{k_{32}} v_{33}^{k_{33}}$ in Eq.26 found section
     315             :    * III.A of the notes */
     316       23275 :   tensor hmatgridp;
     317             : 
     318       46550 :   initialize_tensor_3(&coef_ijk, coef_xyz->size[0], coef_xyz->size[1],
     319       23275 :                       coef_xyz->size[2]);
     320             : 
     321       23275 :   coef_ijk.data = grid_allocate_scratch(sizeof(double) * coef_ijk.alloc_size_);
     322       23275 :   if (coef_ijk.data == NULL)
     323           0 :     abort();
     324             : 
     325       23275 :   memset(coef_ijk.data, 0, sizeof(double) * coef_ijk.alloc_size_);
     326       23275 :   initialize_tensor_3(&hmatgridp, coef_xyz->size[0], 3, 3);
     327             : 
     328       46550 :   hmatgridp.data =
     329       23275 :       grid_allocate_scratch(sizeof(double) * hmatgridp.alloc_size_);
     330             : 
     331             :   // transform using multinomials
     332       93100 :   for (int i = 0; i < 3; i++) {
     333      279300 :     for (int j = 0; j < 3; j++) {
     334      209475 :       idx3(hmatgridp, 0, j, i) = 1.0;
     335      558855 :       for (int k = 1; k <= lp; k++) {
     336      349380 :         idx3(hmatgridp, k, j, i) = idx3(hmatgridp, k - 1, j, i) * dh[j][i];
     337             :       }
     338             :     }
     339             :   }
     340             : 
     341             :   const int lpx = lp;
     342       85370 :   for (int klx = 0; klx <= lpx; klx++) {
     343      195728 :     for (int jlx = 0; jlx <= lpx - klx; jlx++) {
     344      390320 :       for (int ilx = 0; ilx <= lpx - klx - jlx; ilx++) {
     345      256687 :         const int lx = ilx + jlx + klx;
     346      256687 :         const int lpy = lp - lx;
     347      715002 :         for (int kly = 0; kly <= lpy; kly++) {
     348     1234494 :           for (int jly = 0; jly <= lpy - kly; jly++) {
     349     2037404 :             for (int ily = 0; ily <= lpy - kly - jly; ily++) {
     350     1261225 :               const int ly = ily + jly + kly;
     351     1261225 :               const int lpz = lp - lx - ly;
     352     3241940 :               for (int klz = 0; klz <= lpz; klz++) {
     353     5002342 :                 for (int jlz = 0; jlz <= lpz - klz; jlz++) {
     354     7516066 :                   for (int ilz = 0; ilz <= lpz - klz - jlz; ilz++) {
     355     4494439 :                     const int lz = ilz + jlz + klz;
     356     4494439 :                     const int il = ilx + ily + ilz;
     357     4494439 :                     const int jl = jlx + jly + jlz;
     358     4494439 :                     const int kl = klx + kly + klz;
     359             :                     // const int lijk= coef_map[kl][jl][il];
     360             :                     /* the fac table is the factorial. It
     361             :                      * would be better to use the
     362             :                      * multinomials. */
     363     4494439 :                     idx3(coef_ijk, ly, lx, lz) +=
     364     4494439 :                         idx3(coef_xyz[0], jl, il, kl) *
     365     4494439 :                         idx3(hmatgridp, ilx, 0, 0) *
     366     4494439 :                         idx3(hmatgridp, jlx, 1, 0) *
     367     4494439 :                         idx3(hmatgridp, klx, 2, 0) *
     368     4494439 :                         idx3(hmatgridp, ily, 0, 1) *
     369     4494439 :                         idx3(hmatgridp, jly, 1, 1) *
     370     4494439 :                         idx3(hmatgridp, kly, 2, 1) *
     371     4494439 :                         idx3(hmatgridp, ilz, 0, 2) *
     372     4494439 :                         idx3(hmatgridp, jlz, 1, 2) *
     373     4494439 :                         idx3(hmatgridp, klz, 2, 2) * fac(lx) * fac(ly) *
     374     4494439 :                         fac(lz) /
     375     4494439 :                         (fac(ilx) * fac(ily) * fac(ilz) * fac(jlx) * fac(jly) *
     376     4494439 :                          fac(jlz) * fac(klx) * fac(kly) * fac(klz));
     377             :                   }
     378             :                 }
     379             :               }
     380             :             }
     381             :           }
     382             :         }
     383             :       }
     384             :     }
     385             :   }
     386       23275 :   memcpy(coef_xyz->data, coef_ijk.data, sizeof(double) * coef_ijk.alloc_size_);
     387       23275 :   grid_free_scratch(coef_ijk.data);
     388       23275 :   grid_free_scratch(hmatgridp.data);
     389       23275 : }

Generated by: LCOV version 1.15