LCOV - code coverage report
Current view: top level - src/grid/dgemm - grid_dgemm_utils.h Coverage Total Hit
Test: CP2K Regtests (git:85b8a9b) Lines: 100.0 % 2 2
Test Date: 2026-06-14 06:48:14 Functions: - 0 0

            Line data    Source code
       1              : /*----------------------------------------------------------------------------*/
       2              : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3              : /*  Copyright 2000-2026 CP2K developers group <https://cp2k.org>              */
       4              : /*                                                                            */
       5              : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6              : /*----------------------------------------------------------------------------*/
       7              : 
       8              : #ifndef GRID_DGEMM_UTILS_H
       9              : #define GRID_DGEMM_UTILS_H
      10              : 
      11              : #include <stdbool.h>
      12              : #include <stdio.h>
      13              : #include <string.h>
      14              : 
      15              : #if defined(__MKL)
      16              : #include <mkl.h>
      17              : #include <mkl_cblas.h>
      18              : #endif
      19              : 
      20              : #include "../common/grid_common.h"
      21              : #include "grid_dgemm_private_header.h"
      22              : #include "grid_dgemm_tensor_local.h"
      23              : 
      24              : /* inverse of the factorials */
      25              : static const double inv_fac[] = {1.0,
      26              :                                  1.0,
      27              :                                  0.5,
      28              :                                  0.166666666666666666666666666667,
      29              :                                  0.0416666666666666666666666666667,
      30              :                                  0.00833333333333333333333333333333,
      31              :                                  0.00138888888888888888888888888889,
      32              :                                  0.000198412698412698412698412698413,
      33              :                                  0.0000248015873015873015873015873016,
      34              :                                  2.7557319223985890652557319224e-6,
      35              :                                  2.7557319223985890652557319224e-7,
      36              :                                  2.50521083854417187750521083854e-8,
      37              :                                  2.08767569878680989792100903212e-9,
      38              :                                  1.60590438368216145993923771702e-10,
      39              :                                  1.14707455977297247138516979787e-11,
      40              :                                  7.64716373181981647590113198579e-13,
      41              :                                  4.77947733238738529743820749112e-14,
      42              :                                  2.81145725434552076319894558301e-15,
      43              :                                  1.56192069685862264622163643501e-16,
      44              :                                  8.22063524662432971695598123687e-18,
      45              :                                  4.11031762331216485847799061844e-19,
      46              :                                  1.95729410633912612308475743735e-20,
      47              :                                  8.8967913924505732867488974425e-22,
      48              :                                  3.86817017063068403771691193152e-23,
      49              :                                  1.6117375710961183490487133048e-24,
      50              :                                  6.4469502843844733961948532192e-26,
      51              :                                  2.47959626322479746007494354585e-27,
      52              :                                  9.18368986379554614842571683647e-29,
      53              :                                  3.27988923706983791015204172731e-30,
      54              :                                  1.13099628864477169315587645769e-31,
      55              :                                  3.76998762881590564385292152565e-33};
      56              : 
      57              : inline int coset_without_offset(int lx, int ly, int lz) {
      58              :   const int l = lx + ly + lz;
      59              :   if (l == 0) {
      60              :     return 0;
      61              :   } else {
      62              :     return ((l - lx) * (l - lx + 1)) / 2 + lz;
      63              :   }
      64              : }
      65              : 
      66              : typedef struct dgemm_params_ {
      67              :   char storage;
      68              :   char op1;
      69              :   char op2;
      70              :   double alpha;
      71              :   double beta;
      72              :   double *a, *b, *c;
      73              :   int m, n, k, lda, ldb, ldc;
      74              :   int x, y, z;
      75              :   int x1, y1, z1;
      76              : } dgemm_params;
      77              : 
      78              : extern void dgemm_simplified(dgemm_params *const m);
      79              : 
      80              : /*******************************************************************************
      81              :  * \brief Prototype for BLAS dgemm.
      82              :  * \author Ole Schuett
      83              :  ******************************************************************************/
      84              : void dgemm_(const char *transa, const char *transb, const int *m, const int *n,
      85              :             const int *k, const double *alpha, const double *a, const int *lda,
      86              :             const double *b, const int *ldb, const double *beta, double *c,
      87              :             const int *ldc);
      88              : 
      89              : extern void extract_sub_grid(const int *lower_corner, const int *upper_corner,
      90              :                              const int *position, const tensor *const grid,
      91              :                              tensor *const subgrid);
      92              : extern void add_sub_grid(const int *lower_corner, const int *upper_corner,
      93              :                          const int *position, const tensor *subgrid,
      94              :                          tensor *grid);
      95              : extern void return_cube_position(const int *lb_grid, const int *cube_center,
      96              :                                  const int *lower_boundaries_cube,
      97              :                                  const int *period, int *const position);
      98              : 
      99              : extern void verify_orthogonality(const double dh[3][3], bool orthogonal[3]);
     100              : 
     101              : extern int compute_cube_properties(const bool ortho, const double radius,
     102              :                                    const double dh[3][3],
     103              :                                    const double dh_inv[3][3], const double *rp,
     104              :                                    double *disr_radius, double *roffset,
     105              :                                    int *cubecenter, int *lb_cube, int *ub_cube,
     106              :                                    int *cube_size);
     107              : 
     108              : inline int return_offset_l(const int l) {
     109              :   static const int offset_[] = {1,   4,   7,   11,  16,  22,  29,
     110              :                                 37,  46,  56,  67,  79,  92,  106,
     111              :                                 121, 137, 154, 172, 191, 211, 232};
     112              :   return offset_[l];
     113              : }
     114              : 
     115              : inline int return_linear_index_from_exponents(const int alpha, const int beta,
     116              :                                               const int gamma) {
     117              :   const int l = alpha + beta + gamma;
     118              :   return return_offset_l(l) + (l - alpha) * (l - alpha + 1) / 2 + gamma;
     119              : }
     120              : 
     121        96365 : static inline void *grid_allocate_scratch(size_t size) { return malloc(size); }
     122              : 
     123        96365 : static inline void grid_free_scratch(void *ptr) { free(ptr); }
     124              : 
     125              : /* even openblas and lapack has cblas versions of lapack and blas. */
     126              : #ifndef __MKL
     127              : enum CBLAS_LAYOUT { CblasRowMajor = 101, CblasColMajor = 102 };
     128              : enum CBLAS_TRANSPOSE {
     129              :   CblasNoTrans = 111,
     130              :   CblasTrans = 112,
     131              :   CblasConjTrans = 113
     132              : };
     133              : enum CBLAS_UPLO { CblasUpper = 121, CblasLower = 122 };
     134              : enum CBLAS_DIAG { CblasNonUnit = 131, CblasUnit = 132 };
     135              : enum CBLAS_SIDE { CblasLeft = 141, CblasRight = 142 };
     136              : 
     137              : typedef enum CBLAS_LAYOUT CBLAS_LAYOUT;
     138              : typedef enum CBLAS_TRANSPOSE CBLAS_TRANSPOSE;
     139              : typedef enum CBLAS_UPLO CBLAS_UPLO;
     140              : typedef enum CBLAS_DIAG CBLAS_DIAG;
     141              : 
     142              : double cblas_ddot(const int N, const double *X, const int incX, const double *Y,
     143              :                   const int incY);
     144              : 
     145              : void cblas_dger(const CBLAS_LAYOUT Layout, const int M, const int N,
     146              :                 const double alpha, const double *X, const int incX,
     147              :                 const double *Y, const int incY, double *A, const int lda);
     148              : 
     149              : void cblas_daxpy(const int N, const double alpha, const double *X,
     150              :                  const int incX, double *Y, const int incY);
     151              : 
     152              : void cblas_dgemv(const CBLAS_LAYOUT Layout, const CBLAS_TRANSPOSE TransA,
     153              :                  const int M, const int N, const double alpha, const double *A,
     154              :                  const int lda, const double *X, const int incX,
     155              :                  const double beta, double *Y, const int incY);
     156              : 
     157              : #endif
     158              : 
     159              : extern void compute_interval(const int *const map, const int full_size,
     160              :                              const int size, const int cube_size, const int x1,
     161              :                              int *x, int *const lower_corner,
     162              :                              int *const upper_corner, Interval window);
     163              : #endif
        

Generated by: LCOV version 2.0-1