LCOV - code coverage report
Current view: top level - src - cp_dbcsr_cp2k_link.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:42dac4a) Lines: 71.7 % 293 210
Test Date: 2025-07-25 12:55:17 Functions: 75.0 % 4 3

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief  Routines that link DBCSR and CP2K concepts together
      10              : !> \author Ole Schuett
      11              : !> \par History
      12              : !>         01.2014 created
      13              : ! **************************************************************************************************
      14              : MODULE cp_dbcsr_cp2k_link
      15              :    USE ao_util,                         ONLY: exp_radius
      16              :    USE atomic_kind_types,               ONLY: atomic_kind_type,&
      17              :                                               get_atomic_kind_set
      18              :    USE basis_set_types,                 ONLY: gto_basis_set_p_type,&
      19              :                                               gto_basis_set_type
      20              :    USE bibliography,                    ONLY: Borstnik2014,&
      21              :                                               Heinecke2016,&
      22              :                                               Schuett2016,&
      23              :                                               cite_reference
      24              :    USE cp_control_types,                ONLY: dft_control_type
      25              :    USE cp_dbcsr_api,                    ONLY: &
      26              :         dbcsr_finalize, dbcsr_get_block_p, dbcsr_get_default_config, dbcsr_get_matrix_type, &
      27              :         dbcsr_has_symmetry, dbcsr_reserve_blocks, dbcsr_set, dbcsr_set_config, dbcsr_type, &
      28              :         dbcsr_type_no_symmetry
      29              :    USE cp_dbcsr_operations,             ONLY: max_elements_per_block
      30              :    USE input_keyword_types,             ONLY: keyword_create,&
      31              :                                               keyword_release,&
      32              :                                               keyword_type
      33              :    USE input_section_types,             ONLY: section_add_keyword,&
      34              :                                               section_add_subsection,&
      35              :                                               section_create,&
      36              :                                               section_release,&
      37              :                                               section_type,&
      38              :                                               section_vals_get_subs_vals,&
      39              :                                               section_vals_type,&
      40              :                                               section_vals_val_get
      41              :    USE kinds,                           ONLY: dp,&
      42              :                                               real_8
      43              :    USE orbital_pointers,                ONLY: nso
      44              :    USE qs_integral_utils,               ONLY: basis_set_list_setup
      45              :    USE qs_kind_types,                   ONLY: qs_kind_type
      46              :    USE qs_ks_types,                     ONLY: get_ks_env,&
      47              :                                               qs_ks_env_type
      48              :    USE qs_neighbor_list_types,          ONLY: get_iterator_info,&
      49              :                                               get_neighbor_list_set_p,&
      50              :                                               neighbor_list_iterate,&
      51              :                                               neighbor_list_iterator_create,&
      52              :                                               neighbor_list_iterator_p_type,&
      53              :                                               neighbor_list_iterator_release,&
      54              :                                               neighbor_list_set_p_type
      55              :    USE string_utilities,                ONLY: s2a
      56              : #include "./base/base_uses.f90"
      57              : 
      58              :    IMPLICIT NONE
      59              : 
      60              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_dbcsr_cp2k_link'
      61              : 
      62              :    PUBLIC :: create_dbcsr_section
      63              :    PUBLIC :: cp_dbcsr_config
      64              :    PUBLIC :: cp_dbcsr_alloc_block_from_nbl
      65              :    PUBLIC :: cp_dbcsr_to_csr_screening
      66              : 
      67              :    PRIVATE
      68              : 
      69              :    ! Possible drivers to use for matrix multiplications
      70              :    INTEGER, PARAMETER :: mm_driver_auto = 0
      71              :    INTEGER, PARAMETER :: mm_driver_matmul = 1
      72              :    INTEGER, PARAMETER :: mm_driver_blas = 2
      73              :    INTEGER, PARAMETER :: mm_driver_smm = 3
      74              :    INTEGER, PARAMETER :: mm_driver_xsmm = 4
      75              : 
      76              :    CHARACTER(len=*), PARAMETER :: mm_name_auto = "AUTO", &
      77              :                                   mm_name_blas = "BLAS", &
      78              :                                   mm_name_matmul = "MATMUL", &
      79              :                                   mm_name_smm = "SMM", &
      80              :                                   mm_name_xsmm = "XSMM"
      81              : CONTAINS
      82              : 
      83              : ! **************************************************************************************************
      84              : !> \brief   Creates the dbcsr section for configuring DBCSR
      85              : !> \param section ...
      86              : !> \date    2011-04-05
      87              : !> \author  Urban Borstnik
      88              : ! **************************************************************************************************
      89       290206 :    SUBROUTINE create_dbcsr_section(section)
      90              :       TYPE(section_type), POINTER                        :: section
      91              : 
      92              :       INTEGER                                            :: idefault
      93              :       LOGICAL                                            :: ldefault
      94              :       REAL(KIND=dp)                                      :: rdefault
      95              :       TYPE(keyword_type), POINTER                        :: keyword
      96              :       TYPE(section_type), POINTER                        :: subsection
      97              : 
      98        15274 :       CPASSERT(.NOT. ASSOCIATED(section))
      99              :       CALL section_create(section, __LOCATION__, name="DBCSR", &
     100              :                           description="Configuration options for the DBCSR library.", &
     101              :                           n_keywords=1, n_subsections=0, repeats=.FALSE., &
     102        45822 :                           citations=(/Borstnik2014, Schuett2016/))
     103              : 
     104        15274 :       NULLIFY (keyword)
     105              : 
     106              :       CALL keyword_create(keyword, __LOCATION__, name="mm_stack_size", &
     107              :                           description="Size of multiplication parameter stack." &
     108              :                           //" A negative value leaves the decision up to DBCSR.", &
     109              :                           usage="mm_stack_size 1000", &
     110        15274 :                           default_i_val=-1)
     111        15274 :       CALL section_add_keyword(section, keyword)
     112        15274 :       CALL keyword_release(keyword)
     113              : 
     114              :       CALL keyword_create(keyword, __LOCATION__, name="mm_driver", &
     115              :                           description="Select which backend to use preferably "// &
     116              :                           "for matrix block multiplications on the host.", &
     117              :                           usage="mm_driver blas", &
     118              :                           default_i_val=mm_driver_auto, &
     119              :                           enum_c_vals=s2a("AUTO", "BLAS", "MATMUL", "SMM", "XSMM"), &
     120              :                           enum_i_vals=(/mm_driver_auto, mm_driver_blas, mm_driver_matmul, mm_driver_smm, &
     121              :                                         mm_driver_xsmm/), &
     122              :                           enum_desc=s2a("Choose automatically the best available driver", &
     123              :                                         "BLAS (requires the BLAS library at link time)", &
     124              :                                         "Fortran MATMUL", &
     125              :                                         "Library optimised for Small Matrix Multiplies "// &
     126              :                                         "(requires the SMM library at link time)", &
     127              :                                         "LIBXSMM"), &
     128        30548 :                           citations=(/Heinecke2016/))
     129        15274 :       CALL section_add_keyword(section, keyword)
     130        15274 :       CALL keyword_release(keyword)
     131              : 
     132        15274 :       CALL dbcsr_get_default_config(avg_elements_images=idefault)
     133              :       CALL keyword_create(keyword, __LOCATION__, name="avg_elements_images", &
     134              :                           description="Average number of elements (dense limit)" &
     135              :                           //" for each image, which also corresponds to" &
     136              :                           //" the average number of elements exchanged between MPI processes" &
     137              :                           //" during the operations." &
     138              :                           //" A negative or zero value means unlimited.", &
     139              :                           usage="avg_elements_images 10000", &
     140        15274 :                           default_i_val=idefault)
     141        15274 :       CALL section_add_keyword(section, keyword)
     142        15274 :       CALL keyword_release(keyword)
     143              : 
     144        15274 :       CALL dbcsr_get_default_config(num_mult_images=idefault)
     145              :       CALL keyword_create(keyword, __LOCATION__, name="num_mult_images", &
     146              :                           description="Multiplicative factor for number of virtual images.", &
     147              :                           usage="num_mult_images 2", &
     148        15274 :                           default_i_val=idefault)
     149        15274 :       CALL section_add_keyword(section, keyword)
     150        15274 :       CALL keyword_release(keyword)
     151              : 
     152        15274 :       CALL dbcsr_get_default_config(use_mpi_allocator=ldefault)
     153              :       CALL keyword_create(keyword, __LOCATION__, name="use_mpi_allocator", &
     154              :                           description="Use MPI allocator" &
     155              :                           //" to allocate buffers used in MPI communications.", &
     156              :                           usage="use_mpi_allocator T", &
     157        15274 :                           default_l_val=ldefault)
     158        15274 :       CALL section_add_keyword(section, keyword)
     159        15274 :       CALL keyword_release(keyword)
     160              : 
     161        15274 :       CALL dbcsr_get_default_config(use_mpi_rma=ldefault)
     162              :       CALL keyword_create(keyword, __LOCATION__, name="use_mpi_rma", &
     163              :                           description="Use RMA for MPI communications" &
     164              :                           //" for each image, which also corresponds to" &
     165              :                           //" the number of elements exchanged between MPI processes" &
     166              :                           //" during the operations.", &
     167              :                           usage="use_mpi_rma F", &
     168        15274 :                           default_l_val=ldefault)
     169        15274 :       CALL section_add_keyword(section, keyword)
     170        15274 :       CALL keyword_release(keyword)
     171              : 
     172        15274 :       CALL dbcsr_get_default_config(num_layers_3D=idefault)
     173              :       CALL keyword_create(keyword, __LOCATION__, name="num_layers_3D", &
     174              :                           description="Number of layers for the 3D multplication algorithm.", &
     175              :                           usage="num_layers_3D 1", &
     176        15274 :                           default_i_val=idefault)
     177        15274 :       CALL section_add_keyword(section, keyword)
     178        15274 :       CALL keyword_release(keyword)
     179              : 
     180        15274 :       CALL dbcsr_get_default_config(nstacks=idefault)
     181              :       CALL keyword_create(keyword, __LOCATION__, name="n_size_mnk_stacks", &
     182              :                           description="Number of stacks to use for distinct atomic sizes" &
     183              :                           //" (e.g., 2 for a system of mostly waters). ", &
     184              :                           usage="n_size_mnk_stacks 2", &
     185        15274 :                           default_i_val=idefault)
     186        15274 :       CALL section_add_keyword(section, keyword)
     187        15274 :       CALL keyword_release(keyword)
     188              : 
     189        15274 :       CALL dbcsr_get_default_config(use_comm_thread=ldefault)
     190              :       CALL keyword_create(keyword, __LOCATION__, name="use_comm_thread", &
     191              :                           description="During multiplication, use a thread to periodically poll" &
     192              :                           //" MPI to progress outstanding message completions.  This is" &
     193              :                           //" beneficial on systems without a DMA-capable network adapter" &
     194              :                           //" e.g. Cray XE6.", &
     195              :                           usage="use_comm_thread T", &
     196        15274 :                           default_l_val=ldefault)
     197        15274 :       CALL section_add_keyword(section, keyword)
     198        15274 :       CALL keyword_release(keyword)
     199              : 
     200              :       CALL keyword_create(keyword, __LOCATION__, name="MAX_ELEMENTS_PER_BLOCK", &
     201              :                           description="Default block size for turning dense matrices in blocked ones", &
     202              :                           usage="MAX_ELEMENTS_PER_BLOCK 32", &
     203        15274 :                           default_i_val=max_elements_per_block)
     204        15274 :       CALL section_add_keyword(section, keyword)
     205        15274 :       CALL keyword_release(keyword)
     206              : 
     207              :       CALL keyword_create(keyword, __LOCATION__, name="comm_thread_load", &
     208              :                           description="If a communications thread is used, specify how much " &
     209              :                           //"multiplication workload (%) the thread should perform in " &
     210              :                           //"addition to communication tasks. " &
     211              :                           //"A negative value leaves the decision up to DBCSR.", &
     212              :                           usage="comm_thread_load 50", &
     213        15274 :                           default_i_val=-1)
     214        15274 :       CALL section_add_keyword(section, keyword)
     215        15274 :       CALL keyword_release(keyword)
     216              : 
     217        15274 :       CALL dbcsr_get_default_config(multrec_limit=idefault)
     218              :       CALL keyword_create(keyword, __LOCATION__, name="multrec_limit", &
     219              :                           description="Recursion limit of cache oblivious multrec algorithm.", &
     220        15274 :                           default_i_val=idefault)
     221        15274 :       CALL section_add_keyword(section, keyword)
     222        15274 :       CALL keyword_release(keyword)
     223              : 
     224        15274 :       CALL dbcsr_get_default_config(use_mempools_cpu=ldefault)
     225              :       CALL keyword_create(keyword, __LOCATION__, name="use_mempools_cpu", &
     226              :                           description="Enable memory pools on the CPU.", &
     227        15274 :                           default_l_val=ldefault)
     228        15274 :       CALL section_add_keyword(section, keyword)
     229        15274 :       CALL keyword_release(keyword)
     230              : 
     231        15274 :       NULLIFY (subsection)
     232              :       CALL section_create(subsection, __LOCATION__, name="TENSOR", &
     233              :                           description="Configuration options for Tensors.", &
     234        15274 :                           n_keywords=1, n_subsections=0, repeats=.FALSE.)
     235              : 
     236        15274 :       CALL dbcsr_get_default_config(tas_split_factor=rdefault)
     237              :       CALL keyword_create(keyword, __LOCATION__, name="TAS_SPLIT_FACTOR", &
     238              :                           description="Parameter for hybrid DBCSR-TAS matrix-matrix multiplication algorithm: "// &
     239              :                           "a TAS matrix is split into s submatrices with s = N_max/(N_min*f) with f "// &
     240              :                           "given by this parameter and N_max/N_min the max/min occupancies of the matrices "// &
     241              :                           "involved in a multiplication. A large value makes the multiplication Cannon-based "// &
     242              :                           "(s=1) and a small value (> 0) makes the multiplication based on TAS algorithm "// &
     243              :                           "(s=number of MPI ranks)", &
     244        15274 :                           default_r_val=rdefault)
     245        15274 :       CALL section_add_keyword(subsection, keyword)
     246        15274 :       CALL keyword_release(keyword)
     247              : 
     248        15274 :       CALL section_add_subsection(section, subsection)
     249        15274 :       CALL section_release(subsection)
     250              : 
     251              :       !---------------------------------------------------------------------------
     252        15274 :       NULLIFY (subsection)
     253              :       CALL section_create(subsection, __LOCATION__, name="ACC", &
     254              :                           description="Configuration options for the ACC-Driver.", &
     255        15274 :                           n_keywords=1, n_subsections=0, repeats=.FALSE.)
     256              : 
     257        15274 :       CALL dbcsr_get_default_config(accdrv_thread_buffers=idefault)
     258              :       CALL keyword_create(keyword, __LOCATION__, name="thread_buffers", &
     259              :                           description="Number of transfer-buffers associated with each thread and corresponding stream.", &
     260        15274 :                           default_i_val=idefault)
     261        15274 :       CALL section_add_keyword(subsection, keyword)
     262        15274 :       CALL keyword_release(keyword)
     263              : 
     264        15274 :       CALL dbcsr_get_default_config(accdrv_avoid_after_busy=ldefault)
     265              :       CALL keyword_create(keyword, __LOCATION__, name="avoid_after_busy", &
     266              :                           description="If enabled, stacks are not processed by the acc-driver " &
     267              :                           //"after it has signaled congestion during a round of flushing. " &
     268              :                           //"For the next round of flusing the driver is used again.", &
     269        15274 :                           default_l_val=ldefault)
     270        15274 :       CALL section_add_keyword(subsection, keyword)
     271        15274 :       CALL keyword_release(keyword)
     272              : 
     273        15274 :       CALL dbcsr_get_default_config(accdrv_min_flop_process=idefault)
     274              :       CALL keyword_create(keyword, __LOCATION__, name="min_flop_process", &
     275              :                           description="Only process stacks with more than the given number of " &
     276              :                           //"floating-point operations per stack-entry (2*m*n*k).", &
     277        15274 :                           default_i_val=idefault)
     278        15274 :       CALL section_add_keyword(subsection, keyword)
     279        15274 :       CALL keyword_release(keyword)
     280              : 
     281        15274 :       CALL dbcsr_get_default_config(accdrv_stack_sort=ldefault)
     282              :       CALL keyword_create(keyword, __LOCATION__, name="stack_sort", &
     283              :                           description="Sort multiplication stacks according to C-access.", &
     284        15274 :                           default_l_val=ldefault)
     285        15274 :       CALL section_add_keyword(subsection, keyword)
     286        15274 :       CALL keyword_release(keyword)
     287              : 
     288        15274 :       CALL dbcsr_get_default_config(accdrv_min_flop_sort=idefault)
     289              :       CALL keyword_create(keyword, __LOCATION__, name="min_flop_sort", &
     290              :                           description="Only sort stacks with more than the given number of " &
     291              :                           //"floating-point operations per stack-entry (2*m*n*k). " &
     292              :                           //"Alternatively, the stacks are roughly ordered through a " &
     293              :                           //"binning-scheme by Peter Messmer. (Depends on ACC%STACK_SORT)", &
     294        15274 :                           default_i_val=idefault)
     295        15274 :       CALL section_add_keyword(subsection, keyword)
     296        15274 :       CALL keyword_release(keyword)
     297              : 
     298        15274 :       CALL dbcsr_get_default_config(accdrv_do_inhomogenous=ldefault)
     299              :       CALL keyword_create(keyword, __LOCATION__, name="process_inhomogenous", &
     300              :                           description="If enabled, inhomogenous stacks are also processed by the acc driver.", &
     301        15274 :                           default_l_val=ldefault)
     302        15274 :       CALL section_add_keyword(subsection, keyword)
     303        15274 :       CALL keyword_release(keyword)
     304              : 
     305        15274 :       CALL dbcsr_get_default_config(accdrv_binning_nbins=idefault)
     306              :       CALL keyword_create(keyword, __LOCATION__, name="binning_nbins", &
     307              :                           description="Number of bins used when ordering " &
     308              :                           //"the stacks with the binning-scheme.", &
     309        15274 :                           default_i_val=idefault)
     310        15274 :       CALL section_add_keyword(subsection, keyword)
     311        15274 :       CALL keyword_release(keyword)
     312              : 
     313        15274 :       CALL dbcsr_get_default_config(accdrv_binning_binsize=idefault)
     314              :       CALL keyword_create(keyword, __LOCATION__, name="binning_binsize", &
     315              :                           description="Size of bins used when ordering " &
     316              :                           //"the stacks with the binning-scheme.", &
     317        15274 :                           default_i_val=idefault)
     318        15274 :       CALL section_add_keyword(subsection, keyword)
     319        15274 :       CALL keyword_release(keyword)
     320              : 
     321        15274 :       CALL section_add_subsection(section, subsection)
     322        15274 :       CALL section_release(subsection)
     323              : 
     324        15274 :    END SUBROUTINE create_dbcsr_section
     325              : 
     326              : ! **************************************************************************************************
     327              : !> \brief Configures options for DBCSR
     328              : !> \param root_section ...
     329              : ! **************************************************************************************************
     330       215188 :    SUBROUTINE cp_dbcsr_config(root_section)
     331              :       TYPE(section_vals_type), POINTER                   :: root_section
     332              : 
     333              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'cp_dbcsr_config'
     334              : 
     335              :       INTEGER                                            :: handle, ival
     336              :       LOGICAL                                            :: lval
     337              :       REAL(kind=real_8)                                  :: rval
     338              :       TYPE(section_vals_type), POINTER                   :: dbcsr_section
     339              : 
     340         9356 :       CALL timeset(routineN, handle)
     341              : 
     342         9356 :       CALL cite_reference(Borstnik2014)
     343         9356 :       CALL cite_reference(Schuett2016)
     344              : 
     345         9356 :       dbcsr_section => section_vals_get_subs_vals(root_section, "GLOBAL%DBCSR")
     346              : 
     347         9356 :       CALL section_vals_val_get(dbcsr_section, "mm_stack_size", i_val=ival)
     348         9356 :       CALL dbcsr_set_config(mm_stack_size=ival)
     349              : 
     350         9356 :       CALL section_vals_val_get(dbcsr_section, "MAX_ELEMENTS_PER_BLOCK", i_val=max_elements_per_block)
     351              : 
     352         9356 :       CALL section_vals_val_get(dbcsr_section, "avg_elements_images", i_val=ival)
     353         9356 :       CALL dbcsr_set_config(avg_elements_images=ival)
     354              : 
     355         9356 :       CALL section_vals_val_get(dbcsr_section, "num_mult_images", i_val=ival)
     356         9356 :       CALL dbcsr_set_config(num_mult_images=ival)
     357              : 
     358         9356 :       CALL section_vals_val_get(dbcsr_section, "n_size_mnk_stacks", i_val=ival)
     359         9356 :       CALL dbcsr_set_config(nstacks=ival)
     360              : 
     361         9356 :       CALL section_vals_val_get(dbcsr_section, "use_mpi_allocator", l_val=lval)
     362         9356 :       CALL dbcsr_set_config(use_mpi_allocator=lval)
     363              : 
     364         9356 :       CALL section_vals_val_get(dbcsr_section, "use_mpi_rma", l_val=lval)
     365         9356 :       CALL dbcsr_set_config(use_mpi_rma=lval)
     366              : 
     367         9356 :       CALL section_vals_val_get(dbcsr_section, "num_layers_3D", i_val=ival)
     368         9356 :       CALL dbcsr_set_config(num_layers_3D=ival)
     369              : 
     370         9356 :       CALL section_vals_val_get(dbcsr_section, "use_comm_thread", l_val=lval)
     371         9356 :       CALL dbcsr_set_config(use_comm_thread=lval)
     372              : 
     373         9356 :       CALL section_vals_val_get(dbcsr_section, "comm_thread_load", i_val=ival)
     374         9356 :       CALL dbcsr_set_config(comm_thread_load=ival)
     375              : 
     376         9356 :       CALL section_vals_val_get(dbcsr_section, "multrec_limit", i_val=ival)
     377         9356 :       CALL dbcsr_set_config(multrec_limit=ival)
     378              : 
     379         9356 :       CALL section_vals_val_get(dbcsr_section, "use_mempools_cpu", l_val=lval)
     380         9356 :       CALL dbcsr_set_config(use_mempools_cpu=lval)
     381              : 
     382         9356 :       CALL section_vals_val_get(dbcsr_section, "TENSOR%tas_split_factor", r_val=rval)
     383         9356 :       CALL dbcsr_set_config(tas_split_factor=rval)
     384              : 
     385         9356 :       CALL section_vals_val_get(dbcsr_section, "ACC%thread_buffers", i_val=ival)
     386         9356 :       CALL dbcsr_set_config(accdrv_thread_buffers=ival)
     387              : 
     388         9356 :       CALL section_vals_val_get(dbcsr_section, "ACC%min_flop_process", i_val=ival)
     389         9356 :       CALL dbcsr_set_config(accdrv_min_flop_process=ival)
     390              : 
     391         9356 :       CALL section_vals_val_get(dbcsr_section, "ACC%stack_sort", l_val=lval)
     392         9356 :       CALL dbcsr_set_config(accdrv_stack_sort=lval)
     393              : 
     394         9356 :       CALL section_vals_val_get(dbcsr_section, "ACC%min_flop_sort", i_val=ival)
     395         9356 :       CALL dbcsr_set_config(accdrv_min_flop_sort=ival)
     396              : 
     397         9356 :       CALL section_vals_val_get(dbcsr_section, "ACC%process_inhomogenous", l_val=lval)
     398         9356 :       CALL dbcsr_set_config(accdrv_do_inhomogenous=lval)
     399              : 
     400         9356 :       CALL section_vals_val_get(dbcsr_section, "ACC%avoid_after_busy", l_val=lval)
     401         9356 :       CALL dbcsr_set_config(accdrv_avoid_after_busy=lval)
     402              : 
     403         9356 :       CALL section_vals_val_get(dbcsr_section, "ACC%binning_nbins", i_val=ival)
     404         9356 :       CALL dbcsr_set_config(accdrv_binning_nbins=ival)
     405              : 
     406         9356 :       CALL section_vals_val_get(dbcsr_section, "ACC%binning_binsize", i_val=ival)
     407         9356 :       CALL dbcsr_set_config(accdrv_binning_binsize=ival)
     408              : 
     409         9356 :       CALL section_vals_val_get(dbcsr_section, "mm_driver", i_val=ival)
     410         9354 :       SELECT CASE (ival)
     411              :       CASE (mm_driver_auto)
     412         9354 :          CALL dbcsr_set_config(mm_driver="AUTO")
     413              : #if defined(__LIBXSMM)
     414         9354 :          CALL cite_reference(Heinecke2016)
     415              : #endif
     416              :       CASE (mm_driver_blas)
     417            2 :          CALL dbcsr_set_config(mm_driver="BLAS")
     418              :       CASE (mm_driver_matmul)
     419            0 :          CALL dbcsr_set_config(mm_driver="MATMUL")
     420              :       CASE (mm_driver_smm)
     421            0 :          CALL dbcsr_set_config(mm_driver="SMM")
     422              :       CASE (mm_driver_xsmm)
     423            0 :          CALL dbcsr_set_config(mm_driver="XSMM")
     424            0 :          CALL cite_reference(Heinecke2016)
     425              :       CASE DEFAULT
     426         9356 :          CPABORT("Unknown mm_driver")
     427              :       END SELECT
     428              : 
     429         9356 :       CALL timestop(handle)
     430         9356 :    END SUBROUTINE cp_dbcsr_config
     431              : 
     432              : ! **************************************************************************************************
     433              : !> \brief allocate the blocks of a dbcsr based on the neighbor list
     434              : !> \param matrix        the matrix
     435              : !> \param sab_orb       the corresponding neighbor list
     436              : !> \param desymmetrize  Allocate all block of a non-symmetric matrix from a symmetric list
     437              : !> \par History
     438              : !>      11.2009 created vw
     439              : !>      01.2014 moved here from cp_dbcsr_operations (Ole Schuett)
     440              : !> \author vw
     441              : !> \note
     442              : ! **************************************************************************************************
     443              : 
     444       901164 :    SUBROUTINE cp_dbcsr_alloc_block_from_nbl(matrix, sab_orb, desymmetrize)
     445              : 
     446              :       TYPE(dbcsr_type)                                   :: matrix
     447              :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     448              :          POINTER                                         :: sab_orb
     449              :       LOGICAL, INTENT(IN), OPTIONAL                      :: desymmetrize
     450              : 
     451              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_dbcsr_alloc_block_from_nbl'
     452              : 
     453              :       CHARACTER(LEN=1)                                   :: symmetry
     454              :       INTEGER                                            :: blk_cnt, handle, iatom, icol, inode, &
     455              :                                                             irow, jatom, last_jatom, nadd
     456       901164 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: cols, rows, tmp
     457              :       LOGICAL                                            :: alloc_full, is_symmetric, new_atom_b
     458              :       TYPE(neighbor_list_iterator_p_type), &
     459       901164 :          DIMENSION(:), POINTER                           :: nl_iterator
     460              : 
     461       901164 :       CALL timeset(routineN, handle)
     462              : 
     463       901164 :       symmetry = dbcsr_get_matrix_type(matrix)
     464              : 
     465       901164 :       CPASSERT(ASSOCIATED(sab_orb))
     466              : 
     467       901164 :       CALL get_neighbor_list_set_p(neighbor_list_sets=sab_orb, symmetric=is_symmetric)
     468       901164 :       alloc_full = .FALSE.
     469       901164 :       IF (PRESENT(desymmetrize)) THEN
     470           96 :          IF (desymmetrize .AND. (symmetry == dbcsr_type_no_symmetry)) THEN
     471           96 :             IF (is_symmetric) alloc_full = .TRUE.
     472              :          END IF
     473              :       END IF
     474              : 
     475       901164 :       CALL dbcsr_finalize(matrix)
     476       901164 :       ALLOCATE (rows(3), cols(3))
     477       901164 :       blk_cnt = 0
     478       901164 :       nadd = 1
     479       901164 :       IF (alloc_full) nadd = 2
     480              : 
     481       901164 :       CALL neighbor_list_iterator_create(nl_iterator, sab_orb)
     482    791393754 :       DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
     483    790492590 :          CALL get_iterator_info(nl_iterator, iatom=iatom, jatom=jatom, inode=inode)
     484    790492590 :          IF (inode == 1) last_jatom = 0
     485    790492590 :          IF (jatom /= last_jatom) THEN
     486     17628652 :             new_atom_b = .TRUE.
     487     17628652 :             last_jatom = jatom
     488              :          ELSE
     489    791393754 :             new_atom_b = .FALSE.
     490              :             CYCLE
     491              :          END IF
     492     17628652 :          IF (blk_cnt + nadd .GT. SIZE(rows)) THEN
     493      3228729 :             ALLOCATE (tmp(blk_cnt + nadd))
     494     18301871 :             tmp(1:blk_cnt) = rows(1:blk_cnt)
     495      1076243 :             DEALLOCATE (rows)
     496      3228729 :             ALLOCATE (rows((blk_cnt + nadd)*2))
     497     18301871 :             rows(1:blk_cnt) = tmp(1:blk_cnt)
     498     18301871 :             tmp(1:blk_cnt) = cols(1:blk_cnt)
     499      1076243 :             DEALLOCATE (cols)
     500      2152486 :             ALLOCATE (cols((blk_cnt + nadd)*2))
     501     18301871 :             cols(1:blk_cnt) = tmp(1:blk_cnt)
     502      1076243 :             DEALLOCATE (tmp)
     503              :          END IF
     504     18529816 :          IF (alloc_full) THEN
     505         1368 :             blk_cnt = blk_cnt + 1
     506         1368 :             rows(blk_cnt) = iatom
     507         1368 :             cols(blk_cnt) = jatom
     508         1368 :             IF (iatom /= jatom) THEN
     509         1035 :                blk_cnt = blk_cnt + 1
     510         1035 :                rows(blk_cnt) = jatom
     511         1035 :                cols(blk_cnt) = iatom
     512              :             END IF
     513              :          ELSE
     514     17627284 :             blk_cnt = blk_cnt + 1
     515     17627284 :             IF (symmetry == dbcsr_type_no_symmetry) THEN
     516        95323 :                rows(blk_cnt) = iatom
     517        95323 :                cols(blk_cnt) = jatom
     518              :             ELSE
     519     17531961 :                IF (iatom <= jatom) THEN
     520              :                   irow = iatom
     521              :                   icol = jatom
     522              :                ELSE
     523      8069993 :                   irow = jatom
     524      8069993 :                   icol = iatom
     525              :                END IF
     526     17531961 :                rows(blk_cnt) = irow
     527     17531961 :                cols(blk_cnt) = icol
     528              :             END IF
     529              :          END IF
     530              : 
     531              :       END DO
     532       901164 :       CALL neighbor_list_iterator_release(nl_iterator)
     533              : 
     534              :       !
     535       901164 :       CALL dbcsr_reserve_blocks(matrix, rows(1:blk_cnt), cols(1:blk_cnt))
     536       901164 :       DEALLOCATE (rows)
     537       901164 :       DEALLOCATE (cols)
     538       901164 :       CALL dbcsr_finalize(matrix)
     539              : 
     540       901164 :       CALL timestop(handle)
     541              : 
     542      2703492 :    END SUBROUTINE cp_dbcsr_alloc_block_from_nbl
     543              : 
     544              : ! **************************************************************************************************
     545              : !> \brief Apply distance screening to refine sparsity pattern of matrices in CSR
     546              : !>        format (using eps_pgf_orb). Currently this is used for the external
     547              : !>        library PEXSI.
     548              : !> \param ks_env ...
     549              : !> \param[in, out] csr_sparsity DBCSR matrix defining CSR sparsity pattern.
     550              : !>                              This matrix must be initialized and allocated
     551              : !>                              with exactly the same DBCSR sparsity pattern as
     552              : !>                              the DBCSR matrix that is used to create the CSR
     553              : !>                              matrix. It must have symmetric DBCSR format and
     554              : !>                              must not be filtered.
     555              : !> \par History
     556              : !>      02.2015 created [Patrick Seewald]
     557              : !> \author Patrick Seewald
     558              : ! **************************************************************************************************
     559            0 :    SUBROUTINE cp_dbcsr_to_csr_screening(ks_env, csr_sparsity)
     560              :       TYPE(qs_ks_env_type), POINTER                      :: ks_env
     561              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: csr_sparsity
     562              : 
     563              :       CHARACTER(len=*), PARAMETER :: routineN = 'cp_dbcsr_to_csr_screening'
     564              : 
     565              :       INTEGER :: atom_a, atom_b, handle, iatom, icol, ikind, ipgf, irow, iset, isgf, ishell, &
     566              :          jatom, jkind, jpgf, jset, jsgf, jshell, nkind, nset_a, nset_b
     567            0 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: atom_of_kind
     568            0 :       INTEGER, DIMENSION(:), POINTER                     :: npgf_a, npgf_b, nshell_a, nshell_b
     569            0 :       INTEGER, DIMENSION(:, :), POINTER                  :: l_a, l_b
     570              :       LOGICAL                                            :: do_symmetric, found
     571              :       REAL(KIND=dp)                                      :: dab, eps_pgf_orb, r_a, r_b
     572              :       REAL(KIND=dp), DIMENSION(3)                        :: rab
     573            0 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: rpgfa, rpgfb, zet_a, zet_b
     574            0 :       REAL(KIND=dp), DIMENSION(:, :, :), POINTER         :: gcc_a, gcc_b
     575            0 :       REAL(KIND=real_8), DIMENSION(:, :), POINTER        :: screen_blk
     576            0 :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     577              :       TYPE(dft_control_type), POINTER                    :: dft_control
     578            0 :       TYPE(gto_basis_set_p_type), DIMENSION(:), POINTER  :: basis_set_list_a, basis_set_list_b
     579              :       TYPE(gto_basis_set_type), POINTER                  :: basis_set_a, basis_set_b
     580              :       TYPE(neighbor_list_iterator_p_type), &
     581            0 :          DIMENSION(:), POINTER                           :: nl_iterator
     582              :       TYPE(neighbor_list_set_p_type), DIMENSION(:), &
     583            0 :          POINTER                                         :: neighbour_list
     584            0 :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     585              : 
     586            0 :       NULLIFY (screen_blk, atomic_kind_set, basis_set_list_a, &
     587            0 :                basis_set_list_b, basis_set_a, basis_set_b, nl_iterator, &
     588            0 :                qs_kind_set, dft_control)
     589              : 
     590            0 :       CALL timeset(routineN, handle)
     591              : 
     592            0 :       CPASSERT(dbcsr_has_symmetry(csr_sparsity))
     593              : 
     594              :       CALL get_ks_env(ks_env, &
     595              :                       sab_orb=neighbour_list, &
     596              :                       atomic_kind_set=atomic_kind_set, &
     597              :                       qs_kind_set=qs_kind_set, &
     598            0 :                       dft_control=dft_control)
     599              : 
     600            0 :       eps_pgf_orb = dft_control%qs_control%eps_pgf_orb
     601              : 
     602            0 :       nkind = SIZE(qs_kind_set)
     603            0 :       CALL get_atomic_kind_set(atomic_kind_set, atom_of_kind=atom_of_kind)
     604            0 :       CPASSERT(SIZE(neighbour_list) > 0)
     605            0 :       CALL get_neighbor_list_set_p(neighbor_list_sets=neighbour_list, symmetric=do_symmetric)
     606            0 :       CPASSERT(do_symmetric)
     607            0 :       ALLOCATE (basis_set_list_a(nkind), basis_set_list_b(nkind))
     608            0 :       CALL basis_set_list_setup(basis_set_list_a, "ORB", qs_kind_set)
     609            0 :       CALL basis_set_list_setup(basis_set_list_b, "ORB", qs_kind_set)
     610              : 
     611              :       ! csr_sparsity can obtain values 0 (if zero element) or 1 (if non-zero element)
     612            0 :       CALL dbcsr_set(csr_sparsity, 0.0_dp)
     613              : 
     614            0 :       CALL neighbor_list_iterator_create(nl_iterator, neighbour_list)
     615              : 
     616              :       ! Iterate over interacting pairs of atoms corresponding to non-zero
     617              :       ! DBCSR blocks
     618            0 :       DO WHILE (neighbor_list_iterate(nl_iterator) == 0)
     619              :          CALL get_iterator_info(nl_iterator, &
     620              :                                 ikind=ikind, jkind=jkind, &
     621              :                                 iatom=iatom, jatom=jatom, &
     622            0 :                                 r=rab)
     623              : 
     624            0 :          basis_set_a => basis_set_list_a(ikind)%gto_basis_set
     625            0 :          IF (.NOT. ASSOCIATED(basis_set_a)) CYCLE
     626            0 :          basis_set_b => basis_set_list_b(jkind)%gto_basis_set
     627            0 :          IF (.NOT. ASSOCIATED(basis_set_b)) CYCLE
     628              : 
     629            0 :          atom_a = atom_of_kind(iatom)
     630            0 :          atom_b = atom_of_kind(jatom)
     631              : 
     632            0 :          nset_a = basis_set_a%nset
     633            0 :          nset_b = basis_set_b%nset
     634            0 :          npgf_a => basis_set_a%npgf
     635            0 :          npgf_b => basis_set_b%npgf
     636            0 :          nshell_a => basis_set_a%nshell
     637            0 :          nshell_b => basis_set_b%nshell
     638              : 
     639            0 :          l_a => basis_set_a%l
     640            0 :          l_b => basis_set_b%l
     641            0 :          gcc_a => basis_set_a%gcc
     642            0 :          gcc_b => basis_set_b%gcc
     643            0 :          zet_a => basis_set_a%zet
     644            0 :          zet_b => basis_set_b%zet
     645              : 
     646            0 :          rpgfa => basis_set_a%pgf_radius
     647            0 :          rpgfb => basis_set_b%pgf_radius
     648              : 
     649            0 :          IF (iatom <= jatom) THEN
     650            0 :             irow = iatom
     651            0 :             icol = jatom
     652              :          ELSE
     653            0 :             irow = jatom
     654            0 :             icol = iatom
     655              :          END IF
     656              : 
     657              :          CALL dbcsr_get_block_p(matrix=csr_sparsity, row=irow, col=icol, &
     658            0 :                                 block=screen_blk, found=found)
     659              : 
     660            0 :          CPASSERT(found)
     661              : 
     662              :          ! Distance between atoms a and b
     663            0 :          dab = SQRT(rab(1)**2 + rab(2)**2 + rab(3)**2)
     664              : 
     665              :          ! iterate over pairs of primitive GTOs i,j, get their radii r_i, r_j according
     666              :          ! to eps_pgf_orb. Define all matrix elements as non-zero to which a
     667              :          ! contribution from two Gaussians i,j exists with r_i + r_j >= dab.
     668              : 
     669            0 :          isgf = 0
     670            0 :          DO iset = 1, nset_a
     671            0 :             DO ishell = 1, nshell_a(iset)
     672              :                jsgf = 0
     673            0 :                DO jset = 1, nset_b
     674            0 :                   DO jshell = 1, nshell_b(jset)
     675            0 :                      gto_loop: DO ipgf = 1, npgf_a(iset)
     676            0 :                         DO jpgf = 1, npgf_b(jset)
     677            0 :                            IF (rpgfa(ipgf, iset) + rpgfb(jpgf, jset) .GE. dab) THEN
     678              :                               ! more selective screening with radius calculated for each primitive GTO
     679              :                               r_a = exp_radius(l_a(ishell, iset), &
     680              :                                                zet_a(ipgf, iset), &
     681              :                                                eps_pgf_orb, &
     682            0 :                                                gcc_a(ipgf, ishell, iset))
     683              :                               r_b = exp_radius(l_b(jshell, jset), &
     684              :                                                zet_b(jpgf, jset), &
     685              :                                                eps_pgf_orb, &
     686            0 :                                                gcc_b(jpgf, jshell, jset))
     687            0 :                               IF (r_a + r_b .GE. dab) THEN
     688            0 :                                  IF (irow .EQ. iatom) THEN
     689              :                                     screen_blk(isgf + 1:isgf + nso(l_a(ishell, iset)), &
     690            0 :                                                jsgf + 1:jsgf + nso(l_b(jshell, jset))) = 1.0_dp
     691              :                                  ELSE
     692              :                                     screen_blk(jsgf + 1:jsgf + nso(l_b(jshell, jset)), &
     693            0 :                                                isgf + 1:isgf + nso(l_a(ishell, iset))) = 1.0_dp
     694              :                                  END IF
     695              :                                  EXIT gto_loop
     696              :                               END IF
     697              :                            END IF
     698              :                         END DO
     699              :                      END DO gto_loop
     700            0 :                      jsgf = jsgf + nso(l_b(jshell, jset))
     701              :                   END DO
     702              :                END DO
     703            0 :                isgf = isgf + nso(l_a(ishell, iset))
     704              :             END DO
     705              :          END DO
     706              :       END DO
     707              : 
     708            0 :       CALL neighbor_list_iterator_release(nl_iterator)
     709            0 :       DEALLOCATE (basis_set_list_a, basis_set_list_b)
     710              : 
     711            0 :       CALL timestop(handle)
     712            0 :    END SUBROUTINE cp_dbcsr_to_csr_screening
     713              : 
     714              : END MODULE cp_dbcsr_cp2k_link
        

Generated by: LCOV version 2.0-1