LCOV - code coverage report
Current view: top level - src - cp_dbcsr_operations.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:c24029e) Lines: 79.6 % 450 358
Test Date: 2026-07-04 06:36:57 Functions: 75.0 % 28 21

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2026 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief   DBCSR operations in CP2K
      10              : !> \author  Urban Borstnik
      11              : !> \date    2009-05-12
      12              : !> \version 0.8
      13              : !>
      14              : !> <b>Modification history:</b>
      15              : !> - Created 2009-05-12
      16              : !> - Generalized sm_fm_mulitply for matrices w/ different row/col block size (A. Bussy, 11.2018)
      17              : ! **************************************************************************************************
      18              : MODULE cp_dbcsr_operations
      19              :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      20              :    USE cp_dbcsr_api,                    ONLY: &
      21              :         dbcsr_add, dbcsr_complete_redistribute, dbcsr_convert_sizes_to_offsets, dbcsr_copy, &
      22              :         dbcsr_create, dbcsr_deallocate_matrix, dbcsr_desymmetrize, dbcsr_distribution_get, &
      23              :         dbcsr_distribution_new, dbcsr_distribution_release, dbcsr_distribution_type, &
      24              :         dbcsr_get_info, dbcsr_get_matrix_type, dbcsr_iterator_blocks_left, &
      25              :         dbcsr_iterator_next_block, dbcsr_iterator_readonly_start, dbcsr_iterator_start, &
      26              :         dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_multiply, dbcsr_p_type, dbcsr_release, &
      27              :         dbcsr_scale, dbcsr_type, dbcsr_type_antisymmetric, dbcsr_type_no_symmetry, &
      28              :         dbcsr_type_symmetric, dbcsr_valid_index, dbcsr_verify_matrix
      29              :    USE cp_dbcsr_contrib,                ONLY: dbcsr_frobenius_norm,&
      30              :                                               dbcsr_reserve_all_blocks
      31              :    USE cp_fm_basic_linalg,              ONLY: cp_fm_gemm
      32              :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      33              :                                               cp_fm_struct_release,&
      34              :                                               cp_fm_struct_type
      35              :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      36              :                                               cp_fm_get_info,&
      37              :                                               cp_fm_release,&
      38              :                                               cp_fm_to_fm,&
      39              :                                               cp_fm_type
      40              :    USE distribution_2d_types,           ONLY: distribution_2d_get,&
      41              :                                               distribution_2d_type
      42              :    USE kinds,                           ONLY: default_string_length,&
      43              :                                               dp
      44              :    USE mathlib,                         ONLY: gcd,&
      45              :                                               lcm
      46              :    USE message_passing,                 ONLY: mp_para_env_type
      47              : 
      48              : !$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
      49              : #include "base/base_uses.f90"
      50              : 
      51              :    IMPLICIT NONE
      52              :    PRIVATE
      53              : 
      54              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_dbcsr_operations'
      55              :    LOGICAL, PARAMETER :: debug_mod = .FALSE.
      56              : 
      57              :    INTEGER, SAVE, PUBLIC :: max_elements_per_block = 32
      58              : 
      59              :    PUBLIC :: dbcsr_multiply_local
      60              : 
      61              :    ! CP2K API emulation
      62              :    PUBLIC :: copy_fm_to_dbcsr, copy_dbcsr_to_fm, &
      63              :              cp_dbcsr_sm_fm_multiply, cp_dbcsr_plus_fm_fm_t, &
      64              :              copy_dbcsr_to_fm_bc, copy_fm_to_dbcsr_bc, cp_fm_to_dbcsr_row_template, &
      65              :              cp_dbcsr_m_by_n_from_template, cp_dbcsr_m_by_n_from_row_template, &
      66              :              dbcsr_create_dist_r_unrot
      67              : 
      68              :    ! distribution_2d_type compatibility
      69              :    PUBLIC :: cp_dbcsr_dist2d_to_dist
      70              : 
      71              :    PUBLIC :: dbcsr_copy_columns_hack
      72              : 
      73              :    ! matrix set
      74              :    PUBLIC :: dbcsr_allocate_matrix_set
      75              :    PUBLIC :: dbcsr_deallocate_matrix_set
      76              : 
      77              :    INTERFACE dbcsr_allocate_matrix_set
      78              :       MODULE PROCEDURE allocate_dbcsr_matrix_set_1d
      79              :       MODULE PROCEDURE allocate_dbcsr_matrix_set_2d
      80              :       MODULE PROCEDURE allocate_dbcsr_matrix_set_3d
      81              :       MODULE PROCEDURE allocate_dbcsr_matrix_set_4d
      82              :       MODULE PROCEDURE allocate_dbcsr_matrix_set_5d
      83              :    END INTERFACE
      84              : 
      85              :    INTERFACE dbcsr_deallocate_matrix_set
      86              :       MODULE PROCEDURE deallocate_dbcsr_matrix_set_1d
      87              :       MODULE PROCEDURE deallocate_dbcsr_matrix_set_2d
      88              :       MODULE PROCEDURE deallocate_dbcsr_matrix_set_3d
      89              :       MODULE PROCEDURE deallocate_dbcsr_matrix_set_4d
      90              :       MODULE PROCEDURE deallocate_dbcsr_matrix_set_5d
      91              :    END INTERFACE
      92              : 
      93              : CONTAINS
      94              : 
      95              : ! **************************************************************************************************
      96              : !> \brief   Copy a BLACS matrix to a dbcsr matrix.
      97              : !>
      98              : !>          real_matrix=beta*real_matrix+alpha*fm
      99              : !>          beta defaults to 0, alpha to 1
     100              : !> \param[in] fm              full matrix
     101              : !> \param[out] matrix         DBCSR matrix
     102              : !> \param[in] keep_sparsity   (optional) retains the sparsity of the input
     103              : !>                            matrix
     104              : !> \date    2009-10-13
     105              : !> \par History
     106              : !>          2009-10-13 rewritten based on copy_dbcsr_to_fm
     107              : !> \author  Urban Borstnik
     108              : !> \version 2.0
     109              : ! **************************************************************************************************
     110      1389844 :    SUBROUTINE copy_fm_to_dbcsr(fm, matrix, keep_sparsity)
     111              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     112              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
     113              :       LOGICAL, INTENT(IN), OPTIONAL                      :: keep_sparsity
     114              : 
     115              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'copy_fm_to_dbcsr'
     116              : 
     117              :       INTEGER                                            :: handle
     118              :       LOGICAL                                            :: my_keep_sparsity
     119              :       TYPE(dbcsr_type)                                   :: bc_mat, redist_mat
     120              : 
     121      1389844 :       CALL timeset(routineN, handle)
     122              : 
     123      1389844 :       my_keep_sparsity = .FALSE.
     124      1389844 :       IF (PRESENT(keep_sparsity)) my_keep_sparsity = keep_sparsity
     125              : 
     126      1389844 :       CALL copy_fm_to_dbcsr_bc(fm, bc_mat)
     127              : 
     128      1389844 :       IF (my_keep_sparsity) THEN
     129       251634 :          CALL dbcsr_create(redist_mat, template=matrix)
     130       251634 :          CALL dbcsr_complete_redistribute(bc_mat, redist_mat)
     131       251634 :          CALL dbcsr_copy(matrix, redist_mat, keep_sparsity=.TRUE.)
     132       251634 :          CALL dbcsr_release(redist_mat)
     133              :       ELSE
     134      1138210 :          CALL dbcsr_complete_redistribute(bc_mat, matrix)
     135              :       END IF
     136              : 
     137      1389844 :       CALL dbcsr_release(bc_mat)
     138              : 
     139      1389844 :       CALL timestop(handle)
     140      1389844 :    END SUBROUTINE copy_fm_to_dbcsr
     141              : 
     142              : ! **************************************************************************************************
     143              : !> \brief   Copy a BLACS matrix to a dbcsr matrix with a special block-cyclic distribution,
     144              : !>           which requires no complete redistribution.
     145              : !> \param fm ...
     146              : !> \param bc_mat ...
     147              : ! **************************************************************************************************
     148      1390232 :    SUBROUTINE copy_fm_to_dbcsr_bc(fm, bc_mat)
     149              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm
     150              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: bc_mat
     151              : 
     152              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'copy_fm_to_dbcsr_bc'
     153              : 
     154              :       INTEGER                                            :: col, handle, ncol_block, ncol_global, &
     155              :                                                             nrow_block, nrow_global, row
     156      1390232 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: first_col, first_row, last_col, last_row
     157      1390232 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, row_blk_size
     158      1390232 :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
     159      1390232 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: dbcsr_block, fm_block
     160              :       TYPE(dbcsr_distribution_type)                      :: bc_dist
     161              :       TYPE(dbcsr_iterator_type)                          :: iter
     162              : 
     163      1390232 :       CALL timeset(routineN, handle)
     164              : 
     165              :       ! Create processor grid
     166      1390232 :       pgrid => fm%matrix_struct%context%blacs2mpi
     167              : 
     168              :       ! Create a block-cyclic distribution compatible with the FM matrix.
     169      1390232 :       nrow_block = fm%matrix_struct%nrow_block
     170      1390232 :       ncol_block = fm%matrix_struct%ncol_block
     171      1390232 :       nrow_global = fm%matrix_struct%nrow_global
     172      1390232 :       ncol_global = fm%matrix_struct%ncol_global
     173      1390232 :       NULLIFY (col_blk_size, row_blk_size)
     174              :       CALL dbcsr_create_dist_block_cyclic(bc_dist, &
     175              :                                           nrows=nrow_global, ncolumns=ncol_global, & ! Actual full matrix size
     176              :                                           nrow_block=nrow_block, ncol_block=ncol_block, & ! BLACS parameters
     177              :                                           group_handle=fm%matrix_struct%para_env%get_handle(), pgrid=pgrid, &
     178      1390232 :                                           row_blk_sizes=row_blk_size, col_blk_sizes=col_blk_size) ! block-cyclic row/col sizes
     179              : 
     180              :       ! Create the block-cyclic DBCSR matrix
     181              :       CALL dbcsr_create(bc_mat, "Block-cyclic ", bc_dist, &
     182      1390232 :                         dbcsr_type_no_symmetry, row_blk_size, col_blk_size, reuse_arrays=.TRUE.)
     183      1390232 :       CALL dbcsr_distribution_release(bc_dist)
     184              : 
     185              :       ! allocate all blocks
     186      1390232 :       CALL dbcsr_reserve_all_blocks(bc_mat)
     187              : 
     188      1390232 :       CALL calculate_fm_block_ranges(bc_mat, first_row, last_row, first_col, last_col)
     189              : 
     190              :       ! Copy the FM data to the block-cyclic DBCSR matrix.  This step
     191              :       ! could be skipped with appropriate DBCSR index manipulation.
     192      1390232 :       fm_block => fm%local_data
     193              : !$OMP PARALLEL DEFAULT(NONE) PRIVATE(iter, row, col, dbcsr_block) &
     194      1390232 : !$OMP SHARED(bc_mat, last_row, first_row, last_col, first_col, fm_block)
     195              :       CALL dbcsr_iterator_start(iter, bc_mat)
     196              :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     197              :          CALL dbcsr_iterator_next_block(iter, row, col, dbcsr_block)
     198              :          dbcsr_block(:, :) = fm_block(first_row(row):last_row(row), first_col(col):last_col(col))
     199              :       END DO
     200              :       CALL dbcsr_iterator_stop(iter)
     201              : !$OMP END PARALLEL
     202              : 
     203      1390232 :       CALL timestop(handle)
     204      2780464 :    END SUBROUTINE copy_fm_to_dbcsr_bc
     205              : 
     206              : ! **************************************************************************************************
     207              : !> \brief Copy a DBCSR matrix to a BLACS matrix
     208              : !> \param[in] matrix          DBCSR matrix
     209              : !> \param[out] fm             full matrix
     210              : ! **************************************************************************************************
     211      6047904 :    SUBROUTINE copy_dbcsr_to_fm(matrix, fm)
     212              :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix
     213              :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm
     214              : 
     215              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'copy_dbcsr_to_fm'
     216              : 
     217              :       CHARACTER(len=default_string_length)               :: name
     218              :       INTEGER                                            :: group_handle, handle, ncol_block, &
     219              :                                                             nfullcols_total, nfullrows_total, &
     220              :                                                             nrow_block
     221      1511976 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, row_blk_size
     222      1511976 :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
     223              :       TYPE(dbcsr_distribution_type)                      :: bc_dist, dist
     224              :       TYPE(dbcsr_type)                                   :: bc_mat, matrix_nosym
     225              : 
     226      1511976 :       CALL timeset(routineN, handle)
     227              : 
     228              :       ! check compatibility
     229              :       CALL dbcsr_get_info(matrix, &
     230              :                           name=name, &
     231              :                           distribution=dist, &
     232              :                           nfullrows_total=nfullrows_total, &
     233      1511976 :                           nfullcols_total=nfullcols_total)
     234              : 
     235      1511976 :       CPASSERT(fm%matrix_struct%nrow_global == nfullrows_total)
     236      1511976 :       CPASSERT(fm%matrix_struct%ncol_global == nfullcols_total)
     237              : 
     238              :       ! info about the full matrix
     239      1511976 :       nrow_block = fm%matrix_struct%nrow_block
     240      1511976 :       ncol_block = fm%matrix_struct%ncol_block
     241              : 
     242              :       ! Convert DBCSR to a block-cyclic
     243      1511976 :       NULLIFY (col_blk_size, row_blk_size)
     244      1511976 :       CALL dbcsr_distribution_get(dist, group=group_handle, pgrid=pgrid)
     245              :       CALL dbcsr_create_dist_block_cyclic(bc_dist, &
     246              :                                           nrows=nfullrows_total, ncolumns=nfullcols_total, &
     247              :                                           nrow_block=nrow_block, ncol_block=ncol_block, &
     248              :                                           group_handle=group_handle, pgrid=pgrid, &
     249      1511976 :                                           row_blk_sizes=row_blk_size, col_blk_sizes=col_blk_size)
     250              : 
     251              :       CALL dbcsr_create(bc_mat, "Block-cyclic"//name, bc_dist, &
     252      1511976 :                         dbcsr_type_no_symmetry, row_blk_size, col_blk_size, reuse_arrays=.TRUE.)
     253      1511976 :       CALL dbcsr_distribution_release(bc_dist)
     254              : 
     255      1511976 :       CALL dbcsr_create(matrix_nosym, template=matrix, matrix_type="N")
     256      1511976 :       CALL dbcsr_desymmetrize(matrix, matrix_nosym)
     257      1511976 :       CALL dbcsr_complete_redistribute(matrix_nosym, bc_mat)
     258      1511976 :       CALL dbcsr_release(matrix_nosym)
     259              : 
     260      1511976 :       CALL copy_dbcsr_to_fm_bc(bc_mat, fm)
     261              : 
     262      1511976 :       CALL dbcsr_release(bc_mat)
     263              : 
     264      1511976 :       CALL timestop(handle)
     265      1511976 :    END SUBROUTINE copy_dbcsr_to_fm
     266              : 
     267              : ! **************************************************************************************************
     268              : !> \brief Copy a DBCSR_BLACS matrix to a BLACS matrix
     269              : !> \param bc_mat DBCSR matrix
     270              : !> \param[out] fm             full matrix
     271              : ! **************************************************************************************************
     272      1511976 :    SUBROUTINE copy_dbcsr_to_fm_bc(bc_mat, fm)
     273              :       TYPE(dbcsr_type), INTENT(IN)                       :: bc_mat
     274              :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm
     275              : 
     276              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'copy_dbcsr_to_fm_bc'
     277              : 
     278              :       INTEGER                                            :: col, handle, row
     279      1511976 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: first_col, first_row, last_col, last_row
     280      1511976 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: dbcsr_block, fm_block
     281              :       TYPE(dbcsr_iterator_type)                          :: iter
     282              : 
     283      1511976 :       CALL timeset(routineN, handle)
     284              : 
     285      1511976 :       CALL calculate_fm_block_ranges(bc_mat, first_row, last_row, first_col, last_col)
     286              : 
     287              :       ! Now copy data to the FM matrix
     288      1511976 :       fm_block => fm%local_data
     289    737585242 :       fm_block = REAL(0.0, KIND=dp)
     290              : !$OMP PARALLEL DEFAULT(NONE) PRIVATE(iter, row, col, dbcsr_block) &
     291      1511976 : !$OMP SHARED(bc_mat, last_row, first_row, last_col, first_col, fm_block)
     292              :       CALL dbcsr_iterator_readonly_start(iter, bc_mat)
     293              :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     294              :          CALL dbcsr_iterator_next_block(iter, row, col, dbcsr_block)
     295              :          fm_block(first_row(row):last_row(row), first_col(col):last_col(col)) = dbcsr_block(:, :)
     296              :       END DO
     297              :       CALL dbcsr_iterator_stop(iter)
     298              : !$OMP END PARALLEL
     299              : 
     300      1511976 :       CALL timestop(handle)
     301      3023952 :    END SUBROUTINE copy_dbcsr_to_fm_bc
     302              : 
     303              : ! **************************************************************************************************
     304              : !> \brief Helper routine used to copy blocks from DBCSR into FM matrices and vice versa
     305              : !> \param bc_mat ...
     306              : !> \param first_row ...
     307              : !> \param last_row ...
     308              : !> \param first_col ...
     309              : !> \param last_col ...
     310              : !> \author Ole Schuett
     311              : ! **************************************************************************************************
     312      2902208 :    SUBROUTINE calculate_fm_block_ranges(bc_mat, first_row, last_row, first_col, last_col)
     313              :       TYPE(dbcsr_type), INTENT(IN)                       :: bc_mat
     314              :       INTEGER, ALLOCATABLE, DIMENSION(:), INTENT(OUT)    :: first_row, last_row, first_col, last_col
     315              : 
     316              :       INTEGER                                            :: col, nblkcols_local, nblkcols_total, &
     317              :                                                             nblkrows_local, nblkrows_total, row
     318              :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: local_col_sizes, local_row_sizes
     319      2902208 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, local_cols, local_rows, &
     320      2902208 :                                                             row_blk_size
     321              : 
     322              :       CALL dbcsr_get_info(bc_mat, &
     323              :                           nblkrows_total=nblkrows_total, &
     324              :                           nblkcols_total=nblkcols_total, &
     325              :                           nblkrows_local=nblkrows_local, &
     326              :                           nblkcols_local=nblkcols_local, &
     327              :                           local_rows=local_rows, &
     328              :                           local_cols=local_cols, &
     329              :                           row_blk_size=row_blk_size, &
     330      2902208 :                           col_blk_size=col_blk_size)
     331              : 
     332              :       ! calculate first_row and last_row
     333      8706072 :       ALLOCATE (local_row_sizes(nblkrows_total))
     334      2902208 :       local_row_sizes(:) = 0
     335      2902208 :       IF (nblkrows_local >= 1) THEN
     336      7307111 :          DO row = 1, nblkrows_local
     337      7307111 :             local_row_sizes(local_rows(row)) = row_blk_size(local_rows(row))
     338              :          END DO
     339              :       END IF
     340      8705520 :       ALLOCATE (first_row(nblkrows_total), last_row(nblkrows_total))
     341      2902208 :       CALL dbcsr_convert_sizes_to_offsets(local_row_sizes, first_row, last_row)
     342      2902208 :       DEALLOCATE (local_row_sizes)
     343              : 
     344              :       ! calculate first_col and last_col
     345      8704824 :       ALLOCATE (local_col_sizes(nblkcols_total))
     346      2902208 :       local_col_sizes(:) = 0
     347      2902208 :       IF (nblkcols_local >= 1) THEN
     348      9447202 :          DO col = 1, nblkcols_local
     349      9447202 :             local_col_sizes(local_cols(col)) = col_blk_size(local_cols(col))
     350              :          END DO
     351              :       END IF
     352      8703024 :       ALLOCATE (first_col(nblkcols_total), last_col(nblkcols_total))
     353      2902208 :       CALL dbcsr_convert_sizes_to_offsets(local_col_sizes, first_col, last_col)
     354      2902208 :       DEALLOCATE (local_col_sizes)
     355              : 
     356      2902208 :    END SUBROUTINE calculate_fm_block_ranges
     357              : 
     358              : ! **************************************************************************************************
     359              : !> \brief hack for dbcsr_copy_columns
     360              : !> \param matrix_b ...
     361              : !> \param matrix_a ...
     362              : !> \param ncol ...
     363              : !> \param source_start ...
     364              : !> \param target_start ...
     365              : !> \param para_env ...
     366              : !> \param blacs_env ...
     367              : !> \author vw
     368              : ! **************************************************************************************************
     369         9448 :    SUBROUTINE dbcsr_copy_columns_hack(matrix_b, matrix_a, &
     370              :                                       ncol, source_start, target_start, para_env, blacs_env)
     371              : 
     372              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix_b
     373              :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix_a
     374              :       INTEGER, INTENT(IN)                                :: ncol, source_start, target_start
     375              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     376              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     377              : 
     378              :       INTEGER                                            :: nfullcols_total, nfullrows_total
     379              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct
     380              :       TYPE(cp_fm_type)                                   :: fm_matrix_a, fm_matrix_b
     381              : 
     382         2362 :       NULLIFY (fm_struct)
     383         2362 :       CALL dbcsr_get_info(matrix_a, nfullrows_total=nfullrows_total, nfullcols_total=nfullcols_total)
     384              :       CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=nfullrows_total, &
     385         2362 :                                ncol_global=nfullcols_total, para_env=para_env)
     386         2362 :       CALL cp_fm_create(fm_matrix_a, fm_struct, name="fm_matrix_a")
     387         2362 :       CALL cp_fm_struct_release(fm_struct)
     388              : 
     389         2362 :       CALL dbcsr_get_info(matrix_b, nfullrows_total=nfullrows_total, nfullcols_total=nfullcols_total)
     390              :       CALL cp_fm_struct_create(fm_struct, context=blacs_env, nrow_global=nfullrows_total, &
     391         2362 :                                ncol_global=nfullcols_total, para_env=para_env)
     392         2362 :       CALL cp_fm_create(fm_matrix_b, fm_struct, name="fm_matrix_b")
     393         2362 :       CALL cp_fm_struct_release(fm_struct)
     394              : 
     395         2362 :       CALL copy_dbcsr_to_fm(matrix_a, fm_matrix_a)
     396         2362 :       CALL copy_dbcsr_to_fm(matrix_b, fm_matrix_b)
     397              : 
     398         2362 :       CALL cp_fm_to_fm(fm_matrix_a, fm_matrix_b, ncol, source_start, target_start)
     399              : 
     400         2362 :       CALL copy_fm_to_dbcsr(fm_matrix_b, matrix_b)
     401              : 
     402         2362 :       CALL cp_fm_release(fm_matrix_a)
     403         2362 :       CALL cp_fm_release(fm_matrix_b)
     404              : 
     405         2362 :    END SUBROUTINE dbcsr_copy_columns_hack
     406              : 
     407              : ! **************************************************************************************************
     408              : !> \brief Creates a DBCSR distribution from a distribution_2d
     409              : !> \param[in] dist2d          distribution_2d
     410              : !> \param[out] dist           DBCSR distribution
     411              : !> \par History
     412              : !>    move form dbcsr_operation 01.2010
     413              : ! **************************************************************************************************
     414        10952 :    SUBROUTINE cp_dbcsr_dist2d_to_dist(dist2d, dist)
     415              :       TYPE(distribution_2d_type), INTENT(IN), TARGET     :: dist2d
     416              :       TYPE(dbcsr_distribution_type), INTENT(OUT)         :: dist
     417              : 
     418        10952 :       INTEGER, DIMENSION(:), POINTER                     :: col_dist, row_dist
     419        10952 :       INTEGER, DIMENSION(:, :), POINTER                  :: col_dist_data, pgrid, row_dist_data
     420              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     421              :       TYPE(distribution_2d_type), POINTER                :: dist2d_p
     422              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     423              : 
     424        10952 :       dist2d_p => dist2d
     425              :       CALL distribution_2d_get(dist2d_p, &
     426              :                                row_distribution=row_dist_data, &
     427              :                                col_distribution=col_dist_data, &
     428        10952 :                                blacs_env=blacs_env)
     429        10952 :       CALL blacs_env%get(para_env=para_env, blacs2mpi=pgrid)
     430              : 
     431              :       ! map to 1D arrays
     432        10952 :       row_dist => row_dist_data(:, 1)
     433        10952 :       col_dist => col_dist_data(:, 1)
     434              :       !row_cluster => row_dist_data(:, 2)
     435              :       !col_cluster => col_dist_data(:, 2)
     436              : 
     437              :       CALL dbcsr_distribution_new(dist, &
     438              :                                   group=para_env%get_handle(), pgrid=pgrid, &
     439              :                                   row_dist=row_dist, &
     440        10952 :                                   col_dist=col_dist)
     441              : 
     442        10952 :    END SUBROUTINE cp_dbcsr_dist2d_to_dist
     443              : 
     444              : ! **************************************************************************************************
     445              : !> \brief multiply a dbcsr with a replicated array
     446              : !>        c = alpha_scalar * A (dbscr) * b + c
     447              : !> \param[in] matrix_a DBSCR matrxx
     448              : !> \param[in]  vec_b        vectors b
     449              : !> \param[inout] vec_c      vectors c
     450              : !> \param[in]  ncol         nbr of columns
     451              : !> \param[in]  alpha        alpha
     452              : !>
     453              : ! **************************************************************************************************
     454            0 :    SUBROUTINE dbcsr_multiply_local(matrix_a, vec_b, vec_c, ncol, alpha)
     455              :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix_a
     456              :       REAL(dp), DIMENSION(:, :), INTENT(IN)              :: vec_b
     457              :       REAL(dp), DIMENSION(:, :), INTENT(INOUT)           :: vec_c
     458              :       INTEGER, INTENT(in), OPTIONAL                      :: ncol
     459              :       REAL(dp), INTENT(IN), OPTIONAL                     :: alpha
     460              : 
     461              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_multiply_local'
     462              : 
     463              :       INTEGER                                            :: col, coloff, my_ncol, row, rowoff, &
     464              :                                                             timing_handle
     465              :       LOGICAL                                            :: has_symm
     466              :       REAL(dp)                                           :: my_alpha, my_alpha2
     467            0 :       REAL(dp), DIMENSION(:, :), POINTER                 :: data_d
     468              :       TYPE(dbcsr_iterator_type)                          :: iter
     469              : 
     470            0 :       CALL timeset(routineN, timing_handle)
     471              : 
     472            0 :       my_alpha = 1.0_dp
     473            0 :       IF (PRESENT(alpha)) my_alpha = alpha
     474              : 
     475            0 :       my_ncol = SIZE(vec_b, 2)
     476            0 :       IF (PRESENT(ncol)) my_ncol = ncol
     477              : 
     478            0 :       my_alpha2 = 0.0_dp
     479            0 :       IF (dbcsr_get_matrix_type(matrix_a) == dbcsr_type_symmetric) my_alpha2 = my_alpha
     480            0 :       IF (dbcsr_get_matrix_type(matrix_a) == dbcsr_type_antisymmetric) my_alpha2 = -my_alpha
     481              : 
     482              :       has_symm = (dbcsr_get_matrix_type(matrix_a) == dbcsr_type_symmetric .OR. &
     483            0 :                   dbcsr_get_matrix_type(matrix_a) == dbcsr_type_antisymmetric)
     484              : 
     485              : !$OMP     PARALLEL DEFAULT(NONE) SHARED(matrix_a,vec_b,vec_c,ncol,my_alpha2,my_alpha,my_ncol,has_symm) &
     486            0 : !$OMP              PRIVATE(iter,row,col,data_d,rowoff,coloff)
     487              :       CALL dbcsr_iterator_readonly_start(iter, matrix_a, dynamic=.TRUE., dynamic_byrows=.TRUE.)
     488              :       DO WHILE (dbcsr_iterator_blocks_left(iter))
     489              :          CALL dbcsr_iterator_next_block(iter, row, col, data_d, row_offset=rowoff, col_offset=coloff)
     490              :          IF (my_ncol /= 1) THEN
     491              :             CALL dgemm('N', 'N', &
     492              :                        SIZE(data_d, 1), my_ncol, SIZE(data_d, 2), &
     493              :                        my_alpha, data_d(1, 1), SIZE(data_d, 1), &
     494              :                        vec_b(coloff, 1), SIZE(vec_b, 1), &
     495              :                        1.0_dp, vec_c(rowoff, 1), SIZE(vec_c, 1))
     496              :          ELSE
     497              :             CALL dgemv('N', SIZE(data_d, 1), SIZE(data_d, 2), &
     498              :                        my_alpha, data_d(1, 1), SIZE(data_d, 1), &
     499              :                        vec_b(coloff, 1), 1, &
     500              :                        1.0_dp, vec_c(rowoff, 1), 1)
     501              :          END IF
     502              :       END DO
     503              :       CALL dbcsr_iterator_stop(iter)
     504              : !$OMP     END PARALLEL
     505              : 
     506              :       ! FIXME ... in the symmetric case, the writes to vec_c depend on the column, not the row. This makes OMP-ing more difficult
     507              :       ! needs e.g. a buffer for vec_c and a reduction of that buffer.
     508            0 :       IF (has_symm) THEN
     509            0 :          CALL dbcsr_iterator_readonly_start(iter, matrix_a)
     510            0 :          DO WHILE (dbcsr_iterator_blocks_left(iter))
     511            0 :             CALL dbcsr_iterator_next_block(iter, row, col, data_d, row_offset=rowoff, col_offset=coloff)
     512            0 :             IF (row /= col) THEN
     513            0 :                IF (my_ncol /= 1) THEN
     514              :                   CALL dgemm('T', 'N', &
     515              :                              SIZE(data_d, 2), my_ncol, SIZE(data_d, 1), &
     516              :                              my_alpha2, data_d(1, 1), SIZE(data_d, 1), &
     517              :                              vec_b(rowoff, 1), SIZE(vec_b, 1), &
     518            0 :                              1.0_dp, vec_c(coloff, 1), SIZE(vec_c, 1))
     519              :                ELSE
     520              :                   CALL dgemv('T', SIZE(data_d, 1), SIZE(data_d, 2), &
     521              :                              my_alpha2, data_d(1, 1), SIZE(data_d, 1), &
     522              :                              vec_b(rowoff, 1), 1, &
     523            0 :                              1.0_dp, vec_c(coloff, 1), 1)
     524              :                END IF
     525              :             END IF
     526              :          END DO
     527            0 :          CALL dbcsr_iterator_stop(iter)
     528              :       END IF
     529              : 
     530            0 :       CALL timestop(timing_handle)
     531            0 :    END SUBROUTINE dbcsr_multiply_local
     532              : 
     533              : ! **************************************************************************************************
     534              : !> \brief multiply a dbcsr with a fm matrix
     535              : !>
     536              : !> For backwards compatibility with BLAS XGEMM, this routine supports
     537              : !> the multiplication of matrices with incompatible dimensions.
     538              : !>
     539              : !> \param[in]  matrix         DBCSR matrix
     540              : !> \param fm_in full matrix
     541              : !> \param fm_out full matrix
     542              : !> \param[in]  ncol           nbr of columns
     543              : !> \param[in]  alpha          alpha
     544              : !> \param[in]  beta           beta
     545              : !>
     546              : ! **************************************************************************************************
     547      2989446 :    SUBROUTINE cp_dbcsr_sm_fm_multiply(matrix, fm_in, fm_out, ncol, alpha, beta)
     548              :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix
     549              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_in
     550              :       TYPE(cp_fm_type), INTENT(INOUT)                    :: fm_out
     551              :       INTEGER, INTENT(IN)                                :: ncol
     552              :       REAL(dp), INTENT(IN), OPTIONAL                     :: alpha, beta
     553              : 
     554              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_dbcsr_sm_fm_multiply'
     555              : 
     556              :       INTEGER                                            :: a_ncol, a_nrow, b_ncol, b_nrow, c_ncol, &
     557              :                                                             c_nrow, k_in, k_out, timing_handle, &
     558              :                                                             timing_handle_mult
     559       498241 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, col_blk_size_right_in, &
     560       498241 :                                                             col_blk_size_right_out, col_dist, &
     561       498241 :                                                             row_blk_size, row_dist
     562              :       TYPE(dbcsr_type)                                   :: in, out
     563              :       TYPE(dbcsr_distribution_type)                      :: dist, dist_right_in, product_dist
     564              :       REAL(dp)                                           :: my_alpha, my_beta
     565              : 
     566       498241 :       CALL timeset(routineN, timing_handle)
     567              : 
     568       498241 :       my_alpha = 1.0_dp
     569       498241 :       my_beta = 0.0_dp
     570       498241 :       IF (PRESENT(alpha)) my_alpha = alpha
     571       498241 :       IF (PRESENT(beta)) my_beta = beta
     572              : 
     573              :       ! TODO
     574       498241 :       CALL cp_fm_get_info(fm_in, ncol_global=b_ncol, nrow_global=b_nrow)
     575       498241 :       CALL cp_fm_get_info(fm_out, ncol_global=c_ncol, nrow_global=c_nrow)
     576       498241 :       CALL dbcsr_get_info(matrix, nfullrows_total=a_nrow, nfullcols_total=a_ncol)
     577              :       !WRITE(*,*) "cp_dbcsr_sm_fm_multiply: A ", a_nrow, "x", a_ncol
     578              :       !WRITE(*,*) "cp_dbcsr_sm_fm_multiply: B ", b_nrow, "x", b_ncol
     579              :       !WRITE(*,*) "cp_dbcsr_sm_fm_multiply: C ", c_nrow, "x", c_ncol
     580              : 
     581       498241 :       CALL cp_fm_get_info(fm_out, ncol_global=k_out)
     582              : 
     583       498241 :       CALL cp_fm_get_info(fm_in, ncol_global=k_in)
     584              :       !write(*,*)routineN//" -----------------------------------"
     585              :       !IF (k_in /= k_out) &
     586              :       !   WRITE(*,'(3(A,I5,1X),2(A,F5.2,1X))')&
     587              :       !   routineN//" ncol", ncol,'k_in',k_in,'k_out',k_out,&
     588              :       !   'alpha',my_alpha,'beta',my_beta
     589              : 
     590       498241 :       IF (ncol > 0 .AND. k_out > 0 .AND. k_in > 0) THEN
     591       497177 :          CALL dbcsr_get_info(matrix, row_blk_size=row_blk_size, col_blk_size=col_blk_size, distribution=dist)
     592       497177 :          CALL dbcsr_create_dist_r_unrot(dist_right_in, dist, k_in, col_blk_size_right_in)
     593              : 
     594              :          CALL dbcsr_create(in, "D", dist_right_in, dbcsr_type_no_symmetry, &
     595       497177 :                            col_blk_size, col_blk_size_right_in)
     596              : 
     597       497177 :          CALL dbcsr_distribution_get(dist, row_dist=row_dist)
     598       497177 :          CALL dbcsr_distribution_get(dist_right_in, col_dist=col_dist)
     599              :          CALL dbcsr_distribution_new(product_dist, template=dist, &
     600       497177 :                                      row_dist=row_dist, col_dist=col_dist)
     601      1491531 :          ALLOCATE (col_blk_size_right_out(SIZE(col_blk_size_right_in)))
     602      2025400 :          col_blk_size_right_out = col_blk_size_right_in
     603       497177 :          CALL match_col_sizes(col_blk_size_right_out, col_blk_size_right_in, k_out)
     604              : 
     605              :          !if (k_in .ne. k_out) then
     606              :          !   write(*,*)routineN//" in cs", col_blk_size_right_in
     607              :          !   write(*,*)routineN//" out cs", col_blk_size_right_out
     608              :          !endif
     609              : 
     610              :          CALL dbcsr_create(out, "D", product_dist, dbcsr_type_no_symmetry, &
     611       497177 :                            row_blk_size, col_blk_size_right_out)
     612              : 
     613       497177 :          CALL copy_fm_to_dbcsr(fm_in, in)
     614       497177 :          IF (ncol /= k_out .OR. my_beta /= 0.0_dp) &
     615       106998 :             CALL copy_fm_to_dbcsr(fm_out, out)
     616              : 
     617       497177 :          CALL timeset(routineN//'_core', timing_handle_mult)
     618              :          CALL dbcsr_multiply("N", "N", my_alpha, matrix, in, my_beta, out, &
     619       497177 :                              last_column=ncol)
     620       497177 :          CALL timestop(timing_handle_mult)
     621              : 
     622       497177 :          CALL copy_dbcsr_to_fm(out, fm_out)
     623              : 
     624       497177 :          CALL dbcsr_release(in)
     625       497177 :          CALL dbcsr_release(out)
     626       497177 :          DEALLOCATE (col_blk_size_right_in, col_blk_size_right_out)
     627       497177 :          CALL dbcsr_distribution_release(dist_right_in)
     628      1988708 :          CALL dbcsr_distribution_release(product_dist)
     629              : 
     630              :       END IF
     631              : 
     632       498241 :       CALL timestop(timing_handle)
     633              : 
     634       498241 :    END SUBROUTINE cp_dbcsr_sm_fm_multiply
     635              : 
     636              : ! **************************************************************************************************
     637              : !> \brief ...
     638              : !> \param sizes1 ...
     639              : !> \param sizes2 ...
     640              : !> \param full_num ...
     641              : ! **************************************************************************************************
     642       497177 :    SUBROUTINE match_col_sizes(sizes1, sizes2, full_num)
     643              :       INTEGER, DIMENSION(:), INTENT(INOUT)               :: sizes1
     644              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: sizes2
     645              :       INTEGER, INTENT(IN)                                :: full_num
     646              : 
     647              :       INTEGER                                            :: left, n1, n2, p, rm, used
     648              : 
     649       497177 :       n1 = SIZE(sizes1)
     650       497177 :       n2 = SIZE(sizes2)
     651       497177 :       IF (n1 /= n2) &
     652            0 :          CPABORT("distributions must be equal!")
     653      1012700 :       sizes1(1:n1) = sizes2(1:n1)
     654      1012700 :       used = SUM(sizes1(1:n1))
     655              :       ! If sizes1 does not cover everything, then we increase the
     656              :       ! size of the last block; otherwise we reduce the blocks
     657              :       ! (from the end) until it is small enough.
     658       497177 :       IF (used < full_num) THEN
     659            0 :          sizes1(n1) = sizes1(n1) + full_num - used
     660              :       ELSE
     661       497177 :          left = used - full_num
     662       497177 :          p = n1
     663       497177 :          DO WHILE (left > 0 .AND. p > 0)
     664            0 :             rm = MIN(left, sizes1(p))
     665            0 :             sizes1(p) = sizes1(p) - rm
     666            0 :             left = left - rm
     667            0 :             p = p - 1
     668              :          END DO
     669              :       END IF
     670       497177 :    END SUBROUTINE match_col_sizes
     671              : 
     672              : ! **************************************************************************************************
     673              : !> \brief performs the multiplication sparse_matrix+dense_mat*dens_mat^T
     674              : !>        if matrix_g is not explicitly given, matrix_v^T will be used
     675              : !>        this can be important to save the necessary redistribute for a
     676              : !>        different matrix_g and increase performance.
     677              : !> \param sparse_matrix ...
     678              : !> \param matrix_v ...
     679              : !> \param matrix_g ...
     680              : !> \param ncol ...
     681              : !> \param alpha ...
     682              : !> \param keep_sparsity Determines if the sparsity of sparse_matrix is retained
     683              : !>        by default it is TRUE
     684              : !> \param symmetry_mode There are the following modes
     685              : !>        1:     sparse_matrix += 0.5*alpha*(v*g^T+g^T*v)    (symmetric update)
     686              : !>        -1:    sparse_matrix += 0.5*alpha*(v*g^T-g^T*v)    (skewsymmetric update)
     687              : !>        else:  sparse_matrix += alpha*v*g^T                (no symmetry, default)
     688              : !>        saves some redistribution steps
     689              : ! **************************************************************************************************
     690       243648 :    SUBROUTINE cp_dbcsr_plus_fm_fm_t(sparse_matrix, matrix_v, matrix_g, ncol, alpha, keep_sparsity, symmetry_mode)
     691              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: sparse_matrix
     692              :       TYPE(cp_fm_type), INTENT(IN)                       :: matrix_v
     693              :       TYPE(cp_fm_type), INTENT(IN), OPTIONAL             :: matrix_g
     694              :       INTEGER, INTENT(IN)                                :: ncol
     695              :       REAL(KIND=dp), INTENT(IN), OPTIONAL                :: alpha
     696              :       LOGICAL, INTENT(IN), OPTIONAL                      :: keep_sparsity
     697              :       INTEGER, INTENT(IN), OPTIONAL                      :: symmetry_mode
     698              : 
     699              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'cp_dbcsr_plus_fm_fm_t'
     700              : 
     701              :       INTEGER                                            :: k, my_symmetry_mode, nao, npcols, &
     702              :                                                             timing_handle
     703       243648 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size_left, col_dist_left, &
     704       243648 :                                                             row_blk_size, row_dist
     705              :       LOGICAL                                            :: check_product, my_keep_sparsity
     706              :       REAL(KIND=dp)                                      :: my_alpha, norm
     707              :       TYPE(cp_fm_struct_type), POINTER                   :: fm_struct_tmp
     708              :       TYPE(cp_fm_type)                                   :: fm_matrix
     709              :       TYPE(dbcsr_distribution_type)                      :: dist_left, sparse_dist
     710              :       TYPE(dbcsr_type)                                   :: mat_g, mat_v, sparse_matrix2, &
     711              :                                                             sparse_matrix3
     712              : 
     713       243648 :       check_product = .FALSE.
     714              : 
     715       243648 :       CALL timeset(routineN, timing_handle)
     716              : 
     717       243648 :       my_keep_sparsity = .TRUE.
     718       243648 :       IF (PRESENT(keep_sparsity)) my_keep_sparsity = keep_sparsity
     719              : 
     720       243648 :       my_symmetry_mode = 0
     721       243648 :       IF (PRESENT(symmetry_mode)) my_symmetry_mode = symmetry_mode
     722              : 
     723       243648 :       NULLIFY (col_dist_left)
     724              : 
     725       243648 :       IF (ncol > 0) THEN
     726       242074 :          IF (.NOT. dbcsr_valid_index(sparse_matrix)) &
     727            0 :             CPABORT("sparse_matrix must pre-exist")
     728              :          !
     729              :          ! Setup matrix_v
     730       242074 :          CALL cp_fm_get_info(matrix_v, ncol_global=k)
     731              :          !WRITE(*,*)routineN//'truncated mult k, ncol',k,ncol,' PRESENT (matrix_g)',PRESENT (matrix_g)
     732       242074 :          CALL dbcsr_get_info(sparse_matrix, distribution=sparse_dist)
     733       242074 :          CALL dbcsr_distribution_get(sparse_dist, npcols=npcols, row_dist=row_dist)
     734       242074 :          CALL create_bl_distribution(col_dist_left, col_blk_size_left, k, npcols)
     735              :          CALL dbcsr_distribution_new(dist_left, template=sparse_dist, &
     736       242074 :                                      row_dist=row_dist, col_dist=col_dist_left)
     737       242074 :          DEALLOCATE (col_dist_left)
     738       242074 :          CALL dbcsr_get_info(sparse_matrix, row_blk_size=row_blk_size)
     739              :          CALL dbcsr_create(mat_v, "DBCSR matrix_v", dist_left, dbcsr_type_no_symmetry, &
     740       242074 :                            row_blk_size, col_blk_size_left)
     741       242074 :          CALL copy_fm_to_dbcsr(matrix_v, mat_v)
     742       242074 :          CALL dbcsr_verify_matrix(mat_v)
     743              :          !
     744              :          ! Setup matrix_g
     745       242074 :          IF (PRESENT(matrix_g)) THEN
     746              :             CALL dbcsr_create(mat_g, "DBCSR matrix_g", dist_left, dbcsr_type_no_symmetry, &
     747       116561 :                               row_blk_size, col_blk_size_left)
     748       116561 :             CALL copy_fm_to_dbcsr(matrix_g, mat_g)
     749              :          END IF
     750              :          !
     751       242074 :          DEALLOCATE (col_blk_size_left)
     752       242074 :          CALL dbcsr_distribution_release(dist_left)
     753              :          !
     754              :          !
     755              :          IF (check_product) THEN
     756              :             CALL cp_fm_get_info(matrix_v, nrow_global=nao)
     757              :             CALL cp_fm_struct_create(fm_struct_tmp, context=matrix_v%matrix_struct%context, nrow_global=nao, &
     758              :                                      ncol_global=nao, para_env=matrix_v%matrix_struct%para_env)
     759              :             CALL cp_fm_create(fm_matrix, fm_struct_tmp, name="fm matrix")
     760              :             CALL cp_fm_struct_release(fm_struct_tmp)
     761              :             CALL copy_dbcsr_to_fm(sparse_matrix, fm_matrix)
     762              :             CALL dbcsr_copy(sparse_matrix3, sparse_matrix)
     763              :          END IF
     764              :          !
     765       242074 :          my_alpha = 1.0_dp
     766       242074 :          IF (PRESENT(alpha)) my_alpha = alpha
     767       242074 :          IF (PRESENT(matrix_g)) THEN
     768       116561 :             IF (my_symmetry_mode == 1) THEN
     769              :                ! Symmetric mode
     770              :                CALL dbcsr_multiply("N", "T", 0.5_dp*my_alpha, mat_v, mat_g, &
     771              :                                    1.0_dp, sparse_matrix, &
     772              :                                    retain_sparsity=my_keep_sparsity, &
     773        41544 :                                    last_k=ncol)
     774              :                CALL dbcsr_multiply("N", "T", 0.5_dp*my_alpha, mat_g, mat_v, &
     775              :                                    1.0_dp, sparse_matrix, &
     776              :                                    retain_sparsity=my_keep_sparsity, &
     777        41544 :                                    last_k=ncol)
     778        75017 :             ELSE IF (my_symmetry_mode == -1) THEN
     779              :                ! Skewsymmetric mode
     780              :                CALL dbcsr_multiply("N", "T", 0.5_dp*my_alpha, mat_v, mat_g, &
     781              :                                    1.0_dp, sparse_matrix, &
     782              :                                    retain_sparsity=my_keep_sparsity, &
     783         2362 :                                    last_k=ncol)
     784              :                CALL dbcsr_multiply("N", "T", -0.5_dp*my_alpha, mat_g, mat_v, &
     785              :                                    1.0_dp, sparse_matrix, &
     786              :                                    retain_sparsity=my_keep_sparsity, &
     787         2362 :                                    last_k=ncol)
     788              :             ELSE
     789              :                ! Normal mode
     790              :                CALL dbcsr_multiply("N", "T", my_alpha, mat_v, mat_g, &
     791              :                                    1.0_dp, sparse_matrix, &
     792              :                                    retain_sparsity=my_keep_sparsity, &
     793        72655 :                                    last_k=ncol)
     794              :             END IF
     795              :          ELSE
     796              :             CALL dbcsr_multiply("N", "T", my_alpha, mat_v, mat_v, &
     797              :                                 1.0_dp, sparse_matrix, &
     798              :                                 retain_sparsity=my_keep_sparsity, &
     799       125513 :                                 last_k=ncol)
     800              :          END IF
     801              : 
     802              :          IF (check_product) THEN
     803              :             IF (PRESENT(matrix_g)) THEN
     804              :                IF (my_symmetry_mode == 1) THEN
     805              :                   CALL cp_fm_gemm("N", "T", nao, nao, ncol, 0.5_dp*my_alpha, matrix_v, matrix_g, &
     806              :                                   1.0_dp, fm_matrix)
     807              :                   CALL cp_fm_gemm("N", "T", nao, nao, ncol, 0.5_dp*my_alpha, matrix_g, matrix_v, &
     808              :                                   1.0_dp, fm_matrix)
     809              :                ELSE IF (my_symmetry_mode == -1) THEN
     810              :                   CALL cp_fm_gemm("N", "T", nao, nao, ncol, 0.5_dp*my_alpha, matrix_v, matrix_g, &
     811              :                                   1.0_dp, fm_matrix)
     812              :                   CALL cp_fm_gemm("N", "T", nao, nao, ncol, -0.5_dp*my_alpha, matrix_g, matrix_v, &
     813              :                                   1.0_dp, fm_matrix)
     814              :                ELSE
     815              :                   CALL cp_fm_gemm("N", "T", nao, nao, ncol, my_alpha, matrix_v, matrix_g, &
     816              :                                   1.0_dp, fm_matrix)
     817              :                END IF
     818              :             ELSE
     819              :                CALL cp_fm_gemm("N", "T", nao, nao, ncol, my_alpha, matrix_v, matrix_v, &
     820              :                                1.0_dp, fm_matrix)
     821              :             END IF
     822              : 
     823              :             CALL dbcsr_copy(sparse_matrix2, sparse_matrix)
     824              :             CALL dbcsr_scale(sparse_matrix2, alpha_scalar=0.0_dp)
     825              :             CALL copy_fm_to_dbcsr(fm_matrix, sparse_matrix2, keep_sparsity=my_keep_sparsity)
     826              :             CALL dbcsr_add(sparse_matrix2, sparse_matrix, alpha_scalar=1.0_dp, &
     827              :                            beta_scalar=-1.0_dp)
     828              :             norm = dbcsr_frobenius_norm(sparse_matrix2)
     829              :             WRITE (*, *) 'nao=', nao, ' k=', k, ' ncol=', ncol, ' my_alpha=', my_alpha
     830              :             WRITE (*, *) 'PRESENT (matrix_g)', PRESENT(matrix_g)
     831              :             WRITE (*, *) 'matrix_type=', dbcsr_get_matrix_type(sparse_matrix)
     832              :             WRITE (*, *) 'norm(sm+alpha*v*g^t - fm+alpha*v*g^t)/n=', norm/REAL(nao, dp)
     833              :             IF (norm/REAL(nao, dp) > 1e-12_dp) THEN
     834              :                !WRITE(*,*) 'fm_matrix'
     835              :                !DO j=1,SIZE(fm_matrix%local_data,2)
     836              :                !   DO i=1,SIZE(fm_matrix%local_data,1)
     837              :                !      WRITE(*,'(A,I3,A,I3,A,E26.16,A)') 'a(',i,',',j,')=',fm_matrix%local_data(i,j),';'
     838              :                !   ENDDO
     839              :                !ENDDO
     840              :                !WRITE(*,*) 'mat_v'
     841              :                !CALL dbcsr_print(mat_v)
     842              :                !WRITE(*,*) 'mat_g'
     843              :                !CALL dbcsr_print(mat_g)
     844              :                !WRITE(*,*) 'sparse_matrix'
     845              :                !CALL dbcsr_print(sparse_matrix)
     846              :                !WRITE(*,*) 'sparse_matrix2 (-sm + sparse(fm))'
     847              :                !CALL dbcsr_print(sparse_matrix2)
     848              :                !WRITE(*,*) 'sparse_matrix3 (copy of sm input)'
     849              :                !CALL dbcsr_print(sparse_matrix3)
     850              :                !stop
     851              :             END IF
     852              :             CALL dbcsr_release(sparse_matrix2)
     853              :             CALL dbcsr_release(sparse_matrix3)
     854              :             CALL cp_fm_release(fm_matrix)
     855              :          END IF
     856       242074 :          CALL dbcsr_release(mat_v)
     857       242074 :          IF (PRESENT(matrix_g)) CALL dbcsr_release(mat_g)
     858              :       END IF
     859       243648 :       CALL timestop(timing_handle)
     860              : 
     861       243648 :    END SUBROUTINE cp_dbcsr_plus_fm_fm_t
     862              : 
     863              : ! **************************************************************************************************
     864              : !> \brief Utility function to copy a specially shaped fm to dbcsr_matrix
     865              : !>        The result matrix will be the matrix in dbcsr format
     866              : !>        with the row blocks sizes according to the block_sizes of the template
     867              : !>        and the col blocks sizes evenly blocked with the internal dbcsr conversion
     868              : !>        size (32 is the current default)
     869              : !> \param matrix ...
     870              : !> \param fm_in ...
     871              : !> \param template ...
     872              : ! **************************************************************************************************
     873        15255 :    SUBROUTINE cp_fm_to_dbcsr_row_template(matrix, fm_in, template)
     874              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
     875              :       TYPE(cp_fm_type), INTENT(IN)                       :: fm_in
     876              :       TYPE(dbcsr_type), INTENT(IN)                       :: template
     877              : 
     878              :       INTEGER                                            :: k_in
     879         5085 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size_right_in, row_blk_size
     880              :       TYPE(dbcsr_distribution_type)                      :: dist_right_in, tmpl_dist
     881              : 
     882         5085 :       CALL cp_fm_get_info(fm_in, ncol_global=k_in)
     883              : 
     884         5085 :       CALL dbcsr_get_info(template, distribution=tmpl_dist)
     885         5085 :       CALL dbcsr_create_dist_r_unrot(dist_right_in, tmpl_dist, k_in, col_blk_size_right_in)
     886         5085 :       CALL dbcsr_get_info(template, row_blk_size=row_blk_size)
     887              :       CALL dbcsr_create(matrix, "D", dist_right_in, dbcsr_type_no_symmetry, &
     888         5085 :                         row_blk_size, col_blk_size_right_in)
     889              : 
     890         5085 :       CALL copy_fm_to_dbcsr(fm_in, matrix)
     891         5085 :       DEALLOCATE (col_blk_size_right_in)
     892         5085 :       CALL dbcsr_distribution_release(dist_right_in)
     893              : 
     894         5085 :    END SUBROUTINE cp_fm_to_dbcsr_row_template
     895              : 
     896              : ! **************************************************************************************************
     897              : !> \brief Utility function to create an arbitrary shaped dbcsr matrix
     898              : !>        with the same processor grid as the template matrix
     899              : !>        both row sizes and col sizes are evenly blocked with the internal
     900              : !>        dbcsr_conversion size (32 is the current default)
     901              : !> \param matrix dbcsr matrix to be created
     902              : !> \param template template dbcsr matrix giving its mp_env
     903              : !> \param m global row size of output matrix
     904              : !> \param n global col size of output matrix
     905              : !> \param sym ...
     906              : ! **************************************************************************************************
     907       303456 :    SUBROUTINE cp_dbcsr_m_by_n_from_template(matrix, template, m, n, sym)
     908              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix, template
     909              :       INTEGER, INTENT(IN)                                :: m, n
     910              :       CHARACTER, INTENT(IN), OPTIONAL                    :: sym
     911              : 
     912              :       CHARACTER                                          :: mysym
     913              :       INTEGER                                            :: npcols, nprows
     914       151728 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, col_dist, row_blk_size, &
     915       151728 :                                                             row_dist
     916              :       TYPE(dbcsr_distribution_type)                      :: dist_m_n, tmpl_dist
     917              : 
     918       151728 :       CALL dbcsr_get_info(template, matrix_type=mysym, distribution=tmpl_dist)
     919              : 
     920       151728 :       IF (PRESENT(sym)) mysym = sym
     921              : 
     922       151728 :       NULLIFY (row_dist, col_dist)
     923       151728 :       NULLIFY (row_blk_size, col_blk_size)
     924              :       !NULLIFY (row_cluster, col_cluster)
     925              : 
     926       151728 :       CALL dbcsr_distribution_get(tmpl_dist, nprows=nprows, npcols=npcols)
     927       151728 :       CALL create_bl_distribution(row_dist, row_blk_size, m, nprows)
     928       151728 :       CALL create_bl_distribution(col_dist, col_blk_size, n, npcols)
     929              :       CALL dbcsr_distribution_new(dist_m_n, template=tmpl_dist, &
     930              :                                   row_dist=row_dist, col_dist=col_dist, &
     931              :                                   !row_cluster=row_cluster, col_cluster=col_cluster, &
     932       151728 :                                   reuse_arrays=.TRUE.)
     933              : 
     934              :       CALL dbcsr_create(matrix, "m_n_template", dist_m_n, mysym, &
     935       151728 :                         row_blk_size, col_blk_size, reuse_arrays=.TRUE.)
     936       151728 :       CALL dbcsr_distribution_release(dist_m_n)
     937              : 
     938       151728 :    END SUBROUTINE cp_dbcsr_m_by_n_from_template
     939              : 
     940              : ! **************************************************************************************************
     941              : !> \brief Utility function to create dbcsr matrix, m x n matrix (n arbitrary)
     942              : !>        with the same processor grid and row distribution  as the template matrix
     943              : !>        col sizes are evenly blocked with the internal
     944              : !>        dbcsr_conversion size (32 is the current default)
     945              : !> \param matrix dbcsr matrix to be created
     946              : !> \param template template dbcsr matrix giving its mp_env
     947              : !> \param n global col size of output matrix
     948              : !> \param sym ...
     949              : ! **************************************************************************************************
     950       556392 :    SUBROUTINE cp_dbcsr_m_by_n_from_row_template(matrix, template, n, sym)
     951              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix, template
     952              :       INTEGER                                            :: n
     953              :       CHARACTER, OPTIONAL                                :: sym
     954              : 
     955              :       CHARACTER                                          :: mysym
     956              :       INTEGER                                            :: npcols
     957       139098 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, col_dist, row_blk_size, &
     958       139098 :                                                             row_dist
     959              :       TYPE(dbcsr_distribution_type)                      :: dist_m_n, tmpl_dist
     960              : 
     961       278196 :       mysym = dbcsr_get_matrix_type(template)
     962       139098 :       IF (PRESENT(sym)) mysym = sym
     963              : 
     964       139098 :       CALL dbcsr_get_info(template, distribution=tmpl_dist)
     965              :       CALL dbcsr_distribution_get(tmpl_dist, &
     966              :                                   npcols=npcols, &
     967       139098 :                                   row_dist=row_dist)
     968              : 
     969       139098 :       NULLIFY (col_dist, col_blk_size)
     970       139098 :       CALL create_bl_distribution(col_dist, col_blk_size, n, npcols)
     971              :       CALL dbcsr_distribution_new(dist_m_n, template=tmpl_dist, &
     972       139098 :                                   row_dist=row_dist, col_dist=col_dist)
     973              : 
     974       139098 :       CALL dbcsr_get_info(template, row_blk_size=row_blk_size)
     975       139098 :       CALL dbcsr_create(matrix, "m_n_template", dist_m_n, mysym, row_blk_size, col_blk_size)
     976              : 
     977       139098 :       DEALLOCATE (col_dist, col_blk_size)
     978       139098 :       CALL dbcsr_distribution_release(dist_m_n)
     979              : 
     980       139098 :    END SUBROUTINE cp_dbcsr_m_by_n_from_row_template
     981              : 
     982              : ! **************************************************************************************************
     983              : !> \brief Distributes elements into blocks and into bins
     984              : !>
     985              : !> \param[out] block_distribution       block distribution to bins
     986              : !> \param[out] block_size       sizes of blocks
     987              : !> \param[in] nelements number of elements to bin
     988              : !> \param[in] nbins             number of bins
     989              : !> \par Term clarification
     990              : !>      An example: blocks are atom blocks and bins are process rows/columns.
     991              : ! **************************************************************************************************
     992      1186890 :    SUBROUTINE create_bl_distribution(block_distribution, &
     993              :                                      block_size, nelements, nbins)
     994              :       INTEGER, DIMENSION(:), INTENT(OUT), POINTER        :: block_distribution, block_size
     995              :       INTEGER, INTENT(IN)                                :: nelements, nbins
     996              : 
     997              :       CHARACTER(len=*), PARAMETER :: routineN = 'create_bl_distribution', &
     998              :          routineP = moduleN//':'//routineN
     999              : 
    1000              :       INTEGER                                            :: bin, blk_layer, element_stack, els, &
    1001              :                                                             estimated_blocks, max_blocks_per_bin, &
    1002              :                                                             nblks, nblocks, stat
    1003      1186890 :       INTEGER, DIMENSION(:), POINTER                     :: blk_dist, blk_sizes
    1004              : 
    1005              : !   ---------------------------------------------------------------------------
    1006              : 
    1007      1186890 :       NULLIFY (block_distribution)
    1008      1186890 :       NULLIFY (block_size)
    1009              :       ! Define the sizes on which we build the distribution.
    1010      1186890 :       IF (nelements > 0) THEN
    1011              : 
    1012      1178798 :          nblocks = CEILING(REAL(nelements, KIND=dp)/REAL(max_elements_per_block, KIND=dp))
    1013      1178798 :          max_blocks_per_bin = CEILING(REAL(nblocks, KIND=dp)/REAL(nbins, KIND=dp))
    1014              : 
    1015              :          IF (debug_mod) THEN
    1016              :             WRITE (*, '(1X,A,1X,A,I7,A,I7,A)') routineP, "For", nelements, &
    1017              :                " elements and", nbins, " bins"
    1018              :             WRITE (*, '(1X,A,1X,A,I7,A)') routineP, "There are", &
    1019              :                max_elements_per_block, " max elements per block"
    1020              :             WRITE (*, '(1X,A,1X,A,I7,A)') routineP, "There are", &
    1021              :                nblocks, " blocks"
    1022              :             WRITE (*, '(1X,A,1X,A,I7,A)') routineP, "There are", &
    1023              :                max_blocks_per_bin, " max blocks/bin"
    1024              :          END IF
    1025              : 
    1026      1178798 :          estimated_blocks = max_blocks_per_bin*nbins
    1027      3536394 :          ALLOCATE (blk_dist(estimated_blocks), stat=stat)
    1028      1178798 :          IF (stat /= 0) &
    1029            0 :             CPABORT("blk_dist")
    1030      2357596 :          ALLOCATE (blk_sizes(estimated_blocks), stat=stat)
    1031              :          IF (stat /= 0) &
    1032            0 :             CPABORT("blk_sizes")
    1033      1178798 :          element_stack = 0
    1034      1178798 :          nblks = 0
    1035      2437694 :          DO blk_layer = 1, max_blocks_per_bin
    1036      3828912 :             DO bin = 0, nbins - 1
    1037      1391218 :                els = MIN(max_elements_per_block, nelements - element_stack)
    1038      2650114 :                IF (els > 0) THEN
    1039      1269752 :                   element_stack = element_stack + els
    1040      1269752 :                   nblks = nblks + 1
    1041      1269752 :                   blk_dist(nblks) = bin
    1042      1269752 :                   blk_sizes(nblks) = els
    1043              :                   IF (debug_mod) WRITE (*, '(1X,A,I5,A,I5,A,I5)') routineP//" Assigning", &
    1044              :                      els, " elements as block", nblks, " to bin", bin
    1045              :                END IF
    1046              :             END DO
    1047              :          END DO
    1048              :          ! Create the output arrays.
    1049      1178798 :          IF (nblks == estimated_blocks) THEN
    1050      1057332 :             block_distribution => blk_dist
    1051      1057332 :             block_size => blk_sizes
    1052              :          ELSE
    1053       364398 :             ALLOCATE (block_distribution(nblks), stat=stat)
    1054              :             IF (stat /= 0) &
    1055            0 :                CPABORT("blk_dist")
    1056       489144 :             block_distribution(:) = blk_dist(1:nblks)
    1057       121466 :             DEALLOCATE (blk_dist)
    1058       242932 :             ALLOCATE (block_size(nblks), stat=stat)
    1059              :             IF (stat /= 0) &
    1060            0 :                CPABORT("blk_sizes")
    1061       489144 :             block_size(:) = blk_sizes(1:nblks)
    1062       121466 :             DEALLOCATE (blk_sizes)
    1063              :          END IF
    1064              :       ELSE
    1065         8092 :          ALLOCATE (block_distribution(0), stat=stat)
    1066              :          IF (stat /= 0) &
    1067            0 :             CPABORT("blk_dist")
    1068         8092 :          ALLOCATE (block_size(0), stat=stat)
    1069              :          IF (stat /= 0) &
    1070            0 :             CPABORT("blk_sizes")
    1071              :       END IF
    1072              : 1579  FORMAT(I5, 1X, I5, 1X, I5, 1X, I5, 1X, I5, 1X, I5, 1X, I5, 1X, I5, 1X, I5, 1X, I5)
    1073              :       IF (debug_mod) THEN
    1074              :          WRITE (*, '(1X,A,A)') routineP//" Distribution"
    1075              :          WRITE (*, 1579) block_distribution(:)
    1076              :          WRITE (*, '(1X,A,A)') routineP//" Sizes"
    1077              :          WRITE (*, 1579) block_size(:)
    1078              :       END IF
    1079      1186890 :    END SUBROUTINE create_bl_distribution
    1080              : 
    1081              : ! **************************************************************************************************
    1082              : !> \brief Creates a new distribution for the right matrix in a matrix
    1083              : !>        multiplication with unrotated grid.
    1084              : !> \param[out] dist_right     new distribution for the right matrix
    1085              : !> \param[in] dist_left       the distribution of the left matrix
    1086              : !> \param[in] ncolumns        number of columns in right matrix
    1087              : !> \param[out] right_col_blk_sizes      sizes of blocks in the created column
    1088              : !> \par The new row distribution for the right matrix is the same as the row
    1089              : !>      distribution of the left matrix, while the column distribution is
    1090              : !>      created so that it is appropriate to the parallel environment.
    1091              : ! **************************************************************************************************
    1092       502262 :    SUBROUTINE dbcsr_create_dist_r_unrot(dist_right, dist_left, ncolumns, &
    1093              :                                         right_col_blk_sizes)
    1094              :       TYPE(dbcsr_distribution_type), INTENT(OUT)         :: dist_right
    1095              :       TYPE(dbcsr_distribution_type), INTENT(IN)          :: dist_left
    1096              :       INTEGER, INTENT(IN)                                :: ncolumns
    1097              :       INTEGER, DIMENSION(:), INTENT(OUT), POINTER        :: right_col_blk_sizes
    1098              : 
    1099              :       INTEGER                                            :: multiplicity, ncols, nimages, npcols, &
    1100              :                                                             nprows
    1101              :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: tmp_images
    1102       502262 :       INTEGER, DIMENSION(:), POINTER                     :: old_col_dist, right_col_dist, &
    1103       502262 :                                                             right_row_dist
    1104              : 
    1105              :       CALL dbcsr_distribution_get(dist_left, &
    1106              :                                   ncols=ncols, &
    1107              :                                   col_dist=old_col_dist, &
    1108              :                                   nprows=nprows, &
    1109       502262 :                                   npcols=npcols)
    1110              : 
    1111              :       ! Create the column distribution
    1112       502262 :       CALL create_bl_distribution(right_col_dist, right_col_blk_sizes, ncolumns, npcols)
    1113              :       ! Create an even row distribution.
    1114      2009048 :       ALLOCATE (right_row_dist(ncols), tmp_images(ncols))
    1115       502262 :       nimages = lcm(nprows, npcols)/nprows
    1116       502262 :       multiplicity = nprows/gcd(nprows, npcols)
    1117       502262 :       CALL rebin_distribution(right_row_dist, tmp_images, old_col_dist, nprows, multiplicity, nimages)
    1118              : 
    1119              :       CALL dbcsr_distribution_new(dist_right, &
    1120              :                                   template=dist_left, &
    1121              :                                   row_dist=right_row_dist, &
    1122              :                                   col_dist=right_col_dist, &
    1123              :                                   !row_cluster=dummy,&
    1124              :                                   !col_cluster=dummy,&
    1125       502262 :                                   reuse_arrays=.TRUE.)
    1126       502262 :       DEALLOCATE (tmp_images)
    1127       502262 :    END SUBROUTINE dbcsr_create_dist_r_unrot
    1128              : 
    1129              : ! **************************************************************************************************
    1130              : !> \brief Makes new distribution with decimation and multiplicity
    1131              : !> \param[out] new_bins      new real distribution
    1132              : !> \param[out] images        new image distribution
    1133              : !> \param[in] source_bins    Basis for the new distribution and images
    1134              : !> \param[in] nbins          number of bins in the new real distribution
    1135              : !> \param[in] multiplicity   multiplicity
    1136              : !> \param[in] nimages        number of images in the new distribution
    1137              : !> \par Definition of multiplicity and nimages
    1138              : !>      Multiplicity and decimation (number of images) are used to
    1139              : !>      match process grid coordinates on non-square process
    1140              : !>      grids. Given source_nbins and target_nbins, their relation is
    1141              : !>                source_nbins * target_multiplicity
    1142              : !>              = target_nbins * target_nimages.
    1143              : !>      It is best when both multiplicity and nimages are small. To
    1144              : !>      get these two factors, then, one can use the following formulas:
    1145              : !>          nimages      = lcm(source_nbins, target_nbins) / target_nbins
    1146              : !>          multiplicity = target_nbins / gcd(source_nbins, target_nbins)
    1147              : !>      from the target's point of view (nimages = target_nimages).
    1148              : !> \par Mapping
    1149              : !>      The new distribution comprises of real bins and images within
    1150              : !>      bins. These can be view as target_nbins*nimages virtual
    1151              : !>      columns. These same virtual columns are also
    1152              : !>      source_nbins*multiplicity in number. Therefore these virtual
    1153              : !>      columns are mapped from source_nbins*multiplicity onto
    1154              : !>      target_bins*nimages (each target bin has nimages images):
    1155              : !>      Source 4: |1 2 3|4 5 6|7 8 9|A B C| (4*3)
    1156              : !>      Target 6: |1 2|3 4|5 6|7 8|9 A|B C| (6*2)
    1157              : !>      multiplicity=3, nimages=2, 12 virtual columns (1-C).
    1158              : !>      Source bin elements are evenly mapped into one of multiplicity
    1159              : !>      virtual columns. Other (non-even, block-size aware) mappings
    1160              : !>      could be better.
    1161              : ! **************************************************************************************************
    1162       502262 :    SUBROUTINE rebin_distribution(new_bins, images, source_bins, &
    1163              :                                  nbins, multiplicity, nimages)
    1164              :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: new_bins, images
    1165              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: source_bins
    1166              :       INTEGER, INTENT(IN)                                :: nbins, multiplicity, nimages
    1167              : 
    1168              :       INTEGER                                            :: bin, i, old_nbins, virtual_bin
    1169       502262 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: bin_multiplier
    1170              : 
    1171              : !   ---------------------------------------------------------------------------
    1172              : 
    1173       502262 :       IF (MOD(nbins*nimages, multiplicity) /= 0) THEN
    1174            0 :          CPWARN("mulitplicity is not divisor of new process grid coordinate")
    1175              :       END IF
    1176       502262 :       old_nbins = (nbins*nimages)/multiplicity
    1177      1506786 :       ALLOCATE (bin_multiplier(0:old_nbins - 1))
    1178       502262 :       bin_multiplier(:) = 0
    1179      2397899 :       DO i = 1, SIZE(new_bins)
    1180      1895637 :          IF (i <= SIZE(source_bins)) THEN
    1181      1895637 :             bin = source_bins(i)
    1182              :          ELSE
    1183              :             ! Fill remainder with a cyclic distribution
    1184            0 :             bin = MOD(i, old_nbins)
    1185              :          END IF
    1186      1895637 :          virtual_bin = bin*multiplicity + bin_multiplier(bin)
    1187      1895637 :          new_bins(i) = virtual_bin/nimages
    1188      1895637 :          images(i) = 1 + MOD(virtual_bin, nimages)
    1189      1895637 :          bin_multiplier(bin) = bin_multiplier(bin) + 1
    1190      2397899 :          IF (bin_multiplier(bin) >= multiplicity) THEN
    1191       813269 :             bin_multiplier(bin) = 0
    1192              :          END IF
    1193              :       END DO
    1194       502262 :    END SUBROUTINE rebin_distribution
    1195              : 
    1196              : ! **************************************************************************************************
    1197              : !> \brief Creates a block-cyclic compatible distribution
    1198              : !>
    1199              : !>        All blocks in a dimension, except for possibly the last
    1200              : !>        block, have the same size.
    1201              : !> \param[out] dist           the elemental distribution
    1202              : !> \param[in] nrows           number of full rows
    1203              : !> \param[in] ncolumns        number of full columns
    1204              : !> \param[in] nrow_block      size of row blocks
    1205              : !> \param[in] ncol_block      size of column blocks
    1206              : !> \param group_handle ...
    1207              : !> \param pgrid ...
    1208              : !> \param[out] row_blk_sizes  row block sizes
    1209              : !> \param[out] col_blk_sizes  column block sizes
    1210              : ! **************************************************************************************************
    1211      2902208 :    SUBROUTINE dbcsr_create_dist_block_cyclic(dist, nrows, ncolumns, &
    1212              :                                              nrow_block, ncol_block, group_handle, pgrid, row_blk_sizes, col_blk_sizes)
    1213              :       TYPE(dbcsr_distribution_type), INTENT(OUT)         :: dist
    1214              :       INTEGER, INTENT(IN)                                :: nrows, ncolumns, nrow_block, ncol_block, &
    1215              :                                                             group_handle
    1216              :       INTEGER, DIMENSION(:, :), POINTER                  :: pgrid
    1217              :       INTEGER, DIMENSION(:), INTENT(OUT), POINTER        :: row_blk_sizes, col_blk_sizes
    1218              : 
    1219              :       CHARACTER(len=*), PARAMETER :: routineN = 'dbcsr_create_dist_block_cyclic'
    1220              : 
    1221              :       INTEGER                                            :: nblkcols, nblkrows, npcols, nprows, &
    1222              :                                                             pdim, sz
    1223      2902208 :       INTEGER, DIMENSION(:), POINTER                     :: cd_data, rd_data
    1224              : 
    1225              :       ! Row sizes
    1226      2902208 :       IF (nrow_block == 0) THEN
    1227              :          nblkrows = 0
    1228              :          sz = 0
    1229              :       ELSE
    1230      2902208 :          nblkrows = nrows/nrow_block
    1231      2902208 :          sz = MOD(nrows, nrow_block)
    1232              :       END IF
    1233      2902208 :       IF (sz > 0) nblkrows = nblkrows + 1
    1234     11607728 :       ALLOCATE (row_blk_sizes(nblkrows), rd_data(nblkrows))
    1235     11638968 :       row_blk_sizes = nrow_block
    1236      2902208 :       IF (sz /= 0) row_blk_sizes(nblkrows) = sz
    1237              : 
    1238              :       ! Column sizes
    1239      2902208 :       IF (ncol_block == 0) THEN
    1240              :          nblkcols = 0
    1241              :          sz = 0
    1242              :       ELSE
    1243      2902208 :          nblkcols = ncolumns/ncol_block
    1244      2902208 :          sz = MOD(ncolumns, ncol_block)
    1245              :       END IF
    1246      2902208 :       IF (sz > 0) nblkcols = nblkcols + 1
    1247     11605232 :       ALLOCATE (col_blk_sizes(nblkcols), cd_data(nblkcols))
    1248      9449002 :       col_blk_sizes = ncol_block
    1249      2902208 :       IF (sz /= 0) col_blk_sizes(nblkcols) = sz
    1250              :       !
    1251              :       IF (debug_mod) THEN
    1252              :          WRITE (*, *) routineN//" nrows,nrow_block,nblkrows=", &
    1253              :             nrows, nrow_block, nblkrows
    1254              :          WRITE (*, *) routineN//" ncols,ncol_block,nblkcols=", &
    1255              :             ncolumns, ncol_block, nblkcols
    1256              :       END IF
    1257              :       ! Calculate process row distribution
    1258      2902208 :       nprows = SIZE(pgrid, 1)
    1259      8631984 :       DO pdim = 0, MIN(nprows - 1, nblkrows - 1)
    1260     17368744 :          rd_data(1 + pdim:nblkrows:nprows) = pdim
    1261              :       END DO
    1262              :       ! Calculate process column distribution
    1263      2902208 :       npcols = SIZE(pgrid, 2)
    1264      5802616 :       DO pdim = 0, MIN(npcols - 1, nblkcols - 1)
    1265     12349410 :          cd_data(1 + pdim:nblkcols:npcols) = pdim
    1266              :       END DO
    1267              :       !
    1268              :       IF (debug_mod) THEN
    1269              :          WRITE (*, *) routineN//" row_dist", &
    1270              :             rd_data
    1271              :          WRITE (*, *) routineN//" col_dist", &
    1272              :             cd_data
    1273              :       END IF
    1274              :       !
    1275              :       CALL dbcsr_distribution_new(dist, &
    1276              :                                   group=group_handle, pgrid=pgrid, &
    1277              :                                   row_dist=rd_data, &
    1278              :                                   col_dist=cd_data, &
    1279      2902208 :                                   reuse_arrays=.TRUE.)
    1280              : 
    1281      2902208 :    END SUBROUTINE dbcsr_create_dist_block_cyclic
    1282              : 
    1283              : ! **************************************************************************************************
    1284              : !> \brief   Allocate and initialize a real matrix 1-dimensional set.
    1285              : !> \param[in,out] matrix_set  Set containing the DBCSR matrices
    1286              : !> \param[in] nmatrix         Size of set
    1287              : !> \par History
    1288              : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1289              : ! **************************************************************************************************
    1290       195323 :    SUBROUTINE allocate_dbcsr_matrix_set_1d(matrix_set, nmatrix)
    1291              :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_set
    1292              :       INTEGER, INTENT(IN)                                :: nmatrix
    1293              : 
    1294              :       INTEGER                                            :: imatrix
    1295              : 
    1296       195323 :       IF (ASSOCIATED(matrix_set)) CALL dbcsr_deallocate_matrix_set(matrix_set)
    1297      1292188 :       ALLOCATE (matrix_set(nmatrix))
    1298       901542 :       DO imatrix = 1, nmatrix
    1299       901542 :          NULLIFY (matrix_set(imatrix)%matrix)
    1300              :       END DO
    1301       195323 :    END SUBROUTINE allocate_dbcsr_matrix_set_1d
    1302              : 
    1303              : ! **************************************************************************************************
    1304              : !> \brief   Allocate and initialize a real matrix 2-dimensional set.
    1305              : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1306              : !> \param[in] nmatrix         Size of set
    1307              : !> \param mmatrix ...
    1308              : !> \par History
    1309              : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1310              : ! **************************************************************************************************
    1311       185952 :    SUBROUTINE allocate_dbcsr_matrix_set_2d(matrix_set, nmatrix, mmatrix)
    1312              :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: matrix_set
    1313              :       INTEGER, INTENT(IN)                                :: nmatrix, mmatrix
    1314              : 
    1315              :       INTEGER                                            :: imatrix, jmatrix
    1316              : 
    1317       185952 :       IF (ASSOCIATED(matrix_set)) CALL dbcsr_deallocate_matrix_set(matrix_set)
    1318      3900968 :       ALLOCATE (matrix_set(nmatrix, mmatrix))
    1319      1695592 :       DO jmatrix = 1, mmatrix
    1320      3343112 :          DO imatrix = 1, nmatrix
    1321      3157160 :             NULLIFY (matrix_set(imatrix, jmatrix)%matrix)
    1322              :          END DO
    1323              :       END DO
    1324       185952 :    END SUBROUTINE allocate_dbcsr_matrix_set_2d
    1325              : 
    1326              : ! **************************************************************************************************
    1327              : !> \brief   Allocate and initialize a real matrix 3-dimensional set.
    1328              : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1329              : !> \param[in] nmatrix         Size of set
    1330              : !> \param mmatrix ...
    1331              : !> \param pmatrix ...
    1332              : !> \par History
    1333              : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1334              : ! **************************************************************************************************
    1335            0 :    SUBROUTINE allocate_dbcsr_matrix_set_3d(matrix_set, nmatrix, mmatrix, pmatrix)
    1336              :       TYPE(dbcsr_p_type), DIMENSION(:, :, :), POINTER    :: matrix_set
    1337              :       INTEGER, INTENT(IN)                                :: nmatrix, mmatrix, pmatrix
    1338              : 
    1339              :       INTEGER                                            :: imatrix, jmatrix, kmatrix
    1340              : 
    1341            0 :       IF (ASSOCIATED(matrix_set)) CALL dbcsr_deallocate_matrix_set(matrix_set)
    1342            0 :       ALLOCATE (matrix_set(nmatrix, mmatrix, pmatrix))
    1343            0 :       DO kmatrix = 1, pmatrix
    1344            0 :          DO jmatrix = 1, mmatrix
    1345            0 :             DO imatrix = 1, nmatrix
    1346            0 :                NULLIFY (matrix_set(imatrix, jmatrix, kmatrix)%matrix)
    1347              :             END DO
    1348              :          END DO
    1349              :       END DO
    1350            0 :    END SUBROUTINE allocate_dbcsr_matrix_set_3d
    1351              : 
    1352              : ! **************************************************************************************************
    1353              : !> \brief   Allocate and initialize a real matrix 4-dimensional set.
    1354              : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1355              : !> \param[in] nmatrix         Size of set
    1356              : !> \param mmatrix ...
    1357              : !> \param pmatrix ...
    1358              : !> \param qmatrix ...
    1359              : !> \par History
    1360              : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1361              : ! **************************************************************************************************
    1362            0 :    SUBROUTINE allocate_dbcsr_matrix_set_4d(matrix_set, nmatrix, mmatrix, pmatrix, qmatrix)
    1363              :       TYPE(dbcsr_p_type), DIMENSION(:, :, :, :), POINTER :: matrix_set
    1364              :       INTEGER, INTENT(IN)                                :: nmatrix, mmatrix, pmatrix, qmatrix
    1365              : 
    1366              :       INTEGER                                            :: imatrix, jmatrix, kmatrix, lmatrix
    1367              : 
    1368            0 :       IF (ASSOCIATED(matrix_set)) CALL dbcsr_deallocate_matrix_set(matrix_set)
    1369            0 :       ALLOCATE (matrix_set(nmatrix, mmatrix, pmatrix, qmatrix))
    1370            0 :       DO lmatrix = 1, qmatrix
    1371            0 :       DO kmatrix = 1, pmatrix
    1372            0 :          DO jmatrix = 1, mmatrix
    1373            0 :             DO imatrix = 1, nmatrix
    1374            0 :                NULLIFY (matrix_set(imatrix, jmatrix, kmatrix, lmatrix)%matrix)
    1375              :             END DO
    1376              :          END DO
    1377              :       END DO
    1378              :       END DO
    1379            0 :    END SUBROUTINE allocate_dbcsr_matrix_set_4d
    1380              : 
    1381              : ! **************************************************************************************************
    1382              : !> \brief   Allocate and initialize a real matrix 5-dimensional set.
    1383              : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1384              : !> \param[in] nmatrix         Size of set
    1385              : !> \param mmatrix ...
    1386              : !> \param pmatrix ...
    1387              : !> \param qmatrix ...
    1388              : !> \param smatrix ...
    1389              : !> \par History
    1390              : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1391              : ! **************************************************************************************************
    1392            0 :    SUBROUTINE allocate_dbcsr_matrix_set_5d(matrix_set, nmatrix, mmatrix, pmatrix, qmatrix, smatrix)
    1393              :       TYPE(dbcsr_p_type), DIMENSION(:, :, :, :, :), &
    1394              :          POINTER                                         :: matrix_set
    1395              :       INTEGER, INTENT(IN)                                :: nmatrix, mmatrix, pmatrix, qmatrix, &
    1396              :                                                             smatrix
    1397              : 
    1398              :       INTEGER                                            :: hmatrix, imatrix, jmatrix, kmatrix, &
    1399              :                                                             lmatrix
    1400              : 
    1401            0 :       IF (ASSOCIATED(matrix_set)) CALL dbcsr_deallocate_matrix_set(matrix_set)
    1402            0 :       ALLOCATE (matrix_set(nmatrix, mmatrix, pmatrix, qmatrix, smatrix))
    1403            0 :       DO hmatrix = 1, smatrix
    1404            0 :       DO lmatrix = 1, qmatrix
    1405            0 :       DO kmatrix = 1, pmatrix
    1406            0 :          DO jmatrix = 1, mmatrix
    1407            0 :             DO imatrix = 1, nmatrix
    1408            0 :                NULLIFY (matrix_set(imatrix, jmatrix, kmatrix, lmatrix, hmatrix)%matrix)
    1409              :             END DO
    1410              :          END DO
    1411              :       END DO
    1412              :       END DO
    1413              :       END DO
    1414            0 :    END SUBROUTINE allocate_dbcsr_matrix_set_5d
    1415              : 
    1416              :    ! **************************************************************************************************
    1417              : !> \brief Deallocate a real matrix set and release all of the member matrices.
    1418              : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1419              : !> \par History
    1420              : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1421              : ! **************************************************************************************************
    1422       194062 :    SUBROUTINE deallocate_dbcsr_matrix_set_1d(matrix_set)
    1423              : 
    1424              :       TYPE(dbcsr_p_type), DIMENSION(:), POINTER          :: matrix_set
    1425              : 
    1426              :       INTEGER                                            :: imatrix
    1427              : 
    1428       194062 :       IF (ASSOCIATED(matrix_set)) THEN
    1429       899418 :          DO imatrix = 1, SIZE(matrix_set)
    1430       899418 :             CALL dbcsr_deallocate_matrix(matrix_set(imatrix)%matrix)
    1431              :          END DO
    1432       192404 :          DEALLOCATE (matrix_set)
    1433              :       END IF
    1434              : 
    1435       194062 :    END SUBROUTINE deallocate_dbcsr_matrix_set_1d
    1436              : 
    1437              : ! **************************************************************************************************
    1438              : !> \brief Deallocate a real matrix set and release all of the member matrices.
    1439              : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1440              : !> \par History
    1441              : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1442              : ! **************************************************************************************************
    1443       190405 :    SUBROUTINE deallocate_dbcsr_matrix_set_2d(matrix_set)
    1444              : 
    1445              :       TYPE(dbcsr_p_type), DIMENSION(:, :), POINTER       :: matrix_set
    1446              : 
    1447              :       INTEGER                                            :: imatrix, jmatrix
    1448              : 
    1449       190405 :       IF (ASSOCIATED(matrix_set)) THEN
    1450      1705804 :          DO jmatrix = 1, SIZE(matrix_set, 2)
    1451      3363625 :             DO imatrix = 1, SIZE(matrix_set, 1)
    1452      3176244 :                CALL dbcsr_deallocate_matrix(matrix_set(imatrix, jmatrix)%matrix)
    1453              :             END DO
    1454              :          END DO
    1455       187381 :          DEALLOCATE (matrix_set)
    1456              :       END IF
    1457       190405 :    END SUBROUTINE deallocate_dbcsr_matrix_set_2d
    1458              : 
    1459              : ! **************************************************************************************************
    1460              : !> \brief Deallocate a real matrix set and release all of the member matrices.
    1461              : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1462              : !> \par History
    1463              : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1464              : ! **************************************************************************************************
    1465            0 :    SUBROUTINE deallocate_dbcsr_matrix_set_3d(matrix_set)
    1466              : 
    1467              :       TYPE(dbcsr_p_type), DIMENSION(:, :, :), POINTER    :: matrix_set
    1468              : 
    1469              :       INTEGER                                            :: imatrix, jmatrix, kmatrix
    1470              : 
    1471            0 :       IF (ASSOCIATED(matrix_set)) THEN
    1472            0 :          DO kmatrix = 1, SIZE(matrix_set, 3)
    1473            0 :             DO jmatrix = 1, SIZE(matrix_set, 2)
    1474            0 :                DO imatrix = 1, SIZE(matrix_set, 1)
    1475            0 :                   CALL dbcsr_deallocate_matrix(matrix_set(imatrix, jmatrix, kmatrix)%matrix)
    1476              :                END DO
    1477              :             END DO
    1478              :          END DO
    1479            0 :          DEALLOCATE (matrix_set)
    1480              :       END IF
    1481            0 :    END SUBROUTINE deallocate_dbcsr_matrix_set_3d
    1482              : 
    1483              : ! **************************************************************************************************
    1484              : !> \brief Deallocate a real matrix set and release all of the member matrices.
    1485              : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1486              : !> \par History
    1487              : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1488              : ! **************************************************************************************************
    1489            0 :    SUBROUTINE deallocate_dbcsr_matrix_set_4d(matrix_set)
    1490              : 
    1491              :       TYPE(dbcsr_p_type), DIMENSION(:, :, :, :), POINTER :: matrix_set
    1492              : 
    1493              :       INTEGER                                            :: imatrix, jmatrix, kmatrix, lmatrix
    1494              : 
    1495            0 :       IF (ASSOCIATED(matrix_set)) THEN
    1496            0 :          DO lmatrix = 1, SIZE(matrix_set, 4)
    1497            0 :          DO kmatrix = 1, SIZE(matrix_set, 3)
    1498            0 :             DO jmatrix = 1, SIZE(matrix_set, 2)
    1499            0 :                DO imatrix = 1, SIZE(matrix_set, 1)
    1500            0 :                   CALL dbcsr_deallocate_matrix(matrix_set(imatrix, jmatrix, kmatrix, lmatrix)%matrix)
    1501              :                END DO
    1502              :             END DO
    1503              :          END DO
    1504              :          END DO
    1505            0 :          DEALLOCATE (matrix_set)
    1506              :       END IF
    1507            0 :    END SUBROUTINE deallocate_dbcsr_matrix_set_4d
    1508              : 
    1509              : ! **************************************************************************************************
    1510              : !> \brief Deallocate a real matrix set and release all of the member matrices.
    1511              : !> \param[in,out] matrix_set  Set containing the DBCSR matrix pointer type
    1512              : !> \par History
    1513              : !>      2009-08-17 Adapted from sparse_matrix_type for DBCSR
    1514              : ! **************************************************************************************************
    1515            0 :    SUBROUTINE deallocate_dbcsr_matrix_set_5d(matrix_set)
    1516              : 
    1517              :       TYPE(dbcsr_p_type), DIMENSION(:, :, :, :, :), &
    1518              :          POINTER                                         :: matrix_set
    1519              : 
    1520              :       INTEGER                                            :: hmatrix, imatrix, jmatrix, kmatrix, &
    1521              :                                                             lmatrix
    1522              : 
    1523            0 :       IF (ASSOCIATED(matrix_set)) THEN
    1524            0 :          DO hmatrix = 1, SIZE(matrix_set, 5)
    1525            0 :             DO lmatrix = 1, SIZE(matrix_set, 4)
    1526            0 :             DO kmatrix = 1, SIZE(matrix_set, 3)
    1527            0 :                DO jmatrix = 1, SIZE(matrix_set, 2)
    1528            0 :                   DO imatrix = 1, SIZE(matrix_set, 1)
    1529            0 :                      CALL dbcsr_deallocate_matrix(matrix_set(imatrix, jmatrix, kmatrix, lmatrix, hmatrix)%matrix)
    1530              :                   END DO
    1531              :                END DO
    1532              :             END DO
    1533              :             END DO
    1534              :          END DO
    1535            0 :          DEALLOCATE (matrix_set)
    1536              :       END IF
    1537            0 :    END SUBROUTINE deallocate_dbcsr_matrix_set_5d
    1538              : 
    1539              : END MODULE cp_dbcsr_operations
        

Generated by: LCOV version 2.0-1