LCOV - code coverage report
Current view: top level - src - domain_submatrix_methods.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 83.6 % 675 564
Test Date: 2025-12-04 06:27:48 Functions: 70.0 % 20 14

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Subroutines to handle submatrices
      10              : !> \par History
      11              : !>       2013.01 created [Rustam Z Khaliullin]
      12              : !> \author Rustam Z Khaliullin
      13              : ! **************************************************************************************************
      14              : MODULE domain_submatrix_methods
      15              : 
      16              :    USE cp_dbcsr_api,                    ONLY: &
      17              :         dbcsr_distribution_get, dbcsr_distribution_type, dbcsr_filter, dbcsr_finalize, &
      18              :         dbcsr_get_block_p, dbcsr_get_info, dbcsr_get_matrix_type, dbcsr_get_stored_coordinates, &
      19              :         dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, dbcsr_iterator_start, &
      20              :         dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_put_block, dbcsr_type, &
      21              :         dbcsr_type_antisymmetric, dbcsr_type_no_symmetry, dbcsr_type_symmetric, dbcsr_work_create
      22              :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      23              :                                               cp_logger_get_default_unit_nr,&
      24              :                                               cp_logger_type
      25              :    USE domain_submatrix_types,          ONLY: domain_map_type,&
      26              :                                               domain_submatrix_type,&
      27              :                                               select_row_col
      28              :    USE kinds,                           ONLY: dp
      29              :    USE message_passing,                 ONLY: mp_comm_null,&
      30              :                                               mp_comm_type
      31              : #include "./base/base_uses.f90"
      32              : 
      33              :    IMPLICIT NONE
      34              : 
      35              :    PRIVATE
      36              : 
      37              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'domain_submatrix_methods'
      38              : 
      39              :    PUBLIC :: copy_submatrices, copy_submatrix_data, &
      40              :              release_submatrices, multiply_submatrices, add_submatrices, &
      41              :              construct_submatrices, init_submatrices, &
      42              :              construct_dbcsr_from_submatrices, &
      43              :              set_submatrices, &
      44              :              print_submatrices, maxnorm_submatrices
      45              : 
      46              :    INTERFACE init_submatrices
      47              :       MODULE PROCEDURE init_submatrices_0d
      48              :       MODULE PROCEDURE init_submatrices_1d
      49              :       MODULE PROCEDURE init_submatrices_2d
      50              :    END INTERFACE
      51              : 
      52              :    INTERFACE set_submatrices
      53              :       MODULE PROCEDURE set_submatrix_array
      54              :       MODULE PROCEDURE set_submatrix
      55              :    END INTERFACE
      56              : 
      57              :    INTERFACE copy_submatrices
      58              :       MODULE PROCEDURE copy_submatrix_array
      59              :       MODULE PROCEDURE copy_submatrix
      60              :    END INTERFACE
      61              : 
      62              :    INTERFACE release_submatrices
      63              :       MODULE PROCEDURE release_submatrix_array
      64              :       MODULE PROCEDURE release_submatrix
      65              :    END INTERFACE
      66              : 
      67              :    INTERFACE multiply_submatrices
      68              :       MODULE PROCEDURE multiply_submatrices_once
      69              :       MODULE PROCEDURE multiply_submatrices_array
      70              :    END INTERFACE
      71              : 
      72              :    INTERFACE add_submatrices
      73              :       MODULE PROCEDURE add_submatrices_once
      74              :       MODULE PROCEDURE add_submatrices_array
      75              :    END INTERFACE
      76              : 
      77              : CONTAINS
      78              : 
      79              : ! **************************************************************************************************
      80              : !> \brief ...
      81              : !> \param subm ...
      82              : ! **************************************************************************************************
      83            0 :    SUBROUTINE init_submatrices_0d(subm)
      84              : 
      85              :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: subm
      86              : 
      87            0 :       subm%domain = -1
      88            0 :       subm%nbrows = -1
      89            0 :       subm%nbcols = -1
      90            0 :       subm%nrows = -1
      91            0 :       subm%ncols = -1
      92            0 :       subm%nnodes = -1
      93            0 :       subm%group = mp_comm_null
      94              : 
      95            0 :    END SUBROUTINE init_submatrices_0d
      96              : 
      97              : ! **************************************************************************************************
      98              : !> \brief ...
      99              : !> \param subm ...
     100              : ! **************************************************************************************************
     101         7968 :    SUBROUTINE init_submatrices_1d(subm)
     102              : 
     103              :       TYPE(domain_submatrix_type), DIMENSION(:), &
     104              :          INTENT(INOUT)                                   :: subm
     105              : 
     106        58884 :       subm(:)%domain = -1
     107        58884 :       subm(:)%nbrows = -1
     108        58884 :       subm(:)%nbcols = -1
     109        58884 :       subm(:)%nrows = -1
     110        58884 :       subm(:)%ncols = -1
     111        58884 :       subm(:)%nnodes = -1
     112        58884 :       subm(:)%group = mp_comm_null
     113              : 
     114         7968 :    END SUBROUTINE init_submatrices_1d
     115              : 
     116              : ! **************************************************************************************************
     117              : !> \brief ...
     118              : !> \param subm ...
     119              : ! **************************************************************************************************
     120         1142 :    SUBROUTINE init_submatrices_2d(subm)
     121              : 
     122              :       TYPE(domain_submatrix_type), DIMENSION(:, :), &
     123              :          INTENT(INOUT)                                   :: subm
     124              : 
     125        10282 :       subm(:, :)%domain = -1
     126        10282 :       subm(:, :)%nbrows = -1
     127        10282 :       subm(:, :)%nbcols = -1
     128        10282 :       subm(:, :)%nrows = -1
     129        10282 :       subm(:, :)%ncols = -1
     130        10282 :       subm(:, :)%nnodes = -1
     131        10282 :       subm(:, :)%group = mp_comm_null
     132              : 
     133         1142 :    END SUBROUTINE init_submatrices_2d
     134              : 
     135              : ! **************************************************************************************************
     136              : !> \brief ...
     137              : !> \param original ...
     138              : !> \param copy ...
     139              : !> \param copy_data ...
     140              : ! **************************************************************************************************
     141         1164 :    SUBROUTINE copy_submatrix_array(original, copy, copy_data)
     142              : 
     143              :       TYPE(domain_submatrix_type), DIMENSION(:), &
     144              :          INTENT(IN)                                      :: original
     145              :       TYPE(domain_submatrix_type), DIMENSION(:), &
     146              :          INTENT(INOUT)                                   :: copy
     147              :       LOGICAL, INTENT(IN)                                :: copy_data
     148              : 
     149              :       CHARACTER(len=*), PARAMETER :: routineN = 'copy_submatrix_array'
     150              : 
     151              :       INTEGER                                            :: handle, idomain, ndomains, ndomainsB
     152              : 
     153         1164 :       CALL timeset(routineN, handle)
     154              : 
     155         1164 :       ndomains = SIZE(original)
     156         1164 :       ndomainsB = SIZE(copy)
     157         1164 :       CPASSERT(ndomains == ndomainsB)
     158         8246 :       copy(:)%nnodes = original(:)%nnodes
     159         8246 :       copy(:)%group = original(:)%group
     160         8246 :       DO idomain = 1, ndomains
     161         8246 :          IF (original(idomain)%domain > 0) THEN
     162         3541 :             CALL copy_submatrix(original(idomain), copy(idomain), copy_data)
     163              :          END IF
     164              :       END DO ! loop over domains
     165              : 
     166         1164 :       CALL timestop(handle)
     167              : 
     168         1164 :    END SUBROUTINE copy_submatrix_array
     169              : 
     170              : ! **************************************************************************************************
     171              : !> \brief ...
     172              : !> \param original ...
     173              : !> \param copy ...
     174              : !> \param copy_data ...
     175              : ! **************************************************************************************************
     176         4566 :    SUBROUTINE copy_submatrix(original, copy, copy_data)
     177              : 
     178              :       TYPE(domain_submatrix_type), INTENT(IN)            :: original
     179              :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: copy
     180              :       LOGICAL, INTENT(IN)                                :: copy_data
     181              : 
     182              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'copy_submatrix'
     183              : 
     184              :       INTEGER                                            :: handle, icol, irow
     185              : 
     186         4566 :       CALL timeset(routineN, handle)
     187              : 
     188         4566 :       copy%domain = original%domain
     189         4566 :       copy%nnodes = original%nnodes
     190         4566 :       copy%group = original%group
     191              : 
     192         4566 :       IF (original%domain > 0) THEN
     193              : 
     194         4566 :          copy%nbrows = original%nbrows
     195         4566 :          copy%nbcols = original%nbcols
     196         4566 :          copy%nrows = original%nrows
     197         4566 :          copy%ncols = original%ncols
     198              : 
     199         4566 :          IF (.NOT. ALLOCATED(copy%dbcsr_row)) THEN
     200         9906 :             ALLOCATE (copy%dbcsr_row(original%nbrows))
     201              :          ELSE
     202         1264 :             IF (SIZE(copy%dbcsr_row) /= SIZE(original%dbcsr_row)) THEN
     203            0 :                DEALLOCATE (copy%dbcsr_row)
     204            0 :                ALLOCATE (copy%dbcsr_row(original%nbrows))
     205              :             END IF
     206              :          END IF
     207         4566 :          IF (.NOT. ALLOCATED(copy%dbcsr_col)) THEN
     208         9906 :             ALLOCATE (copy%dbcsr_col(original%nbcols))
     209              :          ELSE
     210         1264 :             IF (SIZE(copy%dbcsr_col) /= SIZE(original%dbcsr_col)) THEN
     211            0 :                DEALLOCATE (copy%dbcsr_col)
     212            0 :                ALLOCATE (copy%dbcsr_col(original%nbcols))
     213              :             END IF
     214              :          END IF
     215         4566 :          IF (.NOT. ALLOCATED(copy%size_brow)) THEN
     216         9906 :             ALLOCATE (copy%size_brow(original%nbrows))
     217              :          ELSE
     218         1264 :             IF (SIZE(copy%size_brow) /= SIZE(original%size_brow)) THEN
     219            0 :                DEALLOCATE (copy%size_brow)
     220            0 :                ALLOCATE (copy%size_brow(original%nbrows))
     221              :             END IF
     222              :          END IF
     223         4566 :          IF (.NOT. ALLOCATED(copy%size_bcol)) THEN
     224         9906 :             ALLOCATE (copy%size_bcol(original%nbcols))
     225              :          ELSE
     226         1264 :             IF (SIZE(copy%size_bcol) /= SIZE(original%size_bcol)) THEN
     227            0 :                DEALLOCATE (copy%size_bcol)
     228            0 :                ALLOCATE (copy%size_bcol(original%nbcols))
     229              :             END IF
     230              :          END IF
     231              : 
     232        24024 :          DO irow = 1, original%nbrows
     233        19458 :             copy%dbcsr_row(irow) = original%dbcsr_row(irow)
     234        24024 :             copy%size_brow(irow) = original%size_brow(irow)
     235              :          END DO
     236              : 
     237        15012 :          DO icol = 1, original%nbcols
     238        10446 :             copy%dbcsr_col(icol) = original%dbcsr_col(icol)
     239        15012 :             copy%size_bcol(icol) = original%size_bcol(icol)
     240              :          END DO
     241              : 
     242         4566 :          IF (copy_data) THEN
     243         3541 :             CALL copy_submatrix_data(original%mdata, copy)
     244              :          END IF
     245              : 
     246              :       END IF ! do not copy empty submatrix
     247              : 
     248         4566 :       CALL timestop(handle)
     249              : 
     250         4566 :    END SUBROUTINE copy_submatrix
     251              : 
     252              : ! **************************************************************************************************
     253              : !> \brief ...
     254              : !> \param array ...
     255              : !> \param copy ...
     256              : ! **************************************************************************************************
     257         4566 :    SUBROUTINE copy_submatrix_data(array, copy)
     258              : 
     259              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: array
     260              :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: copy
     261              : 
     262              :       CHARACTER(len=*), PARAMETER :: routineN = 'copy_submatrix_data'
     263              : 
     264              :       INTEGER                                            :: ds1, ds2, handle, ms1, ms2
     265              : 
     266         4566 :       CALL timeset(routineN, handle)
     267              : 
     268         4566 :       CPASSERT(copy%domain > 0)
     269              : 
     270         4566 :       ds1 = SIZE(array, 1)
     271         4566 :       ds2 = SIZE(array, 2)
     272              : 
     273         4566 :       IF (.NOT. ALLOCATED(copy%mdata)) THEN
     274        13208 :          ALLOCATE (copy%mdata(ds1, ds2))
     275              :       ELSE
     276         1264 :          ms1 = SIZE(copy%mdata, 1)
     277         1264 :          ms2 = SIZE(copy%mdata, 2)
     278         1264 :          IF ((ds1 /= ms1) .OR. (ds2 /= ms2)) THEN
     279            0 :             DEALLOCATE (copy%mdata)
     280            0 :             ALLOCATE (copy%mdata(ds1, ds2))
     281              :          END IF
     282              :       END IF
     283              : 
     284      5710612 :       copy%mdata(:, :) = array(:, :)
     285              : 
     286         4566 :       CALL timestop(handle)
     287              : 
     288         4566 :    END SUBROUTINE copy_submatrix_data
     289              : 
     290              : ! **************************************************************************************************
     291              : !> \brief ...
     292              : !> \param submatrices ...
     293              : !> \param scalar ...
     294              : ! **************************************************************************************************
     295            0 :    SUBROUTINE set_submatrix_array(submatrices, scalar)
     296              : 
     297              :       TYPE(domain_submatrix_type), DIMENSION(:), &
     298              :          INTENT(INOUT)                                   :: submatrices
     299              :       REAL(KIND=dp), INTENT(IN)                          :: scalar
     300              : 
     301              :       CHARACTER(len=*), PARAMETER :: routineN = 'set_submatrix_array'
     302              : 
     303              :       INTEGER                                            :: handle, idomain, ndomains
     304              : 
     305            0 :       CALL timeset(routineN, handle)
     306              : 
     307            0 :       ndomains = SIZE(submatrices)
     308            0 :       DO idomain = 1, ndomains
     309            0 :          IF (submatrices(idomain)%domain > 0) THEN
     310            0 :             CALL set_submatrix(submatrices(idomain), scalar)
     311              :          END IF
     312              :       END DO ! loop over domains
     313              : 
     314            0 :       CALL timestop(handle)
     315              : 
     316            0 :    END SUBROUTINE set_submatrix_array
     317              : 
     318              : ! **************************************************************************************************
     319              : !> \brief ...
     320              : !> \param submatrix ...
     321              : !> \param scalar ...
     322              : ! **************************************************************************************************
     323            0 :    SUBROUTINE set_submatrix(submatrix, scalar)
     324              : 
     325              :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: submatrix
     326              :       REAL(KIND=dp), INTENT(IN)                          :: scalar
     327              : 
     328              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'set_submatrix'
     329              : 
     330              :       INTEGER                                            :: ds1, ds2, handle, ms1, ms2
     331              : 
     332            0 :       CALL timeset(routineN, handle)
     333              : 
     334            0 :       CPASSERT(submatrix%domain > 0)
     335            0 :       CPASSERT(submatrix%nrows > 0)
     336            0 :       CPASSERT(submatrix%ncols > 0)
     337              : 
     338            0 :       ds1 = submatrix%nrows
     339            0 :       ds2 = submatrix%ncols
     340              : 
     341            0 :       IF (.NOT. ALLOCATED(submatrix%mdata)) THEN
     342            0 :          ALLOCATE (submatrix%mdata(ds1, ds2))
     343              :       ELSE
     344            0 :          ms1 = SIZE(submatrix%mdata, 1)
     345            0 :          ms2 = SIZE(submatrix%mdata, 2)
     346            0 :          IF ((ds1 /= ms1) .OR. (ds2 /= ms2)) THEN
     347            0 :             DEALLOCATE (submatrix%mdata)
     348            0 :             ALLOCATE (submatrix%mdata(ds1, ds2))
     349              :          END IF
     350              :       END IF
     351              : 
     352            0 :       submatrix%mdata(:, :) = scalar
     353              : 
     354            0 :       CALL timestop(handle)
     355              : 
     356            0 :    END SUBROUTINE set_submatrix
     357              : 
     358              : ! **************************************************************************************************
     359              : !> \brief ...
     360              : !> \param subm ...
     361              : ! **************************************************************************************************
     362        12118 :    SUBROUTINE release_submatrix_array(subm)
     363              : 
     364              :       TYPE(domain_submatrix_type), DIMENSION(:), &
     365              :          INTENT(INOUT)                                   :: subm
     366              : 
     367              :       CHARACTER(len=*), PARAMETER :: routineN = 'release_submatrix_array'
     368              : 
     369              :       INTEGER                                            :: handle, idomain, ndomains
     370              : 
     371        12118 :       CALL timeset(routineN, handle)
     372              : 
     373        12118 :       ndomains = SIZE(subm)
     374        90314 :       DO idomain = 1, ndomains
     375        90314 :          CALL release_submatrix(subm(idomain))
     376              :       END DO ! loop over domains
     377              : 
     378        12118 :       CALL timestop(handle)
     379              : 
     380        12118 :    END SUBROUTINE release_submatrix_array
     381              : 
     382              : ! **************************************************************************************************
     383              : !> \brief ...
     384              : !> \param subm ...
     385              : ! **************************************************************************************************
     386        78196 :    SUBROUTINE release_submatrix(subm)
     387              : 
     388              :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: subm
     389              : 
     390              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'release_submatrix'
     391              : 
     392              :       INTEGER                                            :: handle
     393              : 
     394        78196 :       CALL timeset(routineN, handle)
     395              : 
     396        78196 :       subm%domain = -1
     397        78196 :       subm%nbrows = -1
     398        78196 :       subm%nbcols = -1
     399        78196 :       subm%nrows = -1
     400        78196 :       subm%ncols = -1
     401        78196 :       subm%nnodes = -1
     402        78196 :       subm%group = mp_comm_null
     403              : 
     404        78196 :       IF (ALLOCATED(subm%dbcsr_row)) THEN
     405        20999 :          DEALLOCATE (subm%dbcsr_row)
     406              :       END IF
     407        78196 :       IF (ALLOCATED(subm%dbcsr_col)) THEN
     408        20999 :          DEALLOCATE (subm%dbcsr_col)
     409              :       END IF
     410        78196 :       IF (ALLOCATED(subm%size_brow)) THEN
     411        20999 :          DEALLOCATE (subm%size_brow)
     412              :       END IF
     413        78196 :       IF (ALLOCATED(subm%size_bcol)) THEN
     414        20999 :          DEALLOCATE (subm%size_bcol)
     415              :       END IF
     416        78196 :       IF (ALLOCATED(subm%mdata)) THEN
     417        21004 :          DEALLOCATE (subm%mdata)
     418              :       END IF
     419              : 
     420        78196 :       CALL timestop(handle)
     421              : 
     422        78196 :    END SUBROUTINE release_submatrix
     423              : 
     424              :    ! more complex routine might be necessary if submatrices are distributed
     425              : ! **************************************************************************************************
     426              : !> \brief ...
     427              : !> \param transA ...
     428              : !> \param transB ...
     429              : !> \param alpha ...
     430              : !> \param A ...
     431              : !> \param B ...
     432              : !> \param beta ...
     433              : !> \param C ...
     434              : ! **************************************************************************************************
     435        10733 :    SUBROUTINE multiply_submatrices_once(transA, transB, alpha, A, B, beta, C)
     436              : 
     437              :       CHARACTER, INTENT(IN)                              :: transA, transB
     438              :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     439              :       TYPE(domain_submatrix_type), INTENT(IN)            :: A, B
     440              :       REAL(KIND=dp), INTENT(IN)                          :: beta
     441              :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: C
     442              : 
     443              :       CHARACTER(len=*), PARAMETER :: routineN = 'multiply_submatrices_once'
     444              : 
     445              :       INTEGER                                            :: cs1, cs2, handle, icol, irow, K, K1, &
     446              :                                                             LDA, LDB, LDC, M, Mblocks, N, Nblocks
     447              :       LOGICAL                                            :: NOTA, NOTB
     448              : 
     449        10733 :       CALL timeset(routineN, handle)
     450              : 
     451        10733 :       CPASSERT(A%domain > 0)
     452        10733 :       CPASSERT(B%domain > 0)
     453        10733 :       CPASSERT(C%domain > 0)
     454              : 
     455        10733 :       LDA = SIZE(A%mdata, 1)
     456        10733 :       LDB = SIZE(B%mdata, 1)
     457              : 
     458        10733 :       NOTA = (transA == 'N') .OR. (transA == 'n')
     459        10733 :       NOTB = (transB == 'N') .OR. (transB == 'n')
     460              : 
     461        10733 :       IF (NOTA) THEN
     462        10733 :          M = A%nrows
     463        10733 :          K = A%ncols
     464        10733 :          Mblocks = A%nbrows
     465              :       ELSE
     466            0 :          M = A%ncols
     467            0 :          K = A%nrows
     468            0 :          Mblocks = A%nbcols
     469              :       END IF
     470              : 
     471        10733 :       IF (NOTB) THEN
     472        10638 :          K1 = B%nrows
     473        10638 :          N = B%ncols
     474        10638 :          Nblocks = B%nbcols
     475              :       ELSE
     476           95 :          K1 = B%ncols
     477           95 :          N = B%nrows
     478           95 :          Nblocks = B%nbrows
     479              :       END IF
     480              : 
     481              :       ! these checks are for debugging only
     482        10733 :       CPASSERT(K == K1)
     483              : 
     484              :       ! conform C matrix
     485        10733 :       C%nrows = M
     486        10733 :       C%ncols = N
     487        10733 :       C%nbrows = Mblocks
     488        10733 :       C%nbcols = Nblocks
     489        10733 :       IF (ALLOCATED(C%dbcsr_row)) THEN
     490         2723 :          DEALLOCATE (C%dbcsr_row)
     491              :       END IF
     492        32199 :       ALLOCATE (C%dbcsr_row(C%nbrows))
     493        10733 :       IF (ALLOCATED(C%dbcsr_col)) THEN
     494         2723 :          DEALLOCATE (C%dbcsr_col)
     495              :       END IF
     496        32199 :       ALLOCATE (C%dbcsr_col(C%nbcols))
     497        10733 :       IF (ALLOCATED(C%size_brow)) THEN
     498         2723 :          DEALLOCATE (C%size_brow)
     499              :       END IF
     500        32199 :       ALLOCATE (C%size_brow(C%nbrows))
     501        10733 :       IF (ALLOCATED(C%size_bcol)) THEN
     502         2723 :          DEALLOCATE (C%size_bcol)
     503              :       END IF
     504        32199 :       ALLOCATE (C%size_bcol(C%nbcols))
     505              : 
     506        57666 :       DO irow = 1, C%nbrows
     507        57666 :          IF (NOTA) THEN
     508        46933 :             C%dbcsr_row(irow) = A%dbcsr_row(irow)
     509        46933 :             C%size_brow(irow) = A%size_brow(irow)
     510              :          ELSE
     511            0 :             C%dbcsr_row(irow) = A%dbcsr_col(irow)
     512            0 :             C%size_brow(irow) = A%size_bcol(irow)
     513              :          END IF
     514              :       END DO
     515              : 
     516        22360 :       DO icol = 1, C%nbcols
     517        22360 :          IF (NOTB) THEN
     518        11234 :             C%dbcsr_col(icol) = B%dbcsr_col(icol)
     519        11234 :             C%size_bcol(icol) = B%size_bcol(icol)
     520              :          ELSE
     521          393 :             C%dbcsr_col(icol) = B%dbcsr_row(icol)
     522          393 :             C%size_bcol(icol) = B%size_brow(icol)
     523              :          END IF
     524              :       END DO
     525              : 
     526        10733 :       IF (.NOT. ALLOCATED(C%mdata)) THEN
     527              :          !!! cannot use non-zero beta if C is not allocated
     528         8010 :          CPASSERT(beta == 0.0_dp)
     529        32040 :          ALLOCATE (C%mdata(C%nrows, C%ncols))
     530              :       ELSE
     531         2723 :          cs1 = SIZE(C%mdata, 1)
     532         2723 :          cs2 = SIZE(C%mdata, 2)
     533         2723 :          IF ((C%nrows /= cs1) .OR. (C%ncols /= cs2)) THEN
     534              :             !!! cannot deallocate data if beta is non-zero
     535            0 :             CPASSERT(beta == 0.0_dp)
     536            0 :             DEALLOCATE (C%mdata)
     537            0 :             ALLOCATE (C%mdata(C%nrows, C%ncols))
     538              :          END IF
     539              :       END IF
     540              : 
     541        10733 :       LDC = C%nrows
     542              : 
     543        10733 :       CALL DGEMM(transA, transB, M, N, K, alpha, A%mdata, LDA, B%mdata, LDB, beta, C%mdata, LDC)
     544              : 
     545        10733 :       C%nnodes = A%nnodes
     546        10733 :       C%group = A%group
     547              : 
     548        10733 :       CALL timestop(handle)
     549              : 
     550        10733 :    END SUBROUTINE multiply_submatrices_once
     551              : 
     552              : ! **************************************************************************************************
     553              : !> \brief ...
     554              : !> \param transA ...
     555              : !> \param transB ...
     556              : !> \param alpha ...
     557              : !> \param A ...
     558              : !> \param B ...
     559              : !> \param beta ...
     560              : !> \param C ...
     561              : ! **************************************************************************************************
     562         3328 :    SUBROUTINE multiply_submatrices_array(transA, transB, alpha, A, B, beta, C)
     563              : 
     564              :       CHARACTER, INTENT(IN)                              :: transA, transB
     565              :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     566              :       TYPE(domain_submatrix_type), DIMENSION(:), &
     567              :          INTENT(IN)                                      :: A, B
     568              :       REAL(KIND=dp), INTENT(IN)                          :: beta
     569              :       TYPE(domain_submatrix_type), DIMENSION(:), &
     570              :          INTENT(INOUT)                                   :: C
     571              : 
     572              :       CHARACTER(len=*), PARAMETER :: routineN = 'multiply_submatrices_array'
     573              : 
     574              :       INTEGER                                            :: handle, idomain, idomainA, idomainB, &
     575              :                                                             ndomains, ndomainsB, ndomainsC
     576              : 
     577         3328 :       CALL timeset(routineN, handle)
     578              : 
     579         3328 :       ndomains = SIZE(A)
     580         3328 :       ndomainsB = SIZE(B)
     581         3328 :       ndomainsC = SIZE(C)
     582              : 
     583         3328 :       CPASSERT(ndomains == ndomainsB)
     584         3328 :       CPASSERT(ndomainsB == ndomainsC)
     585              : 
     586        24794 :       DO idomain = 1, ndomains
     587              : 
     588        21466 :          idomainA = A(idomain)%domain
     589        21466 :          idomainB = B(idomain)%domain
     590              : 
     591        21466 :          CPASSERT(idomainA == idomainB)
     592              : 
     593        21466 :          C(idomain)%domain = idomainA
     594              : 
     595              :          ! check if the submatrix exists
     596        24794 :          IF (idomainA > 0) THEN
     597        10733 :             CALL multiply_submatrices_once(transA, transB, alpha, A(idomain), B(idomain), beta, C(idomain))
     598              :          END IF ! submatrix for the domain exists
     599              : 
     600              :       END DO ! loop over domains
     601              : 
     602         3328 :       CALL timestop(handle)
     603              : 
     604         3328 :    END SUBROUTINE multiply_submatrices_array
     605              : 
     606              :    ! more complex routine might be necessary if submatrices are distributed
     607              : ! **************************************************************************************************
     608              : !> \brief ...
     609              : !> \param alpha ...
     610              : !> \param A ...
     611              : !> \param beta ...
     612              : !> \param B ...
     613              : !> \param transB ...
     614              : ! **************************************************************************************************
     615          205 :    SUBROUTINE add_submatrices_once(alpha, A, beta, B, transB)
     616              : 
     617              :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     618              :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: A
     619              :       REAL(KIND=dp), INTENT(IN)                          :: beta
     620              :       TYPE(domain_submatrix_type), INTENT(IN)            :: B
     621              :       CHARACTER, INTENT(IN)                              :: transB
     622              : 
     623              :       CHARACTER(len=*), PARAMETER :: routineN = 'add_submatrices_once'
     624              : 
     625              :       INTEGER                                            :: C1, C2, handle, icol, R1, R2
     626              :       LOGICAL                                            :: NOTB
     627              : 
     628          205 :       CALL timeset(routineN, handle)
     629              : 
     630          205 :       CPASSERT(A%domain > 0)
     631          205 :       CPASSERT(B%domain > 0)
     632              : 
     633          205 :       R1 = A%nrows
     634          205 :       C1 = A%ncols
     635              : 
     636          205 :       NOTB = (transB == 'N') .OR. (transB == 'n')
     637              : 
     638          205 :       IF (NOTB) THEN
     639          110 :          R2 = B%nrows
     640          110 :          C2 = B%ncols
     641              :       ELSE
     642           95 :          R2 = B%ncols
     643           95 :          C2 = B%nrows
     644              :       END IF
     645              : 
     646              :       ! these checks are for debugging only
     647          205 :       CPASSERT(C1 == C2)
     648          205 :       CPASSERT(R1 == R2)
     649              : 
     650          205 :       IF (NOTB) THEN
     651         7507 :          DO icol = 1, C1
     652       765846 :             A%mdata(:, icol) = alpha*A%mdata(:, icol) + beta*B%mdata(:, icol)
     653              :          END DO
     654              :       ELSE
     655         6690 :          DO icol = 1, C1
     656       701043 :             A%mdata(:, icol) = alpha*A%mdata(:, icol) + beta*B%mdata(icol, :)
     657              :          END DO
     658              :       END IF
     659              : 
     660          205 :       CALL timestop(handle)
     661              : 
     662          205 :    END SUBROUTINE add_submatrices_once
     663              : 
     664              : ! **************************************************************************************************
     665              : !> \brief ...
     666              : !> \param alpha ...
     667              : !> \param A ...
     668              : !> \param beta ...
     669              : !> \param B ...
     670              : !> \param transB ...
     671              : ! **************************************************************************************************
     672           66 :    SUBROUTINE add_submatrices_array(alpha, A, beta, B, transB)
     673              : 
     674              :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     675              :       TYPE(domain_submatrix_type), DIMENSION(:), &
     676              :          INTENT(INOUT)                                   :: A
     677              :       REAL(KIND=dp), INTENT(IN)                          :: beta
     678              :       TYPE(domain_submatrix_type), DIMENSION(:), &
     679              :          INTENT(IN)                                      :: B
     680              :       CHARACTER, INTENT(IN)                              :: transB
     681              : 
     682              :       CHARACTER(len=*), PARAMETER :: routineN = 'add_submatrices_array'
     683              : 
     684              :       INTEGER                                            :: handle, idomain, idomainA, idomainB, &
     685              :                                                             ndomains, ndomainsB
     686              : 
     687           66 :       CALL timeset(routineN, handle)
     688              : 
     689           66 :       ndomains = SIZE(A)
     690           66 :       ndomainsB = SIZE(B)
     691              : 
     692           66 :       CPASSERT(ndomains == ndomainsB)
     693              : 
     694          476 :       DO idomain = 1, ndomains
     695              : 
     696          410 :          idomainA = A(idomain)%domain
     697          410 :          idomainB = B(idomain)%domain
     698              : 
     699          410 :          CPASSERT(idomainA == idomainB)
     700              : 
     701              :          ! check if the submatrix exists
     702          476 :          IF (idomainA > 0) THEN
     703          205 :             CALL add_submatrices_once(alpha, A(idomain), beta, B(idomain), transB)
     704              :          END IF ! submatrix for the domain exists
     705              : 
     706              :       END DO ! loop over domains
     707              : 
     708           66 :       CALL timestop(handle)
     709              : 
     710           66 :    END SUBROUTINE add_submatrices_array
     711              : 
     712              : ! **************************************************************************************************
     713              : !> \brief Computes the max norm of the collection of submatrices
     714              : !> \param submatrices ...
     715              : !> \param norm ...
     716              : !> \par History
     717              : !>       2013.03 created [Rustam Z. Khaliullin]
     718              : !> \author Rustam Z. Khaliullin
     719              : ! **************************************************************************************************
     720            2 :    SUBROUTINE maxnorm_submatrices(submatrices, norm)
     721              : 
     722              :       TYPE(domain_submatrix_type), DIMENSION(:), &
     723              :          INTENT(IN)                                      :: submatrices
     724              :       REAL(KIND=dp), INTENT(OUT)                         :: norm
     725              : 
     726              :       CHARACTER(len=*), PARAMETER :: routineN = 'maxnorm_submatrices'
     727              : 
     728              :       INTEGER                                            :: handle, idomain, ndomains
     729              :       REAL(KIND=dp)                                      :: curr_norm, send_norm
     730              :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: recv_norm
     731              : 
     732            2 :       CALL timeset(routineN, handle)
     733              : 
     734            2 :       send_norm = 0.0_dp
     735              : 
     736            2 :       ndomains = SIZE(submatrices)
     737              : 
     738           12 :       DO idomain = 1, ndomains
     739              : 
     740              :          ! check if the submatrix is local
     741           12 :          IF (submatrices(idomain)%domain > 0) THEN
     742        31607 :             curr_norm = MAXVAL(ABS(submatrices(idomain)%mdata))
     743            5 :             IF (curr_norm > send_norm) send_norm = curr_norm
     744              :          END IF
     745              : 
     746              :       END DO ! loop over domains
     747              : 
     748              :       ! communicate local norm to the other nodes
     749            6 :       ALLOCATE (recv_norm(submatrices(1)%nnodes))
     750            2 :       CALL submatrices(1)%group%allgather(send_norm, recv_norm)
     751              : 
     752            8 :       norm = MAXVAL(recv_norm)
     753              : 
     754            2 :       DEALLOCATE (recv_norm)
     755              : 
     756            2 :       CALL timestop(handle)
     757              : 
     758            2 :    END SUBROUTINE maxnorm_submatrices
     759              : 
     760              : ! **************************************************************************************************
     761              : !> \brief Computes the sum of traces of the submatrix A.tr(B)
     762              : !> \param A ...
     763              : !> \param B ...
     764              : !> \param trace ...
     765              : !> \par History
     766              : !>       2013.03 created [Rustam Z. Khaliullin]
     767              : !> \author Rustam Z. Khaliullin
     768              : ! **************************************************************************************************
     769            0 :    SUBROUTINE trace_submatrices(A, B, trace)
     770              : 
     771              :       TYPE(domain_submatrix_type), DIMENSION(:), &
     772              :          INTENT(IN)                                      :: A, B
     773              :       REAL(KIND=dp), INTENT(OUT)                         :: trace
     774              : 
     775              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'trace_submatrices'
     776              : 
     777              :       INTEGER                                            :: domainA, domainB, handle, idomain, &
     778              :                                                             ndomainsA, ndomainsB
     779              :       REAL(KIND=dp)                                      :: curr_trace, send_trace
     780            0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: recv_trace
     781              : 
     782            0 :       CALL timeset(routineN, handle)
     783              : 
     784            0 :       send_trace = 0.0_dp
     785              : 
     786            0 :       ndomainsA = SIZE(A)
     787            0 :       ndomainsB = SIZE(B)
     788              : 
     789            0 :       CPASSERT(ndomainsA == ndomainsB)
     790              : 
     791            0 :       DO idomain = 1, ndomainsA
     792              : 
     793            0 :          domainA = A(idomain)%domain
     794            0 :          domainB = B(idomain)%domain
     795              : 
     796            0 :          CPASSERT(domainA == domainB)
     797              : 
     798              :          ! check if the submatrix is local
     799            0 :          IF (domainA > 0) THEN
     800              : 
     801            0 :             CPASSERT(A(idomain)%nrows == B(idomain)%nrows)
     802            0 :             CPASSERT(A(idomain)%ncols == B(idomain)%ncols)
     803              : 
     804            0 :             curr_trace = SUM(A(idomain)%mdata(:, :)*B(idomain)%mdata(:, :))
     805            0 :             send_trace = send_trace + curr_trace
     806              : 
     807              :          END IF
     808              : 
     809              :       END DO ! loop over domains
     810              : 
     811              :       ! communicate local norm to the other nodes
     812            0 :       ALLOCATE (recv_trace(A(1)%nnodes))
     813            0 :       CALL A(1)%group%allgather(send_trace, recv_trace)
     814              : 
     815            0 :       trace = SUM(recv_trace)
     816              : 
     817            0 :       DEALLOCATE (recv_trace)
     818              : 
     819            0 :       CALL timestop(handle)
     820              : 
     821            0 :    END SUBROUTINE trace_submatrices
     822              : 
     823              : ! **************************************************************************************************
     824              : !> \brief Constructs submatrices for each ALMO domain by collecting distributed
     825              : !>        DBCSR blocks to local arrays
     826              : !> \param matrix ...
     827              : !> \param submatrix ...
     828              : !> \param distr_pattern ...
     829              : !> \param domain_map ...
     830              : !> \param node_of_domain ...
     831              : !> \param job_type ...
     832              : !> \par History
     833              : !>       2013.01 created [Rustam Z. Khaliullin]
     834              : !> \author Rustam Z. Khaliullin
     835              : ! **************************************************************************************************
     836         9240 :    SUBROUTINE construct_submatrices(matrix, submatrix, distr_pattern, domain_map, &
     837         3080 :                                     node_of_domain, job_type)
     838              : 
     839              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
     840              :       TYPE(domain_submatrix_type), DIMENSION(:), &
     841              :          INTENT(INOUT)                                   :: submatrix
     842              :       TYPE(dbcsr_type), INTENT(IN)                       :: distr_pattern
     843              :       TYPE(domain_map_type), INTENT(IN)                  :: domain_map
     844              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: node_of_domain
     845              :       INTEGER, INTENT(IN)                                :: job_type
     846              : 
     847              :       CHARACTER(len=*), PARAMETER :: routineN = 'construct_submatrices'
     848              : 
     849              :       CHARACTER                                          :: matrix_type
     850              :       INTEGER :: block_node, block_offset, col, col_offset, col_size, dest_node, GroupID, handle, &
     851              :          iBlock, icol, idomain, index_col, index_ec, index_er, index_row, index_sc, index_sr, &
     852              :          iNode, ldesc, myNode, nblkcols_tot, nblkrows_tot, ndomains, ndomains2, nNodes, &
     853              :          recv_size2_total, recv_size_total, row, row_size, send_size2_total, send_size_total, &
     854              :          smcol, smrow, start_data
     855         3080 :       INTEGER, ALLOCATABLE, DIMENSION(:) :: first_col, first_row, offset2_block, offset_block, &
     856         3080 :          recv_data2, recv_offset2_cpu, recv_offset_cpu, recv_size2_cpu, recv_size_cpu, send_data2, &
     857         3080 :          send_offset2_cpu, send_offset_cpu, send_size2_cpu, send_size_cpu
     858              :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: recv_descriptor, send_descriptor
     859         3080 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, row_blk_size
     860              :       LOGICAL                                            :: found, transp
     861              :       REAL(KIND=dp)                                      :: antifactor
     862         3080 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: recv_data, send_data
     863         3080 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block_p
     864              :       TYPE(dbcsr_distribution_type)                      :: pattern_dist
     865              :       TYPE(mp_comm_type)                                 :: group
     866              : 
     867              : !INTEGER, PARAMETER                         :: select_row_col = 1
     868              : !INTEGER, PARAMETER                         :: select_row = 2
     869              : !                                                  subm_row_size,&
     870              : !                                                  subm_col_size,&
     871              : 
     872         3080 :       CALL timeset(routineN, handle)
     873              : 
     874         3080 :       CALL dbcsr_get_info(matrix, nblkrows_total=nblkrows_tot, nblkcols_total=nblkcols_tot)
     875         3080 :       ndomains = nblkcols_tot ! RZK-warning not true for atomic distributions
     876         3080 :       CALL dbcsr_get_info(distr_pattern, distribution=pattern_dist)
     877         3080 :       CALL dbcsr_distribution_get(pattern_dist, numnodes=nNodes, group=GroupID, mynode=myNode)
     878              : 
     879         3080 :       CALL group%set_handle(groupid)
     880              : 
     881         3080 :       matrix_type = dbcsr_get_matrix_type(matrix)
     882              : 
     883         3080 :       ldesc = 2
     884         9240 :       ALLOCATE (send_descriptor(ldesc, nNodes))
     885         6160 :       ALLOCATE (recv_descriptor(ldesc, nNodes))
     886        21560 :       send_descriptor(:, :) = 0
     887              : 
     888              :       ! find: the number of blocks and their sizes that must be sent to each cpu
     889              :       ! loop over all domains
     890        22454 :       DO idomain = 1, ndomains
     891              : 
     892        19374 :          dest_node = node_of_domain(idomain)
     893              : 
     894              :          ! loop over those rows that have non-zero quencher
     895        19374 :          index_sr = 1 ! index start row
     896        19374 :          IF (idomain > 1) index_sr = domain_map%index1(idomain - 1)
     897        19374 :          index_er = domain_map%index1(idomain) - 1 ! index end row
     898              : 
     899       105464 :          DO index_row = index_sr, index_er
     900              : 
     901        83010 :             row = domain_map%pairs(index_row, 1)
     902              : 
     903        83010 :             IF (job_type == select_row_col) THEN
     904              :                ! loop over those columns that have non-zero quencher
     905        14602 :                index_sc = 1 ! index start col
     906        14602 :                IF (idomain > 1) index_sc = domain_map%index1(idomain - 1)
     907        14602 :                index_ec = domain_map%index1(idomain) - 1 ! index end col
     908              :             ELSE
     909              :                ! fake loop
     910              :                index_sc = 1 ! index start col
     911              :                index_ec = 1 ! index end col
     912              :             END IF
     913              : 
     914       233570 :             DO index_col = index_sc, index_ec
     915              : 
     916       131186 :                IF (job_type == select_row_col) THEN
     917        62778 :                   col = domain_map%pairs(index_col, 1)
     918              :                ELSE
     919        68408 :                   col = idomain
     920              :                END IF
     921              : 
     922       131186 :                transp = .FALSE.
     923              :                CALL dbcsr_get_stored_coordinates(matrix, &
     924       131186 :                                                  row, col, block_node)
     925       214196 :                IF (block_node == myNode) THEN
     926        65593 :                   CALL dbcsr_get_block_p(matrix, row, col, block_p, found, row_size, col_size)
     927        65593 :                   IF (found) THEN
     928        50286 :                      send_descriptor(1, dest_node + 1) = send_descriptor(1, dest_node + 1) + 1
     929              :                      send_descriptor(2, dest_node + 1) = send_descriptor(2, dest_node + 1) + &
     930        50286 :                                                          row_size*col_size
     931              :                   END IF
     932              :                END IF
     933              : 
     934              :             END DO ! loop over columns
     935              : 
     936              :          END DO ! loop over rows
     937              : 
     938              :       END DO
     939              : 
     940              :       ! simple but quadratically scaling procedure
     941              :       ! loop over local blocks
     942              :       !CALL dbcsr_iterator_start(iter,matrix)
     943              :       !DO WHILE (dbcsr_iterator_blocks_left(iter))
     944              :       !   CALL dbcsr_iterator_next_block(iter,row,col,data_p,&
     945              :       !           row_size=row_size,col_size=col_size)
     946              :       !   DO idomain = 1, ndomains
     947              :       !      IF (job_type==select_row_col) THEN
     948              :       !         domain_needs_block=(qblk_exists(domain_map,col,idomain)&
     949              :       !            .AND.qblk_exists(domain_map,row,idomain))
     950              :       !      ELSE
     951              :       !         domain_needs_block=(idomain==col&
     952              :       !            .AND.qblk_exists(domain_map,row,idomain))
     953              :       !      ENDIF
     954              :       !      IF (domain_needs_block) THEN
     955              :       !         transp=.FALSE.
     956              :       !         dest_node=node_of_domain(idomain)
     957              :       !         !CALL dbcsr_get_stored_coordinates(distr_pattern,&
     958              :       !         !        idomain, idomain, transp, dest_node)
     959              :       !         send_descriptor(1,dest_node+1)=send_descriptor(1,dest_node+1)+1
     960              :       !         send_descriptor(2,dest_node+1)=send_descriptor(2,dest_node+1)+&
     961              :       !            row_size*col_size
     962              :       !      ENDIF
     963              :       !   ENDDO
     964              :       !ENDDO
     965              :       !CALL dbcsr_iterator_stop(iter)
     966              : 
     967              :       ! communicate number of blocks and their sizes to the other nodes
     968         3080 :       CALL group%alltoall(send_descriptor, recv_descriptor, ldesc)
     969              : 
     970        12320 :       ALLOCATE (send_size_cpu(nNodes), send_offset_cpu(nNodes))
     971         3080 :       send_offset_cpu(1) = 0
     972         3080 :       send_size_cpu(1) = send_descriptor(2, 1)
     973         6160 :       DO iNode = 2, nNodes
     974         3080 :          send_size_cpu(iNode) = send_descriptor(2, iNode)
     975              :          send_offset_cpu(iNode) = send_offset_cpu(iNode - 1) + &
     976         6160 :                                   send_size_cpu(iNode - 1)
     977              :       END DO
     978         3080 :       send_size_total = send_offset_cpu(nNodes) + send_size_cpu(nNodes)
     979              : 
     980         9240 :       ALLOCATE (recv_size_cpu(nNodes), recv_offset_cpu(nNodes))
     981         3080 :       recv_offset_cpu(1) = 0
     982         3080 :       recv_size_cpu(1) = recv_descriptor(2, 1)
     983         6160 :       DO iNode = 2, nNodes
     984         3080 :          recv_size_cpu(iNode) = recv_descriptor(2, iNode)
     985              :          recv_offset_cpu(iNode) = recv_offset_cpu(iNode - 1) + &
     986         6160 :                                   recv_size_cpu(iNode - 1)
     987              :       END DO
     988         3080 :       recv_size_total = recv_offset_cpu(nNodes) + recv_size_cpu(nNodes)
     989              : 
     990         9240 :       ALLOCATE (send_size2_cpu(nNodes), send_offset2_cpu(nNodes))
     991         3080 :       send_offset2_cpu(1) = 0
     992         3080 :       send_size2_cpu(1) = 2*send_descriptor(1, 1)
     993         6160 :       DO iNode = 2, nNodes
     994         3080 :          send_size2_cpu(iNode) = 2*send_descriptor(1, iNode)
     995              :          send_offset2_cpu(iNode) = send_offset2_cpu(iNode - 1) + &
     996         6160 :                                    send_size2_cpu(iNode - 1)
     997              :       END DO
     998         3080 :       send_size2_total = send_offset2_cpu(nNodes) + send_size2_cpu(nNodes)
     999              : 
    1000         9240 :       ALLOCATE (recv_size2_cpu(nNodes), recv_offset2_cpu(nNodes))
    1001         3080 :       recv_offset2_cpu(1) = 0
    1002         3080 :       recv_size2_cpu(1) = 2*recv_descriptor(1, 1)
    1003         6160 :       DO iNode = 2, nNodes
    1004         3080 :          recv_size2_cpu(iNode) = 2*recv_descriptor(1, iNode)
    1005              :          recv_offset2_cpu(iNode) = recv_offset2_cpu(iNode - 1) + &
    1006         6160 :                                    recv_size2_cpu(iNode - 1)
    1007              :       END DO
    1008         3080 :       recv_size2_total = recv_offset2_cpu(nNodes) + recv_size2_cpu(nNodes)
    1009              : 
    1010         3080 :       DEALLOCATE (send_descriptor)
    1011         3080 :       DEALLOCATE (recv_descriptor)
    1012              : 
    1013              :       ! collect data from the matrix into send_data
    1014         9182 :       ALLOCATE (send_data(send_size_total))
    1015         9220 :       ALLOCATE (recv_data(recv_size_total))
    1016         9182 :       ALLOCATE (send_data2(send_size2_total))
    1017         9220 :       ALLOCATE (recv_data2(recv_size2_total))
    1018         6160 :       ALLOCATE (offset_block(nNodes))
    1019         6160 :       ALLOCATE (offset2_block(nNodes))
    1020         9240 :       offset_block(:) = 0
    1021         9240 :       offset2_block(:) = 0
    1022              :       ! loop over all domains
    1023        22454 :       DO idomain = 1, ndomains
    1024              : 
    1025        19374 :          dest_node = node_of_domain(idomain)
    1026              : 
    1027              :          ! loop over those rows that have non-zero quencher
    1028        19374 :          index_sr = 1 ! index start row
    1029        19374 :          IF (idomain > 1) index_sr = domain_map%index1(idomain - 1)
    1030        19374 :          index_er = domain_map%index1(idomain) - 1 ! index end row
    1031              : 
    1032       105464 :          DO index_row = index_sr, index_er
    1033              : 
    1034        83010 :             row = domain_map%pairs(index_row, 1)
    1035              : 
    1036        83010 :             IF (job_type == select_row_col) THEN
    1037              :                ! loop over those columns that have non-zero quencher
    1038        14602 :                index_sc = 1 ! index start col
    1039        14602 :                IF (idomain > 1) index_sc = domain_map%index1(idomain - 1)
    1040        14602 :                index_ec = domain_map%index1(idomain) - 1 ! index end col
    1041              :             ELSE
    1042              :                ! fake loop
    1043              :                index_sc = 1 ! index start col
    1044              :                index_ec = 1 ! index end col
    1045              :             END IF
    1046              : 
    1047       233570 :             DO index_col = index_sc, index_ec
    1048              : 
    1049       131186 :                IF (job_type == select_row_col) THEN
    1050        62778 :                   col = domain_map%pairs(index_col, 1)
    1051              :                ELSE
    1052        68408 :                   col = idomain
    1053              :                END IF
    1054              : 
    1055       131186 :                transp = .FALSE.
    1056              :                CALL dbcsr_get_stored_coordinates(matrix, &
    1057       131186 :                                                  row, col, block_node)
    1058       214196 :                IF (block_node == myNode) THEN
    1059        65593 :                   CALL dbcsr_get_block_p(matrix, row, col, block_p, found, row_size, col_size)
    1060        65593 :                   IF (found) THEN
    1061              :                      !col_offset=0
    1062              :                      !DO icol=1,col_size
    1063              :                      !   start_data=send_offset_cpu(dest_node+1)+&
    1064              :                      !              offset_block(dest_node+1)+&
    1065              :                      !              col_offset
    1066              :                      !   send_data(start_data+1:start_data+row_size)=&
    1067              :                      !      data_p(1:row_size,icol)
    1068              :                      !   col_offset=col_offset+row_size
    1069              :                      !ENDDO
    1070        50286 :                      col_offset = row_size*col_size
    1071              :                      start_data = send_offset_cpu(dest_node + 1) + &
    1072        50286 :                                   offset_block(dest_node + 1)
    1073       100572 :                      send_data(start_data + 1:start_data + col_offset) = RESHAPE(block_p, [col_offset])
    1074        50286 :                      offset_block(dest_node + 1) = offset_block(dest_node + 1) + col_offset
    1075              :                      ! fill out row,col information
    1076              :                      send_data2(send_offset2_cpu(dest_node + 1) + &
    1077        50286 :                                 offset2_block(dest_node + 1) + 1) = row
    1078              :                      send_data2(send_offset2_cpu(dest_node + 1) + &
    1079        50286 :                                 offset2_block(dest_node + 1) + 2) = col
    1080        50286 :                      offset2_block(dest_node + 1) = offset2_block(dest_node + 1) + 2
    1081              :                   END IF
    1082              :                END IF
    1083              : 
    1084              :             END DO ! loop over columns
    1085              : 
    1086              :          END DO ! loop over rows
    1087              : 
    1088              :       END DO
    1089              :       ! more simple but quadratically scaling version
    1090              :       !CALL dbcsr_iterator_start(iter,matrix)
    1091              :       !DO WHILE (dbcsr_iterator_blocks_left(iter))
    1092              :       !   CALL dbcsr_iterator_next_block(iter,row,col,data_p,&
    1093              :       !           row_size=row_size,col_size=col_size)
    1094              :       !   DO idomain = 1, ndomains
    1095              :       !      IF (job_type==select_row_col) THEN
    1096              :       !         domain_needs_block=(qblk_exists(domain_map,col,idomain)&
    1097              :       !            .AND.qblk_exists(domain_map,row,idomain))
    1098              :       !      ELSE
    1099              :       !         domain_needs_block=(idomain==col&
    1100              :       !            .AND.qblk_exists(domain_map,row,idomain))
    1101              :       !      ENDIF
    1102              :       !      IF (domain_needs_block) THEN
    1103              :       !         transp=.FALSE.
    1104              :       !         dest_node=node_of_domain(idomain)
    1105              :       !         !CALL dbcsr_get_stored_coordinates(distr_pattern,&
    1106              :       !         !        idomain, idomain, transp, dest_node)
    1107              :       !         ! place the data appropriately
    1108              :       !         col_offset=0
    1109              :       !         DO icol=1,col_size
    1110              :       !            start_data=send_offset_cpu(dest_node+1)+&
    1111              :       !                       offset_block(dest_node+1)+&
    1112              :       !                       col_offset
    1113              :       !            send_data(start_data+1:start_data+row_size)=&
    1114              :       !               data_p(1:row_size,icol)
    1115              :       !            col_offset=col_offset+row_size
    1116              :       !         ENDDO
    1117              :       !         offset_block(dest_node+1)=offset_block(dest_node+1)+col_size*row_size
    1118              :       !         ! fill out row,col information
    1119              :       !         send_data2(send_offset2_cpu(dest_node+1)+&
    1120              :       !               offset2_block(dest_node+1)+1)=row
    1121              :       !         send_data2(send_offset2_cpu(dest_node+1)+&
    1122              :       !               offset2_block(dest_node+1)+2)=col
    1123              :       !         offset2_block(dest_node+1)=offset2_block(dest_node+1)+2
    1124              :       !      ENDIF
    1125              :       !   ENDDO
    1126              :       !ENDDO
    1127              :       !CALL dbcsr_iterator_stop(iter)
    1128              : 
    1129              :       ! send-receive all blocks
    1130              :       CALL group%alltoall(send_data, send_size_cpu, send_offset_cpu, &
    1131         3080 :                           recv_data, recv_size_cpu, recv_offset_cpu)
    1132              :       ! send-receive rows and cols of the blocks
    1133              :       CALL group%alltoall(send_data2, send_size2_cpu, send_offset2_cpu, &
    1134         3080 :                           recv_data2, recv_size2_cpu, recv_offset2_cpu)
    1135              : 
    1136         3080 :       DEALLOCATE (send_size_cpu, send_offset_cpu)
    1137         3080 :       DEALLOCATE (send_size2_cpu, send_offset2_cpu)
    1138         3080 :       DEALLOCATE (send_data)
    1139         3080 :       DEALLOCATE (send_data2)
    1140         3080 :       DEALLOCATE (offset_block)
    1141         3080 :       DEALLOCATE (offset2_block)
    1142              : 
    1143              :       ! copy blocks into submatrices
    1144         3080 :       CALL dbcsr_get_info(matrix, col_blk_size=col_blk_size, row_blk_size=row_blk_size)
    1145              : !    ALLOCATE(subm_row_size(ndomains),subm_col_size(ndomains))
    1146              : !    subm_row_size(:)=0
    1147              : !    subm_col_size(:)=0
    1148         3080 :       ndomains2 = SIZE(submatrix)
    1149         3080 :       IF (ndomains2 /= ndomains) THEN
    1150            0 :          CPABORT("wrong submatrix size")
    1151              :       END IF
    1152         3080 :       CALL release_submatrices(submatrix)
    1153        22454 :       submatrix(:)%nnodes = nNodes
    1154        22454 :       submatrix(:)%group = Group
    1155        22454 :       submatrix(:)%nrows = 0
    1156        22454 :       submatrix(:)%ncols = 0
    1157              : 
    1158        15400 :       ALLOCATE (first_row(nblkrows_tot), first_col(nblkcols_tot))
    1159        22454 :       submatrix(:)%domain = -1
    1160        22454 :       DO idomain = 1, ndomains
    1161        19374 :          dest_node = node_of_domain(idomain)
    1162              :          !transp=.FALSE.
    1163              :          !CALL dbcsr_get_stored_coordinates(distr_pattern,&
    1164              :          !        idomain, idomain, transp, dest_node)
    1165        22454 :          IF (dest_node == mynode) THEN
    1166         9687 :             submatrix(idomain)%domain = idomain
    1167         9687 :             submatrix(idomain)%nbrows = 0
    1168         9687 :             submatrix(idomain)%nbcols = 0
    1169              : 
    1170              :             ! loop over those rows that have non-zero quencher
    1171        90582 :             first_row(:) = -1
    1172         9687 :             index_sr = 1 ! index start row
    1173         9687 :             IF (idomain > 1) index_sr = domain_map%index1(idomain - 1)
    1174         9687 :             index_er = domain_map%index1(idomain) - 1 ! index end row
    1175        51192 :             DO index_row = index_sr, index_er
    1176        41505 :                row = domain_map%pairs(index_row, 1)
    1177              :                !DO row = 1, nblkrows_tot
    1178              :                !   IF (qblk_exists(domain_map,row,idomain)) THEN
    1179        41505 :                first_row(row) = submatrix(idomain)%nrows + 1
    1180        41505 :                submatrix(idomain)%nrows = submatrix(idomain)%nrows + row_blk_size(row)
    1181        51192 :                submatrix(idomain)%nbrows = submatrix(idomain)%nbrows + 1
    1182              :                !   ENDIF
    1183              :             END DO
    1184        29061 :             ALLOCATE (submatrix(idomain)%dbcsr_row(submatrix(idomain)%nbrows))
    1185        19374 :             ALLOCATE (submatrix(idomain)%size_brow(submatrix(idomain)%nbrows))
    1186         9687 :             smrow = 1
    1187              :             ! again loop over those rows that have non-zero quencher
    1188         9687 :             index_sr = 1 ! index start row
    1189         9687 :             IF (idomain > 1) index_sr = domain_map%index1(idomain - 1)
    1190         9687 :             index_er = domain_map%index1(idomain) - 1 ! index end row
    1191        51192 :             DO index_row = index_sr, index_er
    1192        41505 :                row = domain_map%pairs(index_row, 1)
    1193              :                !DO row = 1, nblkrows_tot
    1194              :                !   IF (first_row(row).ne.-1) THEN
    1195        41505 :                submatrix(idomain)%dbcsr_row(smrow) = row
    1196        41505 :                submatrix(idomain)%size_brow(smrow) = row_blk_size(row)
    1197        51192 :                smrow = smrow + 1
    1198              :                !   ENDIF
    1199              :             END DO
    1200              : 
    1201              :             ! loop over the necessary columns
    1202        90582 :             first_col(:) = -1
    1203         9687 :             IF (job_type == select_row_col) THEN
    1204              :                ! loop over those columns that have non-zero quencher
    1205         1837 :                index_sc = 1 ! index start col
    1206         1837 :                IF (idomain > 1) index_sc = domain_map%index1(idomain - 1)
    1207         1837 :                index_ec = domain_map%index1(idomain) - 1 ! index end col
    1208              :             ELSE
    1209              :                ! fake loop
    1210              :                index_sc = 1 ! index start col
    1211              :                index_ec = 1 ! index end col
    1212              :             END IF
    1213        24838 :             DO index_col = index_sc, index_ec
    1214        15151 :                IF (job_type == select_row_col) THEN
    1215         7301 :                   col = domain_map%pairs(index_col, 1)
    1216              :                ELSE
    1217         7850 :                   col = idomain
    1218              :                END IF
    1219              :                !DO col = 1, nblkcols_tot
    1220              :                !   IF (job_type==select_row_col) THEN
    1221              :                !      domain_needs_block=(qblk_exists(domain_map,col,idomain))
    1222              :                !   ELSE
    1223              :                !      domain_needs_block=(col==idomain) ! RZK-warning col belongs to the domain
    1224              :                !   ENDIF
    1225              :                !   IF (domain_needs_block) THEN
    1226        15151 :                first_col(col) = submatrix(idomain)%ncols + 1
    1227        15151 :                submatrix(idomain)%ncols = submatrix(idomain)%ncols + col_blk_size(col)
    1228        24838 :                submatrix(idomain)%nbcols = submatrix(idomain)%nbcols + 1
    1229              :                !   ENDIF
    1230              :             END DO
    1231              : 
    1232        29061 :             ALLOCATE (submatrix(idomain)%dbcsr_col(submatrix(idomain)%nbcols))
    1233        19374 :             ALLOCATE (submatrix(idomain)%size_bcol(submatrix(idomain)%nbcols))
    1234              : 
    1235              :             ! loop over the necessary columns again
    1236         9687 :             smcol = 1
    1237         9687 :             IF (job_type == select_row_col) THEN
    1238              :                ! loop over those columns that have non-zero quencher
    1239         1837 :                index_sc = 1 ! index start col
    1240         1837 :                IF (idomain > 1) index_sc = domain_map%index1(idomain - 1)
    1241         1837 :                index_ec = domain_map%index1(idomain) - 1 ! index end col
    1242              :             ELSE
    1243              :                ! fake loop
    1244              :                index_sc = 1 ! index start col
    1245              :                index_ec = 1 ! index end col
    1246              :             END IF
    1247        24838 :             DO index_col = index_sc, index_ec
    1248        15151 :                IF (job_type == select_row_col) THEN
    1249         7301 :                   col = domain_map%pairs(index_col, 1)
    1250              :                ELSE
    1251         7850 :                   col = idomain
    1252              :                END IF
    1253              :                !DO col = 1, nblkcols_tot
    1254              :                !   IF (first_col(col).ne.-1) THEN
    1255        15151 :                submatrix(idomain)%dbcsr_col(smcol) = col
    1256        15151 :                submatrix(idomain)%size_bcol(smcol) = col_blk_size(col)
    1257        24838 :                smcol = smcol + 1
    1258              :                !   ENDIF
    1259              :             END DO
    1260              : 
    1261            0 :             ALLOCATE (submatrix(idomain)%mdata( &
    1262              :                       submatrix(idomain)%nrows, &
    1263        38748 :                       submatrix(idomain)%ncols))
    1264      6331606 :             submatrix(idomain)%mdata(:, :) = 0.0_dp
    1265        29061 :             DO iNode = 1, nNodes
    1266        19374 :                block_offset = 0
    1267       238010 :                DO iBlock = 1, recv_size2_cpu(iNode)/2
    1268              :                   ! read the (row,col) of the block
    1269       208949 :                   row = recv_data2(recv_offset2_cpu(iNode) + (iBlock - 1)*2 + 1)
    1270       208949 :                   col = recv_data2(recv_offset2_cpu(iNode) + (iBlock - 1)*2 + 2)
    1271              :                   ! check if this block should be in the submatrix of this domain
    1272       208949 :                   IF ((first_col(col) /= -1) .AND. (first_row(row) /= -1)) THEN
    1273              :                      ! copy data from the received array into submatrix
    1274        73934 :                      start_data = recv_offset_cpu(iNode) + block_offset + 1
    1275       580303 :                      DO icol = 0, col_blk_size(col) - 1
    1276              :                         submatrix(idomain)%mdata(first_row(row): &
    1277              :                                                  first_row(row) + row_blk_size(row) - 1, &
    1278              :                                                  first_col(col) + icol) = &
    1279      8468182 :                            recv_data(start_data:start_data + row_blk_size(row) - 1)
    1280       580303 :                         start_data = start_data + row_blk_size(row)
    1281              :                      END DO
    1282        73934 :                      IF (job_type == select_row_col) THEN
    1283        51225 :                         IF (matrix_type == dbcsr_type_symmetric .OR. &
    1284              :                             matrix_type == dbcsr_type_antisymmetric) THEN
    1285              :                            ! copy data into the transposed block as well
    1286        10229 :                            antifactor = 1.0_dp
    1287        10229 :                            IF (matrix_type == dbcsr_type_antisymmetric) THEN
    1288            0 :                               antifactor = -1.0_dp
    1289              :                            END IF
    1290        10229 :                            start_data = recv_offset_cpu(iNode) + block_offset + 1
    1291       104710 :                            DO icol = 0, col_blk_size(col) - 1
    1292              :                               submatrix(idomain)%mdata(first_row(col) + icol, &
    1293              :                                                        first_col(row): &
    1294              :                                                        first_col(row) + row_blk_size(row) - 1) = &
    1295              :                                  antifactor*recv_data(start_data: &
    1296      1981842 :                                                       start_data + row_blk_size(row) - 1)
    1297       104710 :                               start_data = start_data + row_blk_size(row)
    1298              :                            END DO
    1299        40996 :                         ELSE IF (matrix_type == dbcsr_type_no_symmetry) THEN
    1300              :                         ELSE
    1301            0 :                            CPABORT("matrix type is NYI")
    1302              :                         END IF
    1303              :                      END IF
    1304              :                   END IF
    1305       228323 :                   block_offset = block_offset + col_blk_size(col)*row_blk_size(row)
    1306              :                END DO
    1307              :             END DO
    1308              :          END IF ! mynode.eq.dest_node
    1309              :       END DO ! loop over domains
    1310              : 
    1311         3080 :       DEALLOCATE (recv_size_cpu, recv_offset_cpu)
    1312         3080 :       DEALLOCATE (recv_size2_cpu, recv_offset2_cpu)
    1313         3080 :       DEALLOCATE (recv_data)
    1314         3080 :       DEALLOCATE (recv_data2)
    1315              : !    DEALLOCATE(subm_row_size,subm_col_size)
    1316         3080 :       DEALLOCATE (first_row, first_col)
    1317              : 
    1318         3080 :       CALL timestop(handle)
    1319              : 
    1320         3080 :    END SUBROUTINE construct_submatrices
    1321              : 
    1322              : ! **************************************************************************************************
    1323              : !> \brief Constructs a DBCSR matrix from submatrices
    1324              : !> \param matrix ...
    1325              : !> \param submatrix ...
    1326              : !> \param distr_pattern ...
    1327              : !> \par History
    1328              : !>       2013.01 created [Rustam Z. Khaliullin]
    1329              : !> \author Rustam Z. Khaliullin
    1330              : ! **************************************************************************************************
    1331         2396 :    SUBROUTINE construct_dbcsr_from_submatrices(matrix, submatrix, distr_pattern)
    1332              : 
    1333              :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
    1334              :       TYPE(domain_submatrix_type), DIMENSION(:), &
    1335              :          INTENT(IN)                                      :: submatrix
    1336              :       TYPE(dbcsr_type), INTENT(IN)                       :: distr_pattern
    1337              : 
    1338              :       CHARACTER(len=*), PARAMETER :: routineN = 'construct_dbcsr_from_submatrices'
    1339              : 
    1340              :       CHARACTER                                          :: matrix_type
    1341              :       INTEGER :: block_offset, col, col_offset, colsize, dest_node, GroupID, handle, iBlock, icol, &
    1342              :          idomain, iNode, irow_subm, ldesc, myNode, nblkcols_tot, nblkrows_tot, ndomains, &
    1343              :          ndomains2, nNodes, recv_size2_total, recv_size_total, row, rowsize, send_size2_total, &
    1344              :          send_size_total, smroff, start_data, unit_nr
    1345         2396 :       INTEGER, ALLOCATABLE, DIMENSION(:) :: offset2_block, offset_block, recv_data2, &
    1346         2396 :          recv_offset2_cpu, recv_offset_cpu, recv_size2_cpu, recv_size_cpu, send_data2, &
    1347         2396 :          send_offset2_cpu, send_offset_cpu, send_size2_cpu, send_size_cpu
    1348         2396 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: recv_descriptor, send_descriptor
    1349         2396 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, row_blk_size
    1350              :       LOGICAL                                            :: transp
    1351         2396 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: recv_data, send_data
    1352         2396 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: new_block
    1353         2396 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block_p
    1354              :       TYPE(cp_logger_type), POINTER                      :: logger
    1355              :       TYPE(dbcsr_distribution_type)                      :: pattern_dist
    1356              :       TYPE(dbcsr_iterator_type)                          :: iter
    1357              :       TYPE(mp_comm_type)                                 :: group
    1358              : 
    1359         2396 :       CALL timeset(routineN, handle)
    1360              : 
    1361              :       ! get a useful output_unit
    1362         2396 :       logger => cp_get_default_logger()
    1363         2396 :       IF (logger%para_env%is_source()) THEN
    1364         1198 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    1365              :       ELSE
    1366              :          unit_nr = -1
    1367              :       END IF
    1368              : 
    1369         2396 :       CALL dbcsr_get_info(matrix, nblkrows_total=nblkrows_tot, nblkcols_total=nblkcols_tot)
    1370         2396 :       ndomains = nblkcols_tot ! RZK-warning not true for atomic distributions
    1371         2396 :       ndomains2 = SIZE(submatrix)
    1372              : 
    1373         2396 :       IF (ndomains /= ndomains2) THEN
    1374            0 :          CPABORT("domain mismatch")
    1375              :       END IF
    1376              : 
    1377         2396 :       CALL dbcsr_get_info(distr_pattern, distribution=pattern_dist)
    1378         2396 :       CALL dbcsr_distribution_get(pattern_dist, numnodes=nNodes, group=GroupID, mynode=myNode)
    1379              : 
    1380         2396 :       CALL group%set_handle(GroupID)
    1381              : 
    1382         2396 :       matrix_type = dbcsr_get_matrix_type(matrix)
    1383         2396 :       IF (matrix_type /= dbcsr_type_no_symmetry) THEN
    1384            0 :          CPABORT("only non-symmetric matrices so far")
    1385              :       END IF
    1386              : 
    1387              :       ! remove all blocks from the dbcsr matrix
    1388         2396 :       CALL dbcsr_iterator_start(iter, matrix)
    1389        21295 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
    1390        18899 :          CALL dbcsr_iterator_next_block(iter, row, col, block_p)
    1391      1211147 :          block_p(:, :) = 0.0_dp
    1392              :       END DO
    1393         2396 :       CALL dbcsr_iterator_stop(iter)
    1394         2396 :       CALL dbcsr_filter(matrix, 0.1_dp)
    1395              : 
    1396         2396 :       CALL dbcsr_work_create(matrix, work_mutable=.TRUE.)
    1397              : 
    1398         2396 :       ldesc = 2
    1399         7188 :       ALLOCATE (send_descriptor(ldesc, nNodes))
    1400         4792 :       ALLOCATE (recv_descriptor(ldesc, nNodes))
    1401        16772 :       send_descriptor(:, :) = 0
    1402              : 
    1403              :       ! loop over domains - find how much data to send
    1404        18056 :       DO idomain = 1, ndomains
    1405              : 
    1406        18056 :          IF (submatrix(idomain)%domain > 0) THEN
    1407              : 
    1408        41966 :             DO irow_subm = 1, submatrix(idomain)%nbrows
    1409              : 
    1410        34136 :                IF (submatrix(idomain)%nbcols /= 1) THEN
    1411            0 :                   CPABORT("corrupt submatrix structure")
    1412              :                END IF
    1413              : 
    1414        34136 :                row = submatrix(idomain)%dbcsr_row(irow_subm)
    1415        34136 :                col = submatrix(idomain)%dbcsr_col(1)
    1416              : 
    1417        34136 :                IF (col /= idomain) THEN
    1418            0 :                   CPABORT("corrupt submatrix structure")
    1419              :                END IF
    1420              : 
    1421        34136 :                transp = .FALSE.
    1422              :                CALL dbcsr_get_stored_coordinates(distr_pattern, &
    1423        34136 :                                                  row, idomain, dest_node)
    1424              : 
    1425        34136 :                send_descriptor(1, dest_node + 1) = send_descriptor(1, dest_node + 1) + 1
    1426              :                send_descriptor(2, dest_node + 1) = send_descriptor(2, dest_node + 1) + &
    1427              :                                                    submatrix(idomain)%size_brow(irow_subm)* &
    1428        41966 :                                                    submatrix(idomain)%size_bcol(1)
    1429              : 
    1430              :             END DO ! loop over submatrix blocks
    1431              : 
    1432              :          END IF
    1433              : 
    1434              :       END DO ! loop over domains
    1435              : 
    1436              :       ! communicate number of blocks and their sizes to the other nodes
    1437         2396 :       CALL group%alltoall(send_descriptor, recv_descriptor, ldesc)
    1438              : 
    1439         9584 :       ALLOCATE (send_size_cpu(nNodes), send_offset_cpu(nNodes))
    1440         2396 :       send_offset_cpu(1) = 0
    1441         2396 :       send_size_cpu(1) = send_descriptor(2, 1)
    1442         4792 :       DO iNode = 2, nNodes
    1443         2396 :          send_size_cpu(iNode) = send_descriptor(2, iNode)
    1444              :          send_offset_cpu(iNode) = send_offset_cpu(iNode - 1) + &
    1445         4792 :                                   send_size_cpu(iNode - 1)
    1446              :       END DO
    1447         2396 :       send_size_total = send_offset_cpu(nNodes) + send_size_cpu(nNodes)
    1448              : 
    1449         7188 :       ALLOCATE (recv_size_cpu(nNodes), recv_offset_cpu(nNodes))
    1450         2396 :       recv_offset_cpu(1) = 0
    1451         2396 :       recv_size_cpu(1) = recv_descriptor(2, 1)
    1452         4792 :       DO iNode = 2, nNodes
    1453         2396 :          recv_size_cpu(iNode) = recv_descriptor(2, iNode)
    1454              :          recv_offset_cpu(iNode) = recv_offset_cpu(iNode - 1) + &
    1455         4792 :                                   recv_size_cpu(iNode - 1)
    1456              :       END DO
    1457         2396 :       recv_size_total = recv_offset_cpu(nNodes) + recv_size_cpu(nNodes)
    1458              : 
    1459         7188 :       ALLOCATE (send_size2_cpu(nNodes), send_offset2_cpu(nNodes))
    1460         2396 :       send_offset2_cpu(1) = 0
    1461         2396 :       send_size2_cpu(1) = 2*send_descriptor(1, 1)
    1462         4792 :       DO iNode = 2, nNodes
    1463         2396 :          send_size2_cpu(iNode) = 2*send_descriptor(1, iNode)
    1464              :          send_offset2_cpu(iNode) = send_offset2_cpu(iNode - 1) + &
    1465         4792 :                                    send_size2_cpu(iNode - 1)
    1466              :       END DO
    1467         2396 :       send_size2_total = send_offset2_cpu(nNodes) + send_size2_cpu(nNodes)
    1468              : 
    1469         7188 :       ALLOCATE (recv_size2_cpu(nNodes), recv_offset2_cpu(nNodes))
    1470         2396 :       recv_offset2_cpu(1) = 0
    1471         2396 :       recv_size2_cpu(1) = 2*recv_descriptor(1, 1)
    1472         4792 :       DO iNode = 2, nNodes
    1473         2396 :          recv_size2_cpu(iNode) = 2*recv_descriptor(1, iNode)
    1474              :          recv_offset2_cpu(iNode) = recv_offset2_cpu(iNode - 1) + &
    1475         4792 :                                    recv_size2_cpu(iNode - 1)
    1476              :       END DO
    1477         2396 :       recv_size2_total = recv_offset2_cpu(nNodes) + recv_size2_cpu(nNodes)
    1478              : 
    1479         2396 :       DEALLOCATE (send_descriptor)
    1480         2396 :       DEALLOCATE (recv_descriptor)
    1481              : 
    1482              :       ! collect data from the matrix into send_data
    1483         7188 :       ALLOCATE (send_data(send_size_total))
    1484         7153 :       ALLOCATE (recv_data(recv_size_total))
    1485         7188 :       ALLOCATE (send_data2(send_size2_total))
    1486         7153 :       ALLOCATE (recv_data2(recv_size2_total))
    1487         4792 :       ALLOCATE (offset_block(nNodes))
    1488         4792 :       ALLOCATE (offset2_block(nNodes))
    1489         7188 :       offset_block(:) = 0
    1490         7188 :       offset2_block(:) = 0
    1491              :       ! loop over domains - collect data to send
    1492        18056 :       DO idomain = 1, ndomains
    1493              : 
    1494        18056 :          IF (submatrix(idomain)%domain > 0) THEN
    1495              : 
    1496         7830 :             smroff = 0
    1497        41966 :             DO irow_subm = 1, submatrix(idomain)%nbrows
    1498              : 
    1499        34136 :                row = submatrix(idomain)%dbcsr_row(irow_subm)
    1500        34136 :                col = submatrix(idomain)%dbcsr_col(1)
    1501              : 
    1502        34136 :                rowsize = submatrix(idomain)%size_brow(irow_subm)
    1503        34136 :                colsize = submatrix(idomain)%size_bcol(1)
    1504              : 
    1505        34136 :                transp = .FALSE.
    1506              :                CALL dbcsr_get_stored_coordinates(distr_pattern, &
    1507        34136 :                                                  row, idomain, dest_node)
    1508              : 
    1509              :                ! place the data appropriately
    1510        34136 :                col_offset = 0
    1511       137454 :                DO icol = 1, colsize
    1512              :                   start_data = send_offset_cpu(dest_node + 1) + &
    1513              :                                offset_block(dest_node + 1) + &
    1514       103318 :                                col_offset
    1515              :                   send_data(start_data + 1:start_data + rowsize) = &
    1516      1452416 :                      submatrix(idomain)%mdata(smroff + 1:smroff + rowsize, icol)
    1517       137454 :                   col_offset = col_offset + rowsize
    1518              :                END DO
    1519              :                offset_block(dest_node + 1) = offset_block(dest_node + 1) + &
    1520        34136 :                                              colsize*rowsize
    1521              :                ! fill out row,col information
    1522              :                send_data2(send_offset2_cpu(dest_node + 1) + &
    1523        34136 :                           offset2_block(dest_node + 1) + 1) = row
    1524              :                send_data2(send_offset2_cpu(dest_node + 1) + &
    1525        34136 :                           offset2_block(dest_node + 1) + 2) = col
    1526        34136 :                offset2_block(dest_node + 1) = offset2_block(dest_node + 1) + 2
    1527              : 
    1528        76102 :                smroff = smroff + rowsize
    1529              : 
    1530              :             END DO
    1531              : 
    1532              :          END IF
    1533              : 
    1534              :       END DO ! loop over domains
    1535              : 
    1536              :       ! send-receive all blocks
    1537              :       CALL group%alltoall(send_data, send_size_cpu, send_offset_cpu, &
    1538         2396 :                           recv_data, recv_size_cpu, recv_offset_cpu)
    1539              :       ! send-receive rows and cols of the blocks
    1540              :       CALL group%alltoall(send_data2, send_size2_cpu, send_offset2_cpu, &
    1541         2396 :                           recv_data2, recv_size2_cpu, recv_offset2_cpu)
    1542              : 
    1543         2396 :       DEALLOCATE (send_size_cpu, send_offset_cpu)
    1544         2396 :       DEALLOCATE (send_size2_cpu, send_offset2_cpu)
    1545         2396 :       DEALLOCATE (send_data)
    1546         2396 :       DEALLOCATE (send_data2)
    1547         2396 :       DEALLOCATE (offset_block)
    1548         2396 :       DEALLOCATE (offset2_block)
    1549              : 
    1550              :       ! copy received data into dbcsr matrix
    1551         2396 :       CALL dbcsr_get_info(matrix, col_blk_size=col_blk_size, row_blk_size=row_blk_size)
    1552         7188 :       DO iNode = 1, nNodes
    1553         4792 :          block_offset = 0
    1554        41324 :          DO iBlock = 1, recv_size2_cpu(iNode)/2
    1555              :             ! read the (row,col) of the block
    1556        34136 :             row = recv_data2(recv_offset2_cpu(iNode) + (iBlock - 1)*2 + 1)
    1557        34136 :             col = recv_data2(recv_offset2_cpu(iNode) + (iBlock - 1)*2 + 2)
    1558              :             ! copy data from the received array into the matrix block
    1559        34136 :             start_data = recv_offset_cpu(iNode) + block_offset + 1
    1560       136544 :             ALLOCATE (new_block(row_blk_size(row), col_blk_size(col)))
    1561       137454 :             DO icol = 1, col_blk_size(col)
    1562              :                new_block(:, icol) = &
    1563      1452416 :                   recv_data(start_data:start_data + row_blk_size(row) - 1)
    1564       137454 :                start_data = start_data + row_blk_size(row)
    1565              :             END DO
    1566        34136 :             CALL dbcsr_put_block(matrix, row, col, new_block)
    1567        34136 :             DEALLOCATE (new_block)
    1568        38928 :             block_offset = block_offset + col_blk_size(col)*row_blk_size(row)
    1569              :          END DO
    1570              :       END DO
    1571              : 
    1572         2396 :       DEALLOCATE (recv_size_cpu, recv_offset_cpu)
    1573         2396 :       DEALLOCATE (recv_size2_cpu, recv_offset2_cpu)
    1574         2396 :       DEALLOCATE (recv_data)
    1575         2396 :       DEALLOCATE (recv_data2)
    1576              : 
    1577         2396 :       CALL dbcsr_finalize(matrix)
    1578              : 
    1579         2396 :       CALL timestop(handle)
    1580              : 
    1581        11980 :    END SUBROUTINE construct_dbcsr_from_submatrices
    1582              : 
    1583              : ! **************************************************************************************************
    1584              : !> \brief ...
    1585              : !> \param submatrices ...
    1586              : !> \param mpgroup ...
    1587              : ! **************************************************************************************************
    1588            0 :    SUBROUTINE print_submatrices(submatrices, mpgroup)
    1589              : 
    1590              :       TYPE(domain_submatrix_type), DIMENSION(:), &
    1591              :          INTENT(IN)                                      :: submatrices
    1592              :       TYPE(mp_comm_type), INTENT(IN)                     :: mpgroup
    1593              : 
    1594              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'print_submatrices'
    1595              : 
    1596              :       CHARACTER(len=30)                                  :: colstr, formatstr
    1597              :       INTEGER                                            :: handle, i, irow, n, ncols, nrows
    1598              : 
    1599            0 :       CALL timeset(routineN, handle)
    1600              : 
    1601            0 :       n = SIZE(submatrices)
    1602              : 
    1603            0 :       DO i = 1, n
    1604            0 :          nrows = SIZE(submatrices(i)%mdata, 1)
    1605            0 :          ncols = SIZE(submatrices(i)%mdata, 2)
    1606            0 :          WRITE (colstr, *) ncols
    1607            0 :          formatstr = '('//TRIM(ADJUSTL(colstr))//'F16.9)'
    1608            0 :          IF (submatrices(i)%domain > 0) THEN
    1609            0 :             WRITE (*, *) "SUBMATRIX: ", i, nrows, 'x', ncols
    1610            0 :             nrows = SIZE(submatrices(i)%mdata, 1)
    1611            0 :             DO irow = 1, nrows
    1612            0 :                WRITE (*, formatstr) submatrices(i)%mdata(irow, :)
    1613              :             END DO
    1614              :          END IF
    1615            0 :          CALL mpgroup%sync()
    1616              :       END DO
    1617              : 
    1618            0 :       CALL timestop(handle)
    1619              : 
    1620            0 :    END SUBROUTINE print_submatrices
    1621              : 
    1622              : ! **************************************************************************************************
    1623              : !> \brief Reports whether the DBCSR block (row,col) exists in the quencher
    1624              : !> \param map ...
    1625              : !> \param row ...
    1626              : !> \param col ...
    1627              : !> \return ...
    1628              : !> \par History
    1629              : !>       2013.01 created [Rustam Z. Khaliullin]
    1630              : !> \author Rustam Z. Khaliullin
    1631              : ! **************************************************************************************************
    1632            0 :    FUNCTION qblk_exists(map, row, col)
    1633              : 
    1634              :       TYPE(domain_map_type), INTENT(IN)                  :: map
    1635              :       INTEGER, INTENT(IN)                                :: row, col
    1636              :       LOGICAL                                            :: qblk_exists
    1637              : 
    1638              :       INTEGER                                            :: first, last, mid, ndomains
    1639              : 
    1640              : !CALL timeset(routineN,handle)
    1641              : 
    1642            0 :       ndomains = SIZE(map%index1)
    1643              : 
    1644            0 :       qblk_exists = .FALSE.
    1645            0 :       IF (col < 1 .OR. col > ndomains) RETURN
    1646            0 :       first = 1
    1647            0 :       IF (col > 1) first = map%index1(col - 1)
    1648            0 :       last = map%index1(col) - 1
    1649              : 
    1650              :       ! perform binary search within first-last
    1651            0 :       DO WHILE (last >= first)
    1652            0 :          mid = first + (last - first)/2
    1653            0 :          IF (map%pairs(mid, 1) > row) THEN
    1654            0 :             last = mid - 1
    1655            0 :          ELSE IF (map%pairs(mid, 1) < row) THEN
    1656            0 :             first = mid + 1
    1657              :          ELSE
    1658              :             qblk_exists = .TRUE. ! SUCCESS!!
    1659              :             EXIT
    1660              :          END IF
    1661              :       END DO
    1662              : 
    1663              :       !CALL timestop(handle)
    1664              : 
    1665              :       RETURN
    1666              : 
    1667              :    END FUNCTION qblk_exists
    1668              : 
    1669              : END MODULE domain_submatrix_methods
    1670              : 
        

Generated by: LCOV version 2.0-1