LCOV - code coverage report
Current view: top level - src - domain_submatrix_methods.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:34ef472) Lines: 565 676 83.6 %
Date: 2024-04-26 08:30:29 Functions: 14 20 70.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Subroutines to handle submatrices
      10             : !> \par History
      11             : !>       2013.01 created [Rustam Z Khaliullin]
      12             : !> \author Rustam Z Khaliullin
      13             : ! **************************************************************************************************
      14             : MODULE domain_submatrix_methods
      15             : 
      16             :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      17             :                                               cp_logger_get_default_unit_nr,&
      18             :                                               cp_logger_type
      19             :    USE dbcsr_api,                       ONLY: &
      20             :         dbcsr_distribution_get, dbcsr_distribution_type, dbcsr_filter, dbcsr_finalize, &
      21             :         dbcsr_get_block_p, dbcsr_get_info, dbcsr_get_matrix_type, dbcsr_get_stored_coordinates, &
      22             :         dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, dbcsr_iterator_start, &
      23             :         dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_nblkcols_total, dbcsr_nblkrows_total, &
      24             :         dbcsr_reserve_block2d, dbcsr_type, dbcsr_type_antisymmetric, dbcsr_type_no_symmetry, &
      25             :         dbcsr_type_symmetric, dbcsr_work_create
      26             :    USE domain_submatrix_types,          ONLY: domain_map_type,&
      27             :                                               domain_submatrix_type,&
      28             :                                               select_row_col
      29             :    USE kinds,                           ONLY: dp
      30             :    USE message_passing,                 ONLY: mp_comm_null,&
      31             :                                               mp_comm_type
      32             : #include "./base/base_uses.f90"
      33             : 
      34             :    IMPLICIT NONE
      35             : 
      36             :    PRIVATE
      37             : 
      38             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'domain_submatrix_methods'
      39             : 
      40             :    PUBLIC :: copy_submatrices, copy_submatrix_data, &
      41             :              release_submatrices, multiply_submatrices, add_submatrices, &
      42             :              construct_submatrices, init_submatrices, &
      43             :              construct_dbcsr_from_submatrices, &
      44             :              set_submatrices, &
      45             :              print_submatrices, maxnorm_submatrices
      46             : 
      47             :    INTERFACE init_submatrices
      48             :       MODULE PROCEDURE init_submatrices_0d
      49             :       MODULE PROCEDURE init_submatrices_1d
      50             :       MODULE PROCEDURE init_submatrices_2d
      51             :    END INTERFACE
      52             : 
      53             :    INTERFACE set_submatrices
      54             :       MODULE PROCEDURE set_submatrix_array
      55             :       MODULE PROCEDURE set_submatrix
      56             :    END INTERFACE
      57             : 
      58             :    INTERFACE copy_submatrices
      59             :       MODULE PROCEDURE copy_submatrix_array
      60             :       MODULE PROCEDURE copy_submatrix
      61             :    END INTERFACE
      62             : 
      63             :    INTERFACE release_submatrices
      64             :       MODULE PROCEDURE release_submatrix_array
      65             :       MODULE PROCEDURE release_submatrix
      66             :    END INTERFACE
      67             : 
      68             :    INTERFACE multiply_submatrices
      69             :       MODULE PROCEDURE multiply_submatrices_once
      70             :       MODULE PROCEDURE multiply_submatrices_array
      71             :    END INTERFACE
      72             : 
      73             :    INTERFACE add_submatrices
      74             :       MODULE PROCEDURE add_submatrices_once
      75             :       MODULE PROCEDURE add_submatrices_array
      76             :    END INTERFACE
      77             : 
      78             : CONTAINS
      79             : 
      80             : ! **************************************************************************************************
      81             : !> \brief ...
      82             : !> \param subm ...
      83             : ! **************************************************************************************************
      84           0 :    SUBROUTINE init_submatrices_0d(subm)
      85             : 
      86             :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: subm
      87             : 
      88           0 :       subm%domain = -1
      89           0 :       subm%nbrows = -1
      90           0 :       subm%nbcols = -1
      91           0 :       subm%nrows = -1
      92           0 :       subm%ncols = -1
      93           0 :       subm%nnodes = -1
      94           0 :       subm%group = mp_comm_null
      95             : 
      96           0 :    END SUBROUTINE init_submatrices_0d
      97             : 
      98             : ! **************************************************************************************************
      99             : !> \brief ...
     100             : !> \param subm ...
     101             : ! **************************************************************************************************
     102        7968 :    SUBROUTINE init_submatrices_1d(subm)
     103             : 
     104             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     105             :          INTENT(INOUT)                                   :: subm
     106             : 
     107       58884 :       subm(:)%domain = -1
     108       58884 :       subm(:)%nbrows = -1
     109       58884 :       subm(:)%nbcols = -1
     110       58884 :       subm(:)%nrows = -1
     111       58884 :       subm(:)%ncols = -1
     112       58884 :       subm(:)%nnodes = -1
     113       58884 :       subm(:)%group = mp_comm_null
     114             : 
     115        7968 :    END SUBROUTINE init_submatrices_1d
     116             : 
     117             : ! **************************************************************************************************
     118             : !> \brief ...
     119             : !> \param subm ...
     120             : ! **************************************************************************************************
     121        1142 :    SUBROUTINE init_submatrices_2d(subm)
     122             : 
     123             :       TYPE(domain_submatrix_type), DIMENSION(:, :), &
     124             :          INTENT(INOUT)                                   :: subm
     125             : 
     126       10282 :       subm(:, :)%domain = -1
     127       10282 :       subm(:, :)%nbrows = -1
     128       10282 :       subm(:, :)%nbcols = -1
     129       10282 :       subm(:, :)%nrows = -1
     130       10282 :       subm(:, :)%ncols = -1
     131       10282 :       subm(:, :)%nnodes = -1
     132       10282 :       subm(:, :)%group = mp_comm_null
     133             : 
     134        1142 :    END SUBROUTINE init_submatrices_2d
     135             : 
     136             : ! **************************************************************************************************
     137             : !> \brief ...
     138             : !> \param original ...
     139             : !> \param copy ...
     140             : !> \param copy_data ...
     141             : ! **************************************************************************************************
     142        1164 :    SUBROUTINE copy_submatrix_array(original, copy, copy_data)
     143             : 
     144             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     145             :          INTENT(IN)                                      :: original
     146             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     147             :          INTENT(INOUT)                                   :: copy
     148             :       LOGICAL, INTENT(IN)                                :: copy_data
     149             : 
     150             :       CHARACTER(len=*), PARAMETER :: routineN = 'copy_submatrix_array'
     151             : 
     152             :       INTEGER                                            :: handle, idomain, ndomains, ndomainsB
     153             : 
     154        1164 :       CALL timeset(routineN, handle)
     155             : 
     156        1164 :       ndomains = SIZE(original)
     157        1164 :       ndomainsB = SIZE(copy)
     158        1164 :       CPASSERT(ndomains .EQ. ndomainsB)
     159        8246 :       copy(:)%nnodes = original(:)%nnodes
     160        8246 :       copy(:)%group = original(:)%group
     161        8246 :       DO idomain = 1, ndomains
     162        8246 :          IF (original(idomain)%domain .GT. 0) THEN
     163        3541 :             CALL copy_submatrix(original(idomain), copy(idomain), copy_data)
     164             :          END IF
     165             :       END DO ! loop over domains
     166             : 
     167        1164 :       CALL timestop(handle)
     168             : 
     169        1164 :    END SUBROUTINE copy_submatrix_array
     170             : 
     171             : ! **************************************************************************************************
     172             : !> \brief ...
     173             : !> \param original ...
     174             : !> \param copy ...
     175             : !> \param copy_data ...
     176             : ! **************************************************************************************************
     177        4566 :    SUBROUTINE copy_submatrix(original, copy, copy_data)
     178             : 
     179             :       TYPE(domain_submatrix_type), INTENT(IN)            :: original
     180             :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: copy
     181             :       LOGICAL, INTENT(IN)                                :: copy_data
     182             : 
     183             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'copy_submatrix'
     184             : 
     185             :       INTEGER                                            :: handle, icol, irow
     186             : 
     187        4566 :       CALL timeset(routineN, handle)
     188             : 
     189        4566 :       copy%domain = original%domain
     190        4566 :       copy%nnodes = original%nnodes
     191        4566 :       copy%group = original%group
     192             : 
     193        4566 :       IF (original%domain .GT. 0) THEN
     194             : 
     195        4566 :          copy%nbrows = original%nbrows
     196        4566 :          copy%nbcols = original%nbcols
     197        4566 :          copy%nrows = original%nrows
     198        4566 :          copy%ncols = original%ncols
     199             : 
     200        4566 :          IF (.NOT. ALLOCATED(copy%dbcsr_row)) THEN
     201        9906 :             ALLOCATE (copy%dbcsr_row(original%nbrows))
     202             :          ELSE
     203        1264 :             IF (SIZE(copy%dbcsr_row) .NE. SIZE(original%dbcsr_row)) THEN
     204           0 :                DEALLOCATE (copy%dbcsr_row)
     205           0 :                ALLOCATE (copy%dbcsr_row(original%nbrows))
     206             :             END IF
     207             :          END IF
     208        4566 :          IF (.NOT. ALLOCATED(copy%dbcsr_col)) THEN
     209        9906 :             ALLOCATE (copy%dbcsr_col(original%nbcols))
     210             :          ELSE
     211        1264 :             IF (SIZE(copy%dbcsr_col) .NE. SIZE(original%dbcsr_col)) THEN
     212           0 :                DEALLOCATE (copy%dbcsr_col)
     213           0 :                ALLOCATE (copy%dbcsr_col(original%nbcols))
     214             :             END IF
     215             :          END IF
     216        4566 :          IF (.NOT. ALLOCATED(copy%size_brow)) THEN
     217        9906 :             ALLOCATE (copy%size_brow(original%nbrows))
     218             :          ELSE
     219        1264 :             IF (SIZE(copy%size_brow) .NE. SIZE(original%size_brow)) THEN
     220           0 :                DEALLOCATE (copy%size_brow)
     221           0 :                ALLOCATE (copy%size_brow(original%nbrows))
     222             :             END IF
     223             :          END IF
     224        4566 :          IF (.NOT. ALLOCATED(copy%size_bcol)) THEN
     225        9906 :             ALLOCATE (copy%size_bcol(original%nbcols))
     226             :          ELSE
     227        1264 :             IF (SIZE(copy%size_bcol) .NE. SIZE(original%size_bcol)) THEN
     228           0 :                DEALLOCATE (copy%size_bcol)
     229           0 :                ALLOCATE (copy%size_bcol(original%nbcols))
     230             :             END IF
     231             :          END IF
     232             : 
     233       24024 :          DO irow = 1, original%nbrows
     234       19458 :             copy%dbcsr_row(irow) = original%dbcsr_row(irow)
     235       24024 :             copy%size_brow(irow) = original%size_brow(irow)
     236             :          END DO
     237             : 
     238       15012 :          DO icol = 1, original%nbcols
     239       10446 :             copy%dbcsr_col(icol) = original%dbcsr_col(icol)
     240       15012 :             copy%size_bcol(icol) = original%size_bcol(icol)
     241             :          END DO
     242             : 
     243        4566 :          IF (copy_data) THEN
     244        3541 :             CALL copy_submatrix_data(original%mdata, copy)
     245             :          END IF
     246             : 
     247             :       END IF ! do not copy empty submatrix
     248             : 
     249        4566 :       CALL timestop(handle)
     250             : 
     251        4566 :    END SUBROUTINE copy_submatrix
     252             : 
     253             : ! **************************************************************************************************
     254             : !> \brief ...
     255             : !> \param array ...
     256             : !> \param copy ...
     257             : ! **************************************************************************************************
     258        4566 :    SUBROUTINE copy_submatrix_data(array, copy)
     259             : 
     260             :       REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: array
     261             :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: copy
     262             : 
     263             :       CHARACTER(len=*), PARAMETER :: routineN = 'copy_submatrix_data'
     264             : 
     265             :       INTEGER                                            :: ds1, ds2, handle, ms1, ms2
     266             : 
     267        4566 :       CALL timeset(routineN, handle)
     268             : 
     269        4566 :       CPASSERT(copy%domain .GT. 0)
     270             : 
     271        4566 :       ds1 = SIZE(array, 1)
     272        4566 :       ds2 = SIZE(array, 2)
     273             : 
     274        4566 :       IF (.NOT. ALLOCATED(copy%mdata)) THEN
     275       13208 :          ALLOCATE (copy%mdata(ds1, ds2))
     276             :       ELSE
     277        1264 :          ms1 = SIZE(copy%mdata, 1)
     278        1264 :          ms2 = SIZE(copy%mdata, 2)
     279        1264 :          IF ((ds1 .NE. ms1) .OR. (ds2 .NE. ms2)) THEN
     280           0 :             DEALLOCATE (copy%mdata)
     281           0 :             ALLOCATE (copy%mdata(ds1, ds2))
     282             :          END IF
     283             :       END IF
     284             : 
     285     5710612 :       copy%mdata(:, :) = array(:, :)
     286             : 
     287        4566 :       CALL timestop(handle)
     288             : 
     289        4566 :    END SUBROUTINE copy_submatrix_data
     290             : 
     291             : ! **************************************************************************************************
     292             : !> \brief ...
     293             : !> \param submatrices ...
     294             : !> \param scalar ...
     295             : ! **************************************************************************************************
     296           0 :    SUBROUTINE set_submatrix_array(submatrices, scalar)
     297             : 
     298             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     299             :          INTENT(INOUT)                                   :: submatrices
     300             :       REAL(KIND=dp), INTENT(IN)                          :: scalar
     301             : 
     302             :       CHARACTER(len=*), PARAMETER :: routineN = 'set_submatrix_array'
     303             : 
     304             :       INTEGER                                            :: handle, idomain, ndomains
     305             : 
     306           0 :       CALL timeset(routineN, handle)
     307             : 
     308           0 :       ndomains = SIZE(submatrices)
     309           0 :       DO idomain = 1, ndomains
     310           0 :          IF (submatrices(idomain)%domain .GT. 0) THEN
     311           0 :             CALL set_submatrix(submatrices(idomain), scalar)
     312             :          END IF
     313             :       END DO ! loop over domains
     314             : 
     315           0 :       CALL timestop(handle)
     316             : 
     317           0 :    END SUBROUTINE set_submatrix_array
     318             : 
     319             : ! **************************************************************************************************
     320             : !> \brief ...
     321             : !> \param submatrix ...
     322             : !> \param scalar ...
     323             : ! **************************************************************************************************
     324           0 :    SUBROUTINE set_submatrix(submatrix, scalar)
     325             : 
     326             :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: submatrix
     327             :       REAL(KIND=dp), INTENT(IN)                          :: scalar
     328             : 
     329             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'set_submatrix'
     330             : 
     331             :       INTEGER                                            :: ds1, ds2, handle, ms1, ms2
     332             : 
     333           0 :       CALL timeset(routineN, handle)
     334             : 
     335           0 :       CPASSERT(submatrix%domain .GT. 0)
     336           0 :       CPASSERT(submatrix%nrows .GT. 0)
     337           0 :       CPASSERT(submatrix%ncols .GT. 0)
     338             : 
     339           0 :       ds1 = submatrix%nrows
     340           0 :       ds2 = submatrix%ncols
     341             : 
     342           0 :       IF (.NOT. ALLOCATED(submatrix%mdata)) THEN
     343           0 :          ALLOCATE (submatrix%mdata(ds1, ds2))
     344             :       ELSE
     345           0 :          ms1 = SIZE(submatrix%mdata, 1)
     346           0 :          ms2 = SIZE(submatrix%mdata, 2)
     347           0 :          IF ((ds1 .NE. ms1) .OR. (ds2 .NE. ms2)) THEN
     348           0 :             DEALLOCATE (submatrix%mdata)
     349           0 :             ALLOCATE (submatrix%mdata(ds1, ds2))
     350             :          END IF
     351             :       END IF
     352             : 
     353           0 :       submatrix%mdata(:, :) = scalar
     354             : 
     355           0 :       CALL timestop(handle)
     356             : 
     357           0 :    END SUBROUTINE set_submatrix
     358             : 
     359             : ! **************************************************************************************************
     360             : !> \brief ...
     361             : !> \param subm ...
     362             : ! **************************************************************************************************
     363       12118 :    SUBROUTINE release_submatrix_array(subm)
     364             : 
     365             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     366             :          INTENT(INOUT)                                   :: subm
     367             : 
     368             :       CHARACTER(len=*), PARAMETER :: routineN = 'release_submatrix_array'
     369             : 
     370             :       INTEGER                                            :: handle, idomain, ndomains
     371             : 
     372       12118 :       CALL timeset(routineN, handle)
     373             : 
     374       12118 :       ndomains = SIZE(subm)
     375       90314 :       DO idomain = 1, ndomains
     376       90314 :          CALL release_submatrix(subm(idomain))
     377             :       END DO ! loop over domains
     378             : 
     379       12118 :       CALL timestop(handle)
     380             : 
     381       12118 :    END SUBROUTINE release_submatrix_array
     382             : 
     383             : ! **************************************************************************************************
     384             : !> \brief ...
     385             : !> \param subm ...
     386             : ! **************************************************************************************************
     387       78196 :    SUBROUTINE release_submatrix(subm)
     388             : 
     389             :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: subm
     390             : 
     391             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'release_submatrix'
     392             : 
     393             :       INTEGER                                            :: handle
     394             : 
     395       78196 :       CALL timeset(routineN, handle)
     396             : 
     397       78196 :       subm%domain = -1
     398       78196 :       subm%nbrows = -1
     399       78196 :       subm%nbcols = -1
     400       78196 :       subm%nrows = -1
     401       78196 :       subm%ncols = -1
     402       78196 :       subm%nnodes = -1
     403       78196 :       subm%group = mp_comm_null
     404             : 
     405       78196 :       IF (ALLOCATED(subm%dbcsr_row)) THEN
     406       20999 :          DEALLOCATE (subm%dbcsr_row)
     407             :       END IF
     408       78196 :       IF (ALLOCATED(subm%dbcsr_col)) THEN
     409       20999 :          DEALLOCATE (subm%dbcsr_col)
     410             :       END IF
     411       78196 :       IF (ALLOCATED(subm%size_brow)) THEN
     412       20999 :          DEALLOCATE (subm%size_brow)
     413             :       END IF
     414       78196 :       IF (ALLOCATED(subm%size_bcol)) THEN
     415       20999 :          DEALLOCATE (subm%size_bcol)
     416             :       END IF
     417       78196 :       IF (ALLOCATED(subm%mdata)) THEN
     418       21004 :          DEALLOCATE (subm%mdata)
     419             :       END IF
     420             : 
     421       78196 :       CALL timestop(handle)
     422             : 
     423       78196 :    END SUBROUTINE release_submatrix
     424             : 
     425             :    ! more complex routine might be necessary if submatrices are distributed
     426             : ! **************************************************************************************************
     427             : !> \brief ...
     428             : !> \param transA ...
     429             : !> \param transB ...
     430             : !> \param alpha ...
     431             : !> \param A ...
     432             : !> \param B ...
     433             : !> \param beta ...
     434             : !> \param C ...
     435             : ! **************************************************************************************************
     436       10733 :    SUBROUTINE multiply_submatrices_once(transA, transB, alpha, A, B, beta, C)
     437             : 
     438             :       CHARACTER, INTENT(IN)                              :: transA, transB
     439             :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     440             :       TYPE(domain_submatrix_type), INTENT(IN)            :: A, B
     441             :       REAL(KIND=dp), INTENT(IN)                          :: beta
     442             :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: C
     443             : 
     444             :       CHARACTER(len=*), PARAMETER :: routineN = 'multiply_submatrices_once'
     445             : 
     446             :       INTEGER                                            :: cs1, cs2, handle, icol, irow, K, K1, &
     447             :                                                             LDA, LDB, LDC, M, Mblocks, N, Nblocks
     448             :       LOGICAL                                            :: NOTA, NOTB
     449             : 
     450       10733 :       CALL timeset(routineN, handle)
     451             : 
     452       10733 :       CPASSERT(A%domain .GT. 0)
     453       10733 :       CPASSERT(B%domain .GT. 0)
     454       10733 :       CPASSERT(C%domain .GT. 0)
     455             : 
     456       10733 :       LDA = SIZE(A%mdata, 1)
     457       10733 :       LDB = SIZE(B%mdata, 1)
     458             : 
     459       10733 :       NOTA = (transA .EQ. 'N') .OR. (transA .EQ. 'n')
     460       10733 :       NOTB = (transB .EQ. 'N') .OR. (transB .EQ. 'n')
     461             : 
     462       10733 :       IF (NOTA) THEN
     463       10733 :          M = A%nrows
     464       10733 :          K = A%ncols
     465       10733 :          Mblocks = A%nbrows
     466             :       ELSE
     467           0 :          M = A%ncols
     468           0 :          K = A%nrows
     469           0 :          Mblocks = A%nbcols
     470             :       END IF
     471             : 
     472       10733 :       IF (NOTB) THEN
     473       10638 :          K1 = B%nrows
     474       10638 :          N = B%ncols
     475       10638 :          Nblocks = B%nbcols
     476             :       ELSE
     477          95 :          K1 = B%ncols
     478          95 :          N = B%nrows
     479          95 :          Nblocks = B%nbrows
     480             :       END IF
     481             : 
     482             :       ! these checks are for debugging only
     483       10733 :       CPASSERT(K .EQ. K1)
     484             : 
     485             :       ! conform C matrix
     486       10733 :       C%nrows = M
     487       10733 :       C%ncols = N
     488       10733 :       C%nbrows = Mblocks
     489       10733 :       C%nbcols = Nblocks
     490       10733 :       IF (ALLOCATED(C%dbcsr_row)) THEN
     491        2723 :          DEALLOCATE (C%dbcsr_row)
     492             :       END IF
     493       32199 :       ALLOCATE (C%dbcsr_row(C%nbrows))
     494       10733 :       IF (ALLOCATED(C%dbcsr_col)) THEN
     495        2723 :          DEALLOCATE (C%dbcsr_col)
     496             :       END IF
     497       32199 :       ALLOCATE (C%dbcsr_col(C%nbcols))
     498       10733 :       IF (ALLOCATED(C%size_brow)) THEN
     499        2723 :          DEALLOCATE (C%size_brow)
     500             :       END IF
     501       32199 :       ALLOCATE (C%size_brow(C%nbrows))
     502       10733 :       IF (ALLOCATED(C%size_bcol)) THEN
     503        2723 :          DEALLOCATE (C%size_bcol)
     504             :       END IF
     505       32199 :       ALLOCATE (C%size_bcol(C%nbcols))
     506             : 
     507       57666 :       DO irow = 1, C%nbrows
     508       57666 :          IF (NOTA) THEN
     509       46933 :             C%dbcsr_row(irow) = A%dbcsr_row(irow)
     510       46933 :             C%size_brow(irow) = A%size_brow(irow)
     511             :          ELSE
     512           0 :             C%dbcsr_row(irow) = A%dbcsr_col(irow)
     513           0 :             C%size_brow(irow) = A%size_bcol(irow)
     514             :          END IF
     515             :       END DO
     516             : 
     517       22360 :       DO icol = 1, C%nbcols
     518       22360 :          IF (NOTB) THEN
     519       11234 :             C%dbcsr_col(icol) = B%dbcsr_col(icol)
     520       11234 :             C%size_bcol(icol) = B%size_bcol(icol)
     521             :          ELSE
     522         393 :             C%dbcsr_col(icol) = B%dbcsr_row(icol)
     523         393 :             C%size_bcol(icol) = B%size_brow(icol)
     524             :          END IF
     525             :       END DO
     526             : 
     527       10733 :       IF (.NOT. ALLOCATED(C%mdata)) THEN
     528             :          !!! cannot use non-zero beta if C is not allocated
     529        8010 :          CPASSERT(beta .EQ. 0.0_dp)
     530       32040 :          ALLOCATE (C%mdata(C%nrows, C%ncols))
     531             :       ELSE
     532        2723 :          cs1 = SIZE(C%mdata, 1)
     533        2723 :          cs2 = SIZE(C%mdata, 2)
     534        2723 :          IF ((C%nrows .NE. cs1) .OR. (C%ncols .NE. cs2)) THEN
     535             :             !!! cannot deallocate data if beta is non-zero
     536           0 :             CPASSERT(beta .EQ. 0.0_dp)
     537           0 :             DEALLOCATE (C%mdata)
     538           0 :             ALLOCATE (C%mdata(C%nrows, C%ncols))
     539             :          END IF
     540             :       END IF
     541             : 
     542       10733 :       LDC = C%nrows
     543             : 
     544       10733 :       CALL DGEMM(transA, transB, M, N, K, alpha, A%mdata, LDA, B%mdata, LDB, beta, C%mdata, LDC)
     545             : 
     546       10733 :       C%nnodes = A%nnodes
     547       10733 :       C%group = A%group
     548             : 
     549       10733 :       CALL timestop(handle)
     550             : 
     551       10733 :    END SUBROUTINE multiply_submatrices_once
     552             : 
     553             : ! **************************************************************************************************
     554             : !> \brief ...
     555             : !> \param transA ...
     556             : !> \param transB ...
     557             : !> \param alpha ...
     558             : !> \param A ...
     559             : !> \param B ...
     560             : !> \param beta ...
     561             : !> \param C ...
     562             : ! **************************************************************************************************
     563        3328 :    SUBROUTINE multiply_submatrices_array(transA, transB, alpha, A, B, beta, C)
     564             : 
     565             :       CHARACTER, INTENT(IN)                              :: transA, transB
     566             :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     567             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     568             :          INTENT(IN)                                      :: A, B
     569             :       REAL(KIND=dp), INTENT(IN)                          :: beta
     570             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     571             :          INTENT(INOUT)                                   :: C
     572             : 
     573             :       CHARACTER(len=*), PARAMETER :: routineN = 'multiply_submatrices_array'
     574             : 
     575             :       INTEGER                                            :: handle, idomain, idomainA, idomainB, &
     576             :                                                             ndomains, ndomainsB, ndomainsC
     577             : 
     578        3328 :       CALL timeset(routineN, handle)
     579             : 
     580        3328 :       ndomains = SIZE(A)
     581        3328 :       ndomainsB = SIZE(B)
     582        3328 :       ndomainsC = SIZE(C)
     583             : 
     584        3328 :       CPASSERT(ndomains .EQ. ndomainsB)
     585        3328 :       CPASSERT(ndomainsB .EQ. ndomainsC)
     586             : 
     587       24794 :       DO idomain = 1, ndomains
     588             : 
     589       21466 :          idomainA = A(idomain)%domain
     590       21466 :          idomainB = B(idomain)%domain
     591             : 
     592       21466 :          CPASSERT(idomainA .EQ. idomainB)
     593             : 
     594       21466 :          C(idomain)%domain = idomainA
     595             : 
     596             :          ! check if the submatrix exists
     597       24794 :          IF (idomainA .GT. 0) THEN
     598       10733 :             CALL multiply_submatrices_once(transA, transB, alpha, A(idomain), B(idomain), beta, C(idomain))
     599             :          END IF ! submatrix for the domain exists
     600             : 
     601             :       END DO ! loop over domains
     602             : 
     603        3328 :       CALL timestop(handle)
     604             : 
     605        3328 :    END SUBROUTINE multiply_submatrices_array
     606             : 
     607             :    ! more complex routine might be necessary if submatrices are distributed
     608             : ! **************************************************************************************************
     609             : !> \brief ...
     610             : !> \param alpha ...
     611             : !> \param A ...
     612             : !> \param beta ...
     613             : !> \param B ...
     614             : !> \param transB ...
     615             : ! **************************************************************************************************
     616         205 :    SUBROUTINE add_submatrices_once(alpha, A, beta, B, transB)
     617             : 
     618             :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     619             :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: A
     620             :       REAL(KIND=dp), INTENT(IN)                          :: beta
     621             :       TYPE(domain_submatrix_type), INTENT(IN)            :: B
     622             :       CHARACTER, INTENT(IN)                              :: transB
     623             : 
     624             :       CHARACTER(len=*), PARAMETER :: routineN = 'add_submatrices_once'
     625             : 
     626             :       INTEGER                                            :: C1, C2, handle, icol, R1, R2
     627             :       LOGICAL                                            :: NOTB
     628             : 
     629         205 :       CALL timeset(routineN, handle)
     630             : 
     631         205 :       CPASSERT(A%domain .GT. 0)
     632         205 :       CPASSERT(B%domain .GT. 0)
     633             : 
     634         205 :       R1 = A%nrows
     635         205 :       C1 = A%ncols
     636             : 
     637         205 :       NOTB = (transB .EQ. 'N') .OR. (transB .EQ. 'n')
     638             : 
     639         205 :       IF (NOTB) THEN
     640         110 :          R2 = B%nrows
     641         110 :          C2 = B%ncols
     642             :       ELSE
     643          95 :          R2 = B%ncols
     644          95 :          C2 = B%nrows
     645             :       END IF
     646             : 
     647             :       ! these checks are for debugging only
     648         205 :       CPASSERT(C1 .EQ. C2)
     649         205 :       CPASSERT(R1 .EQ. R2)
     650             : 
     651         205 :       IF (NOTB) THEN
     652        7507 :          DO icol = 1, C1
     653      765846 :             A%mdata(:, icol) = alpha*A%mdata(:, icol) + beta*B%mdata(:, icol)
     654             :          END DO
     655             :       ELSE
     656        6690 :          DO icol = 1, C1
     657      701043 :             A%mdata(:, icol) = alpha*A%mdata(:, icol) + beta*B%mdata(icol, :)
     658             :          END DO
     659             :       END IF
     660             : 
     661         205 :       CALL timestop(handle)
     662             : 
     663         205 :    END SUBROUTINE add_submatrices_once
     664             : 
     665             : ! **************************************************************************************************
     666             : !> \brief ...
     667             : !> \param alpha ...
     668             : !> \param A ...
     669             : !> \param beta ...
     670             : !> \param B ...
     671             : !> \param transB ...
     672             : ! **************************************************************************************************
     673          66 :    SUBROUTINE add_submatrices_array(alpha, A, beta, B, transB)
     674             : 
     675             :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     676             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     677             :          INTENT(INOUT)                                   :: A
     678             :       REAL(KIND=dp), INTENT(IN)                          :: beta
     679             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     680             :          INTENT(IN)                                      :: B
     681             :       CHARACTER, INTENT(IN)                              :: transB
     682             : 
     683             :       CHARACTER(len=*), PARAMETER :: routineN = 'add_submatrices_array'
     684             : 
     685             :       INTEGER                                            :: handle, idomain, idomainA, idomainB, &
     686             :                                                             ndomains, ndomainsB
     687             : 
     688          66 :       CALL timeset(routineN, handle)
     689             : 
     690          66 :       ndomains = SIZE(A)
     691          66 :       ndomainsB = SIZE(B)
     692             : 
     693          66 :       CPASSERT(ndomains .EQ. ndomainsB)
     694             : 
     695         476 :       DO idomain = 1, ndomains
     696             : 
     697         410 :          idomainA = A(idomain)%domain
     698         410 :          idomainB = B(idomain)%domain
     699             : 
     700         410 :          CPASSERT(idomainA .EQ. idomainB)
     701             : 
     702             :          ! check if the submatrix exists
     703         476 :          IF (idomainA .GT. 0) THEN
     704         205 :             CALL add_submatrices_once(alpha, A(idomain), beta, B(idomain), transB)
     705             :          END IF ! submatrix for the domain exists
     706             : 
     707             :       END DO ! loop over domains
     708             : 
     709          66 :       CALL timestop(handle)
     710             : 
     711          66 :    END SUBROUTINE add_submatrices_array
     712             : 
     713             : ! **************************************************************************************************
     714             : !> \brief Computes the max norm of the collection of submatrices
     715             : !> \param submatrices ...
     716             : !> \param norm ...
     717             : !> \par History
     718             : !>       2013.03 created [Rustam Z. Khaliullin]
     719             : !> \author Rustam Z. Khaliullin
     720             : ! **************************************************************************************************
     721           2 :    SUBROUTINE maxnorm_submatrices(submatrices, norm)
     722             : 
     723             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     724             :          INTENT(IN)                                      :: submatrices
     725             :       REAL(KIND=dp), INTENT(OUT)                         :: norm
     726             : 
     727             :       CHARACTER(len=*), PARAMETER :: routineN = 'maxnorm_submatrices'
     728             : 
     729             :       INTEGER                                            :: handle, idomain, ndomains
     730             :       REAL(KIND=dp)                                      :: curr_norm, send_norm
     731           2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: recv_norm
     732             : 
     733           2 :       CALL timeset(routineN, handle)
     734             : 
     735           2 :       send_norm = 0.0_dp
     736             : 
     737           2 :       ndomains = SIZE(submatrices)
     738             : 
     739          12 :       DO idomain = 1, ndomains
     740             : 
     741             :          ! check if the submatrix is local
     742          12 :          IF (submatrices(idomain)%domain .GT. 0) THEN
     743       31607 :             curr_norm = MAXVAL(ABS(submatrices(idomain)%mdata))
     744           5 :             IF (curr_norm .GT. send_norm) send_norm = curr_norm
     745             :          END IF
     746             : 
     747             :       END DO ! loop over domains
     748             : 
     749             :       ! communicate local norm to the other nodes
     750           6 :       ALLOCATE (recv_norm(submatrices(1)%nnodes))
     751           2 :       CALL submatrices(1)%group%allgather(send_norm, recv_norm)
     752             : 
     753           8 :       norm = MAXVAL(recv_norm)
     754             : 
     755           2 :       DEALLOCATE (recv_norm)
     756             : 
     757           2 :       CALL timestop(handle)
     758             : 
     759           2 :    END SUBROUTINE maxnorm_submatrices
     760             : 
     761             : ! **************************************************************************************************
     762             : !> \brief Computes the sum of traces of the submatrix A.tr(B)
     763             : !> \param A ...
     764             : !> \param B ...
     765             : !> \param trace ...
     766             : !> \par History
     767             : !>       2013.03 created [Rustam Z. Khaliullin]
     768             : !> \author Rustam Z. Khaliullin
     769             : ! **************************************************************************************************
     770           0 :    SUBROUTINE trace_submatrices(A, B, trace)
     771             : 
     772             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     773             :          INTENT(IN)                                      :: A, B
     774             :       REAL(KIND=dp), INTENT(OUT)                         :: trace
     775             : 
     776             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'trace_submatrices'
     777             : 
     778             :       INTEGER                                            :: domainA, domainB, handle, idomain, &
     779             :                                                             ndomainsA, ndomainsB
     780             :       REAL(KIND=dp)                                      :: curr_trace, send_trace
     781           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: recv_trace
     782             : 
     783           0 :       CALL timeset(routineN, handle)
     784             : 
     785           0 :       send_trace = 0.0_dp
     786             : 
     787           0 :       ndomainsA = SIZE(A)
     788           0 :       ndomainsB = SIZE(B)
     789             : 
     790           0 :       CPASSERT(ndomainsA .EQ. ndomainsB)
     791             : 
     792           0 :       DO idomain = 1, ndomainsA
     793             : 
     794           0 :          domainA = A(idomain)%domain
     795           0 :          domainB = B(idomain)%domain
     796             : 
     797           0 :          CPASSERT(domainA .EQ. domainB)
     798             : 
     799             :          ! check if the submatrix is local
     800           0 :          IF (domainA .GT. 0) THEN
     801             : 
     802           0 :             CPASSERT(A(idomain)%nrows .EQ. B(idomain)%nrows)
     803           0 :             CPASSERT(A(idomain)%ncols .EQ. B(idomain)%ncols)
     804             : 
     805           0 :             curr_trace = SUM(A(idomain)%mdata(:, :)*B(idomain)%mdata(:, :))
     806           0 :             send_trace = send_trace + curr_trace
     807             : 
     808             :          END IF
     809             : 
     810             :       END DO ! loop over domains
     811             : 
     812             :       ! communicate local norm to the other nodes
     813           0 :       ALLOCATE (recv_trace(A(1)%nnodes))
     814           0 :       CALL A(1)%group%allgather(send_trace, recv_trace)
     815             : 
     816           0 :       trace = SUM(recv_trace)
     817             : 
     818           0 :       DEALLOCATE (recv_trace)
     819             : 
     820           0 :       CALL timestop(handle)
     821             : 
     822           0 :    END SUBROUTINE trace_submatrices
     823             : 
     824             : ! **************************************************************************************************
     825             : !> \brief Constructs submatrices for each ALMO domain by collecting distributed
     826             : !>        DBCSR blocks to local arrays
     827             : !> \param matrix ...
     828             : !> \param submatrix ...
     829             : !> \param distr_pattern ...
     830             : !> \param domain_map ...
     831             : !> \param node_of_domain ...
     832             : !> \param job_type ...
     833             : !> \par History
     834             : !>       2013.01 created [Rustam Z. Khaliullin]
     835             : !> \author Rustam Z. Khaliullin
     836             : ! **************************************************************************************************
     837        9240 :    SUBROUTINE construct_submatrices(matrix, submatrix, distr_pattern, domain_map, &
     838        3080 :                                     node_of_domain, job_type)
     839             : 
     840             :       TYPE(dbcsr_type), INTENT(IN)                       :: matrix
     841             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     842             :          INTENT(INOUT)                                   :: submatrix
     843             :       TYPE(dbcsr_type), INTENT(IN)                       :: distr_pattern
     844             :       TYPE(domain_map_type), INTENT(IN)                  :: domain_map
     845             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: node_of_domain
     846             :       INTEGER, INTENT(IN)                                :: job_type
     847             : 
     848             :       CHARACTER(len=*), PARAMETER :: routineN = 'construct_submatrices'
     849             : 
     850             :       CHARACTER                                          :: matrix_type
     851             :       INTEGER :: block_node, block_offset, col, col_offset, col_size, dest_node, GroupID, handle, &
     852             :          iBlock, icol, idomain, index_col, index_ec, index_er, index_row, index_sc, index_sr, &
     853             :          iNode, ldesc, myNode, nblkcols_tot, nblkrows_tot, ndomains, ndomains2, nNodes, &
     854             :          recv_size2_total, recv_size_total, row, row_size, send_size2_total, send_size_total, &
     855             :          smcol, smrow, start_data
     856        3080 :       INTEGER, ALLOCATABLE, DIMENSION(:) :: first_col, first_row, offset2_block, offset_block, &
     857        3080 :          recv_data2, recv_offset2_cpu, recv_offset_cpu, recv_size2_cpu, recv_size_cpu, send_data2, &
     858        3080 :          send_offset2_cpu, send_offset_cpu, send_size2_cpu, send_size_cpu
     859             :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: recv_descriptor, send_descriptor
     860        3080 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, row_blk_size
     861             :       LOGICAL                                            :: found, transp
     862             :       REAL(KIND=dp)                                      :: antifactor
     863        3080 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: recv_data, send_data
     864        3080 :       REAL(KIND=dp), DIMENSION(:), POINTER               :: block_p
     865             :       TYPE(dbcsr_distribution_type)                      :: pattern_dist
     866             :       TYPE(mp_comm_type)                                 :: group
     867             : 
     868             : !INTEGER, PARAMETER                         :: select_row_col = 1
     869             : !INTEGER, PARAMETER                         :: select_row = 2
     870             : !                                                  subm_row_size,&
     871             : !                                                  subm_col_size,&
     872             : 
     873        3080 :       CALL timeset(routineN, handle)
     874             : 
     875        3080 :       nblkrows_tot = dbcsr_nblkrows_total(matrix)
     876        3080 :       nblkcols_tot = dbcsr_nblkcols_total(matrix)
     877             : 
     878        3080 :       ndomains = nblkcols_tot ! RZK-warning not true for atomic distributions
     879        3080 :       CALL dbcsr_get_info(distr_pattern, distribution=pattern_dist)
     880        3080 :       CALL dbcsr_distribution_get(pattern_dist, numnodes=nNodes, group=GroupID, mynode=myNode)
     881             : 
     882        3080 :       CALL group%set_handle(groupid)
     883             : 
     884        3080 :       matrix_type = dbcsr_get_matrix_type(matrix)
     885             : 
     886        3080 :       ldesc = 2
     887        9240 :       ALLOCATE (send_descriptor(ldesc, nNodes))
     888        9240 :       ALLOCATE (recv_descriptor(ldesc, nNodes))
     889       21560 :       send_descriptor(:, :) = 0
     890             : 
     891             :       ! find: the number of blocks and their sizes that must be sent to each cpu
     892             :       ! loop over all domains
     893       22454 :       DO idomain = 1, ndomains
     894             : 
     895       19374 :          dest_node = node_of_domain(idomain)
     896             : 
     897             :          ! loop over those rows that have non-zero quencher
     898       19374 :          index_sr = 1 ! index start row
     899       19374 :          IF (idomain .GT. 1) index_sr = domain_map%index1(idomain - 1)
     900       19374 :          index_er = domain_map%index1(idomain) - 1 ! index end row
     901             : 
     902      105464 :          DO index_row = index_sr, index_er
     903             : 
     904       83010 :             row = domain_map%pairs(index_row, 1)
     905             : 
     906       83010 :             IF (job_type == select_row_col) THEN
     907             :                ! loop over those columns that have non-zero quencher
     908       14602 :                index_sc = 1 ! index start col
     909       14602 :                IF (idomain .GT. 1) index_sc = domain_map%index1(idomain - 1)
     910       14602 :                index_ec = domain_map%index1(idomain) - 1 ! index end col
     911             :             ELSE
     912             :                ! fake loop
     913             :                index_sc = 1 ! index start col
     914             :                index_ec = 1 ! index end col
     915             :             END IF
     916             : 
     917      233570 :             DO index_col = index_sc, index_ec
     918             : 
     919      131186 :                IF (job_type == select_row_col) THEN
     920       62778 :                   col = domain_map%pairs(index_col, 1)
     921             :                ELSE
     922       68408 :                   col = idomain
     923             :                END IF
     924             : 
     925      131186 :                transp = .FALSE.
     926             :                CALL dbcsr_get_stored_coordinates(matrix, &
     927      131186 :                                                  row, col, block_node)
     928      214196 :                IF (block_node .EQ. myNode) THEN
     929       65593 :                   CALL dbcsr_get_block_p(matrix, row, col, block_p, found, row_size, col_size)
     930       65593 :                   IF (found) THEN
     931       50286 :                      send_descriptor(1, dest_node + 1) = send_descriptor(1, dest_node + 1) + 1
     932             :                      send_descriptor(2, dest_node + 1) = send_descriptor(2, dest_node + 1) + &
     933       50286 :                                                          row_size*col_size
     934             :                   END IF
     935             :                END IF
     936             : 
     937             :             END DO ! loop over columns
     938             : 
     939             :          END DO ! loop over rows
     940             : 
     941             :       END DO
     942             : 
     943             :       ! simple but quadratically scaling procedure
     944             :       ! loop over local blocks
     945             :       !CALL dbcsr_iterator_start(iter,matrix)
     946             :       !DO WHILE (dbcsr_iterator_blocks_left(iter))
     947             :       !   CALL dbcsr_iterator_next_block(iter,row,col,data_p,&
     948             :       !           row_size=row_size,col_size=col_size)
     949             :       !   DO idomain = 1, ndomains
     950             :       !      IF (job_type==select_row_col) THEN
     951             :       !         domain_needs_block=(qblk_exists(domain_map,col,idomain)&
     952             :       !            .AND.qblk_exists(domain_map,row,idomain))
     953             :       !      ELSE
     954             :       !         domain_needs_block=(idomain==col&
     955             :       !            .AND.qblk_exists(domain_map,row,idomain))
     956             :       !      ENDIF
     957             :       !      IF (domain_needs_block) THEN
     958             :       !         transp=.FALSE.
     959             :       !         dest_node=node_of_domain(idomain)
     960             :       !         !CALL dbcsr_get_stored_coordinates(distr_pattern,&
     961             :       !         !        idomain, idomain, transp, dest_node)
     962             :       !         send_descriptor(1,dest_node+1)=send_descriptor(1,dest_node+1)+1
     963             :       !         send_descriptor(2,dest_node+1)=send_descriptor(2,dest_node+1)+&
     964             :       !            row_size*col_size
     965             :       !      ENDIF
     966             :       !   ENDDO
     967             :       !ENDDO
     968             :       !CALL dbcsr_iterator_stop(iter)
     969             : 
     970             :       ! communicate number of blocks and their sizes to the other nodes
     971        3080 :       CALL group%alltoall(send_descriptor, recv_descriptor, ldesc)
     972             : 
     973       15400 :       ALLOCATE (send_size_cpu(nNodes), send_offset_cpu(nNodes))
     974        3080 :       send_offset_cpu(1) = 0
     975        3080 :       send_size_cpu(1) = send_descriptor(2, 1)
     976        6160 :       DO iNode = 2, nNodes
     977        3080 :          send_size_cpu(iNode) = send_descriptor(2, iNode)
     978             :          send_offset_cpu(iNode) = send_offset_cpu(iNode - 1) + &
     979        6160 :                                   send_size_cpu(iNode - 1)
     980             :       END DO
     981        3080 :       send_size_total = send_offset_cpu(nNodes) + send_size_cpu(nNodes)
     982             : 
     983       15400 :       ALLOCATE (recv_size_cpu(nNodes), recv_offset_cpu(nNodes))
     984        3080 :       recv_offset_cpu(1) = 0
     985        3080 :       recv_size_cpu(1) = recv_descriptor(2, 1)
     986        6160 :       DO iNode = 2, nNodes
     987        3080 :          recv_size_cpu(iNode) = recv_descriptor(2, iNode)
     988             :          recv_offset_cpu(iNode) = recv_offset_cpu(iNode - 1) + &
     989        6160 :                                   recv_size_cpu(iNode - 1)
     990             :       END DO
     991        3080 :       recv_size_total = recv_offset_cpu(nNodes) + recv_size_cpu(nNodes)
     992             : 
     993       15400 :       ALLOCATE (send_size2_cpu(nNodes), send_offset2_cpu(nNodes))
     994        3080 :       send_offset2_cpu(1) = 0
     995        3080 :       send_size2_cpu(1) = 2*send_descriptor(1, 1)
     996        6160 :       DO iNode = 2, nNodes
     997        3080 :          send_size2_cpu(iNode) = 2*send_descriptor(1, iNode)
     998             :          send_offset2_cpu(iNode) = send_offset2_cpu(iNode - 1) + &
     999        6160 :                                    send_size2_cpu(iNode - 1)
    1000             :       END DO
    1001        3080 :       send_size2_total = send_offset2_cpu(nNodes) + send_size2_cpu(nNodes)
    1002             : 
    1003       15400 :       ALLOCATE (recv_size2_cpu(nNodes), recv_offset2_cpu(nNodes))
    1004        3080 :       recv_offset2_cpu(1) = 0
    1005        3080 :       recv_size2_cpu(1) = 2*recv_descriptor(1, 1)
    1006        6160 :       DO iNode = 2, nNodes
    1007        3080 :          recv_size2_cpu(iNode) = 2*recv_descriptor(1, iNode)
    1008             :          recv_offset2_cpu(iNode) = recv_offset2_cpu(iNode - 1) + &
    1009        6160 :                                    recv_size2_cpu(iNode - 1)
    1010             :       END DO
    1011        3080 :       recv_size2_total = recv_offset2_cpu(nNodes) + recv_size2_cpu(nNodes)
    1012             : 
    1013        3080 :       DEALLOCATE (send_descriptor)
    1014        3080 :       DEALLOCATE (recv_descriptor)
    1015             : 
    1016             :       ! collect data from the matrix into send_data
    1017        9182 :       ALLOCATE (send_data(send_size_total))
    1018        9220 :       ALLOCATE (recv_data(recv_size_total))
    1019        9182 :       ALLOCATE (send_data2(send_size2_total))
    1020        9220 :       ALLOCATE (recv_data2(recv_size2_total))
    1021        9240 :       ALLOCATE (offset_block(nNodes))
    1022        9240 :       ALLOCATE (offset2_block(nNodes))
    1023        9240 :       offset_block(:) = 0
    1024        9240 :       offset2_block(:) = 0
    1025             :       ! loop over all domains
    1026       22454 :       DO idomain = 1, ndomains
    1027             : 
    1028       19374 :          dest_node = node_of_domain(idomain)
    1029             : 
    1030             :          ! loop over those rows that have non-zero quencher
    1031       19374 :          index_sr = 1 ! index start row
    1032       19374 :          IF (idomain .GT. 1) index_sr = domain_map%index1(idomain - 1)
    1033       19374 :          index_er = domain_map%index1(idomain) - 1 ! index end row
    1034             : 
    1035      105464 :          DO index_row = index_sr, index_er
    1036             : 
    1037       83010 :             row = domain_map%pairs(index_row, 1)
    1038             : 
    1039       83010 :             IF (job_type == select_row_col) THEN
    1040             :                ! loop over those columns that have non-zero quencher
    1041       14602 :                index_sc = 1 ! index start col
    1042       14602 :                IF (idomain .GT. 1) index_sc = domain_map%index1(idomain - 1)
    1043       14602 :                index_ec = domain_map%index1(idomain) - 1 ! index end col
    1044             :             ELSE
    1045             :                ! fake loop
    1046             :                index_sc = 1 ! index start col
    1047             :                index_ec = 1 ! index end col
    1048             :             END IF
    1049             : 
    1050      233570 :             DO index_col = index_sc, index_ec
    1051             : 
    1052      131186 :                IF (job_type == select_row_col) THEN
    1053       62778 :                   col = domain_map%pairs(index_col, 1)
    1054             :                ELSE
    1055       68408 :                   col = idomain
    1056             :                END IF
    1057             : 
    1058      131186 :                transp = .FALSE.
    1059             :                CALL dbcsr_get_stored_coordinates(matrix, &
    1060      131186 :                                                  row, col, block_node)
    1061      214196 :                IF (block_node .EQ. myNode) THEN
    1062       65593 :                   CALL dbcsr_get_block_p(matrix, row, col, block_p, found, row_size, col_size)
    1063       65593 :                   IF (found) THEN
    1064             :                      !col_offset=0
    1065             :                      !DO icol=1,col_size
    1066             :                      !   start_data=send_offset_cpu(dest_node+1)+&
    1067             :                      !              offset_block(dest_node+1)+&
    1068             :                      !              col_offset
    1069             :                      !   send_data(start_data+1:start_data+row_size)=&
    1070             :                      !      data_p(1:row_size,icol)
    1071             :                      !   col_offset=col_offset+row_size
    1072             :                      !ENDDO
    1073       50286 :                      col_offset = row_size*col_size
    1074             :                      start_data = send_offset_cpu(dest_node + 1) + &
    1075       50286 :                                   offset_block(dest_node + 1)
    1076             :                      send_data(start_data + 1:start_data + col_offset) = &
    1077     5618163 :                         block_p(1:col_offset)
    1078       50286 :                      offset_block(dest_node + 1) = offset_block(dest_node + 1) + col_offset
    1079             :                      ! fill out row,col information
    1080             :                      send_data2(send_offset2_cpu(dest_node + 1) + &
    1081       50286 :                                 offset2_block(dest_node + 1) + 1) = row
    1082             :                      send_data2(send_offset2_cpu(dest_node + 1) + &
    1083       50286 :                                 offset2_block(dest_node + 1) + 2) = col
    1084       50286 :                      offset2_block(dest_node + 1) = offset2_block(dest_node + 1) + 2
    1085             :                   END IF
    1086             :                END IF
    1087             : 
    1088             :             END DO ! loop over columns
    1089             : 
    1090             :          END DO ! loop over rows
    1091             : 
    1092             :       END DO
    1093             :       ! more simple but quadratically scaling version
    1094             :       !CALL dbcsr_iterator_start(iter,matrix)
    1095             :       !DO WHILE (dbcsr_iterator_blocks_left(iter))
    1096             :       !   CALL dbcsr_iterator_next_block(iter,row,col,data_p,&
    1097             :       !           row_size=row_size,col_size=col_size)
    1098             :       !   DO idomain = 1, ndomains
    1099             :       !      IF (job_type==select_row_col) THEN
    1100             :       !         domain_needs_block=(qblk_exists(domain_map,col,idomain)&
    1101             :       !            .AND.qblk_exists(domain_map,row,idomain))
    1102             :       !      ELSE
    1103             :       !         domain_needs_block=(idomain==col&
    1104             :       !            .AND.qblk_exists(domain_map,row,idomain))
    1105             :       !      ENDIF
    1106             :       !      IF (domain_needs_block) THEN
    1107             :       !         transp=.FALSE.
    1108             :       !         dest_node=node_of_domain(idomain)
    1109             :       !         !CALL dbcsr_get_stored_coordinates(distr_pattern,&
    1110             :       !         !        idomain, idomain, transp, dest_node)
    1111             :       !         ! place the data appropriately
    1112             :       !         col_offset=0
    1113             :       !         DO icol=1,col_size
    1114             :       !            start_data=send_offset_cpu(dest_node+1)+&
    1115             :       !                       offset_block(dest_node+1)+&
    1116             :       !                       col_offset
    1117             :       !            send_data(start_data+1:start_data+row_size)=&
    1118             :       !               data_p(1:row_size,icol)
    1119             :       !            col_offset=col_offset+row_size
    1120             :       !         ENDDO
    1121             :       !         offset_block(dest_node+1)=offset_block(dest_node+1)+col_size*row_size
    1122             :       !         ! fill out row,col information
    1123             :       !         send_data2(send_offset2_cpu(dest_node+1)+&
    1124             :       !               offset2_block(dest_node+1)+1)=row
    1125             :       !         send_data2(send_offset2_cpu(dest_node+1)+&
    1126             :       !               offset2_block(dest_node+1)+2)=col
    1127             :       !         offset2_block(dest_node+1)=offset2_block(dest_node+1)+2
    1128             :       !      ENDIF
    1129             :       !   ENDDO
    1130             :       !ENDDO
    1131             :       !CALL dbcsr_iterator_stop(iter)
    1132             : 
    1133             :       ! send-receive all blocks
    1134             :       CALL group%alltoall(send_data, send_size_cpu, send_offset_cpu, &
    1135        3080 :                           recv_data, recv_size_cpu, recv_offset_cpu)
    1136             :       ! send-receive rows and cols of the blocks
    1137             :       CALL group%alltoall(send_data2, send_size2_cpu, send_offset2_cpu, &
    1138        3080 :                           recv_data2, recv_size2_cpu, recv_offset2_cpu)
    1139             : 
    1140        3080 :       DEALLOCATE (send_size_cpu, send_offset_cpu)
    1141        3080 :       DEALLOCATE (send_size2_cpu, send_offset2_cpu)
    1142        3080 :       DEALLOCATE (send_data)
    1143        3080 :       DEALLOCATE (send_data2)
    1144        3080 :       DEALLOCATE (offset_block)
    1145        3080 :       DEALLOCATE (offset2_block)
    1146             : 
    1147             :       ! copy blocks into submatrices
    1148        3080 :       CALL dbcsr_get_info(matrix, col_blk_size=col_blk_size, row_blk_size=row_blk_size)
    1149             : !    ALLOCATE(subm_row_size(ndomains),subm_col_size(ndomains))
    1150             : !    subm_row_size(:)=0
    1151             : !    subm_col_size(:)=0
    1152        3080 :       ndomains2 = SIZE(submatrix)
    1153        3080 :       IF (ndomains2 .NE. ndomains) THEN
    1154           0 :          CPABORT("wrong submatrix size")
    1155             :       END IF
    1156        3080 :       CALL release_submatrices(submatrix)
    1157       22454 :       submatrix(:)%nnodes = nNodes
    1158       22454 :       submatrix(:)%group = Group
    1159       22454 :       submatrix(:)%nrows = 0
    1160       22454 :       submatrix(:)%ncols = 0
    1161             : 
    1162       15400 :       ALLOCATE (first_row(nblkrows_tot), first_col(nblkcols_tot))
    1163       22454 :       submatrix(:)%domain = -1
    1164       22454 :       DO idomain = 1, ndomains
    1165       19374 :          dest_node = node_of_domain(idomain)
    1166             :          !transp=.FALSE.
    1167             :          !CALL dbcsr_get_stored_coordinates(distr_pattern,&
    1168             :          !        idomain, idomain, transp, dest_node)
    1169       22454 :          IF (dest_node .EQ. mynode) THEN
    1170        9687 :             submatrix(idomain)%domain = idomain
    1171        9687 :             submatrix(idomain)%nbrows = 0
    1172        9687 :             submatrix(idomain)%nbcols = 0
    1173             : 
    1174             :             ! loop over those rows that have non-zero quencher
    1175       90582 :             first_row(:) = -1
    1176        9687 :             index_sr = 1 ! index start row
    1177        9687 :             IF (idomain .GT. 1) index_sr = domain_map%index1(idomain - 1)
    1178        9687 :             index_er = domain_map%index1(idomain) - 1 ! index end row
    1179       51192 :             DO index_row = index_sr, index_er
    1180       41505 :                row = domain_map%pairs(index_row, 1)
    1181             :                !DO row = 1, nblkrows_tot
    1182             :                !   IF (qblk_exists(domain_map,row,idomain)) THEN
    1183       41505 :                first_row(row) = submatrix(idomain)%nrows + 1
    1184       41505 :                submatrix(idomain)%nrows = submatrix(idomain)%nrows + row_blk_size(row)
    1185       51192 :                submatrix(idomain)%nbrows = submatrix(idomain)%nbrows + 1
    1186             :                !   ENDIF
    1187             :             END DO
    1188       29061 :             ALLOCATE (submatrix(idomain)%dbcsr_row(submatrix(idomain)%nbrows))
    1189       29061 :             ALLOCATE (submatrix(idomain)%size_brow(submatrix(idomain)%nbrows))
    1190        9687 :             smrow = 1
    1191             :             ! again loop over those rows that have non-zero quencher
    1192        9687 :             index_sr = 1 ! index start row
    1193        9687 :             IF (idomain .GT. 1) index_sr = domain_map%index1(idomain - 1)
    1194        9687 :             index_er = domain_map%index1(idomain) - 1 ! index end row
    1195       51192 :             DO index_row = index_sr, index_er
    1196       41505 :                row = domain_map%pairs(index_row, 1)
    1197             :                !DO row = 1, nblkrows_tot
    1198             :                !   IF (first_row(row).ne.-1) THEN
    1199       41505 :                submatrix(idomain)%dbcsr_row(smrow) = row
    1200       41505 :                submatrix(idomain)%size_brow(smrow) = row_blk_size(row)
    1201       51192 :                smrow = smrow + 1
    1202             :                !   ENDIF
    1203             :             END DO
    1204             : 
    1205             :             ! loop over the necessary columns
    1206       90582 :             first_col(:) = -1
    1207        9687 :             IF (job_type == select_row_col) THEN
    1208             :                ! loop over those columns that have non-zero quencher
    1209        1837 :                index_sc = 1 ! index start col
    1210        1837 :                IF (idomain .GT. 1) index_sc = domain_map%index1(idomain - 1)
    1211        1837 :                index_ec = domain_map%index1(idomain) - 1 ! index end col
    1212             :             ELSE
    1213             :                ! fake loop
    1214             :                index_sc = 1 ! index start col
    1215             :                index_ec = 1 ! index end col
    1216             :             END IF
    1217       24838 :             DO index_col = index_sc, index_ec
    1218       15151 :                IF (job_type == select_row_col) THEN
    1219        7301 :                   col = domain_map%pairs(index_col, 1)
    1220             :                ELSE
    1221        7850 :                   col = idomain
    1222             :                END IF
    1223             :                !DO col = 1, nblkcols_tot
    1224             :                !   IF (job_type==select_row_col) THEN
    1225             :                !      domain_needs_block=(qblk_exists(domain_map,col,idomain))
    1226             :                !   ELSE
    1227             :                !      domain_needs_block=(col==idomain) ! RZK-warning col belongs to the domain
    1228             :                !   ENDIF
    1229             :                !   IF (domain_needs_block) THEN
    1230       15151 :                first_col(col) = submatrix(idomain)%ncols + 1
    1231       15151 :                submatrix(idomain)%ncols = submatrix(idomain)%ncols + col_blk_size(col)
    1232       24838 :                submatrix(idomain)%nbcols = submatrix(idomain)%nbcols + 1
    1233             :                !   ENDIF
    1234             :             END DO
    1235             : 
    1236       29061 :             ALLOCATE (submatrix(idomain)%dbcsr_col(submatrix(idomain)%nbcols))
    1237       29061 :             ALLOCATE (submatrix(idomain)%size_bcol(submatrix(idomain)%nbcols))
    1238             : 
    1239             :             ! loop over the necessary columns again
    1240        9687 :             smcol = 1
    1241        9687 :             IF (job_type == select_row_col) THEN
    1242             :                ! loop over those columns that have non-zero quencher
    1243        1837 :                index_sc = 1 ! index start col
    1244        1837 :                IF (idomain .GT. 1) index_sc = domain_map%index1(idomain - 1)
    1245        1837 :                index_ec = domain_map%index1(idomain) - 1 ! index end col
    1246             :             ELSE
    1247             :                ! fake loop
    1248             :                index_sc = 1 ! index start col
    1249             :                index_ec = 1 ! index end col
    1250             :             END IF
    1251       24838 :             DO index_col = index_sc, index_ec
    1252       15151 :                IF (job_type == select_row_col) THEN
    1253        7301 :                   col = domain_map%pairs(index_col, 1)
    1254             :                ELSE
    1255        7850 :                   col = idomain
    1256             :                END IF
    1257             :                !DO col = 1, nblkcols_tot
    1258             :                !   IF (first_col(col).ne.-1) THEN
    1259       15151 :                submatrix(idomain)%dbcsr_col(smcol) = col
    1260       15151 :                submatrix(idomain)%size_bcol(smcol) = col_blk_size(col)
    1261       24838 :                smcol = smcol + 1
    1262             :                !   ENDIF
    1263             :             END DO
    1264             : 
    1265           0 :             ALLOCATE (submatrix(idomain)%mdata( &
    1266             :                       submatrix(idomain)%nrows, &
    1267       38748 :                       submatrix(idomain)%ncols))
    1268     6331606 :             submatrix(idomain)%mdata(:, :) = 0.0_dp
    1269       29061 :             DO iNode = 1, nNodes
    1270       19374 :                block_offset = 0
    1271      238010 :                DO iBlock = 1, recv_size2_cpu(iNode)/2
    1272             :                   ! read the (row,col) of the block
    1273      208949 :                   row = recv_data2(recv_offset2_cpu(iNode) + (iBlock - 1)*2 + 1)
    1274      208949 :                   col = recv_data2(recv_offset2_cpu(iNode) + (iBlock - 1)*2 + 2)
    1275             :                   ! check if this block should be in the submatrix of this domain
    1276      208949 :                   IF ((first_col(col) .NE. -1) .AND. (first_row(row) .NE. -1)) THEN
    1277             :                      ! copy data from the received array into submatrix
    1278       73934 :                      start_data = recv_offset_cpu(iNode) + block_offset + 1
    1279      580303 :                      DO icol = 0, col_blk_size(col) - 1
    1280             :                         submatrix(idomain)%mdata(first_row(row): &
    1281             :                                                  first_row(row) + row_blk_size(row) - 1, &
    1282             :                                                  first_col(col) + icol) = &
    1283     8468182 :                            recv_data(start_data:start_data + row_blk_size(row) - 1)
    1284      580303 :                         start_data = start_data + row_blk_size(row)
    1285             :                      END DO
    1286       73934 :                      IF (job_type == select_row_col) THEN
    1287       51225 :                         IF (matrix_type == dbcsr_type_symmetric .OR. &
    1288             :                             matrix_type == dbcsr_type_antisymmetric) THEN
    1289             :                            ! copy data into the transposed block as well
    1290       10229 :                            antifactor = 1.0_dp
    1291       10229 :                            IF (matrix_type == dbcsr_type_antisymmetric) THEN
    1292           0 :                               antifactor = -1.0_dp
    1293             :                            END IF
    1294       10229 :                            start_data = recv_offset_cpu(iNode) + block_offset + 1
    1295      104710 :                            DO icol = 0, col_blk_size(col) - 1
    1296             :                               submatrix(idomain)%mdata(first_row(col) + icol, &
    1297             :                                                        first_col(row): &
    1298             :                                                        first_col(row) + row_blk_size(row) - 1) = &
    1299             :                                  antifactor*recv_data(start_data: &
    1300     1981842 :                                                       start_data + row_blk_size(row) - 1)
    1301      104710 :                               start_data = start_data + row_blk_size(row)
    1302             :                            END DO
    1303       40996 :                         ELSE IF (matrix_type == dbcsr_type_no_symmetry) THEN
    1304             :                         ELSE
    1305           0 :                            CPABORT("matrix type is NYI")
    1306             :                         END IF
    1307             :                      END IF
    1308             :                   END IF
    1309      228323 :                   block_offset = block_offset + col_blk_size(col)*row_blk_size(row)
    1310             :                END DO
    1311             :             END DO
    1312             :          END IF ! mynode.eq.dest_node
    1313             :       END DO ! loop over domains
    1314             : 
    1315        3080 :       DEALLOCATE (recv_size_cpu, recv_offset_cpu)
    1316        3080 :       DEALLOCATE (recv_size2_cpu, recv_offset2_cpu)
    1317        3080 :       DEALLOCATE (recv_data)
    1318        3080 :       DEALLOCATE (recv_data2)
    1319             : !    DEALLOCATE(subm_row_size,subm_col_size)
    1320        3080 :       DEALLOCATE (first_row, first_col)
    1321             : 
    1322        3080 :       CALL timestop(handle)
    1323             : 
    1324        6160 :    END SUBROUTINE construct_submatrices
    1325             : 
    1326             : ! **************************************************************************************************
    1327             : !> \brief Constructs a DBCSR matrix from submatrices
    1328             : !> \param matrix ...
    1329             : !> \param submatrix ...
    1330             : !> \param distr_pattern ...
    1331             : !> \par History
    1332             : !>       2013.01 created [Rustam Z. Khaliullin]
    1333             : !> \author Rustam Z. Khaliullin
    1334             : ! **************************************************************************************************
    1335        2396 :    SUBROUTINE construct_dbcsr_from_submatrices(matrix, submatrix, distr_pattern)
    1336             : 
    1337             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
    1338             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    1339             :          INTENT(IN)                                      :: submatrix
    1340             :       TYPE(dbcsr_type), INTENT(IN)                       :: distr_pattern
    1341             : 
    1342             :       CHARACTER(len=*), PARAMETER :: routineN = 'construct_dbcsr_from_submatrices'
    1343             : 
    1344             :       CHARACTER                                          :: matrix_type
    1345             :       INTEGER :: block_offset, col, col_offset, colsize, dest_node, GroupID, handle, iBlock, icol, &
    1346             :          idomain, iNode, irow_subm, ldesc, myNode, nblkcols_tot, nblkrows_tot, ndomains, &
    1347             :          ndomains2, nNodes, recv_size2_total, recv_size_total, row, rowsize, send_size2_total, &
    1348             :          send_size_total, smroff, start_data, unit_nr
    1349        2396 :       INTEGER, ALLOCATABLE, DIMENSION(:) :: offset2_block, offset_block, recv_data2, &
    1350        2396 :          recv_offset2_cpu, recv_offset_cpu, recv_size2_cpu, recv_size_cpu, send_data2, &
    1351        2396 :          send_offset2_cpu, send_offset_cpu, send_size2_cpu, send_size_cpu
    1352        2396 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: recv_descriptor, send_descriptor
    1353        2396 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, row_blk_size
    1354             :       LOGICAL                                            :: transp
    1355        2396 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: recv_data, send_data
    1356        2396 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block_p
    1357             :       TYPE(cp_logger_type), POINTER                      :: logger
    1358             :       TYPE(dbcsr_distribution_type)                      :: pattern_dist
    1359             :       TYPE(dbcsr_iterator_type)                          :: iter
    1360             :       TYPE(mp_comm_type)                                 :: group
    1361             : 
    1362        2396 :       CALL timeset(routineN, handle)
    1363             : 
    1364             :       ! get a useful output_unit
    1365        2396 :       logger => cp_get_default_logger()
    1366        2396 :       IF (logger%para_env%is_source()) THEN
    1367        1198 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    1368             :       ELSE
    1369             :          unit_nr = -1
    1370             :       END IF
    1371             : 
    1372             :       nblkrows_tot = dbcsr_nblkrows_total(matrix)
    1373        2396 :       nblkcols_tot = dbcsr_nblkcols_total(matrix)
    1374             : 
    1375        2396 :       ndomains = nblkcols_tot ! RZK-warning not true for atomic distributions
    1376        2396 :       ndomains2 = SIZE(submatrix)
    1377             : 
    1378        2396 :       IF (ndomains .NE. ndomains2) THEN
    1379           0 :          CPABORT("domain mismatch")
    1380             :       END IF
    1381             : 
    1382        2396 :       CALL dbcsr_get_info(distr_pattern, distribution=pattern_dist)
    1383        2396 :       CALL dbcsr_distribution_get(pattern_dist, numnodes=nNodes, group=GroupID, mynode=myNode)
    1384             : 
    1385        2396 :       CALL group%set_handle(GroupID)
    1386             : 
    1387        2396 :       matrix_type = dbcsr_get_matrix_type(matrix)
    1388        2396 :       IF (matrix_type .NE. dbcsr_type_no_symmetry) THEN
    1389           0 :          CPABORT("only non-symmetric matrices so far")
    1390             :       END IF
    1391             : 
    1392             :       ! remove all blocks from the dbcsr matrix
    1393        2396 :       CALL dbcsr_iterator_start(iter, matrix)
    1394       21295 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
    1395       18899 :          CALL dbcsr_iterator_next_block(iter, row, col, block_p)
    1396     1211147 :          block_p(:, :) = 0.0_dp
    1397             :       END DO
    1398        2396 :       CALL dbcsr_iterator_stop(iter)
    1399        2396 :       CALL dbcsr_filter(matrix, 0.1_dp)
    1400             : 
    1401        2396 :       CALL dbcsr_work_create(matrix, work_mutable=.TRUE.)
    1402             : 
    1403        2396 :       ldesc = 2
    1404        7188 :       ALLOCATE (send_descriptor(ldesc, nNodes))
    1405        7188 :       ALLOCATE (recv_descriptor(ldesc, nNodes))
    1406       16772 :       send_descriptor(:, :) = 0
    1407             : 
    1408             :       ! loop over domains - find how much data to send
    1409       18056 :       DO idomain = 1, ndomains
    1410             : 
    1411       18056 :          IF (submatrix(idomain)%domain .GT. 0) THEN
    1412             : 
    1413       41966 :             DO irow_subm = 1, submatrix(idomain)%nbrows
    1414             : 
    1415       34136 :                IF (submatrix(idomain)%nbcols .NE. 1) THEN
    1416           0 :                   CPABORT("corrupt submatrix structure")
    1417             :                END IF
    1418             : 
    1419       34136 :                row = submatrix(idomain)%dbcsr_row(irow_subm)
    1420       34136 :                col = submatrix(idomain)%dbcsr_col(1)
    1421             : 
    1422       34136 :                IF (col .NE. idomain) THEN
    1423           0 :                   CPABORT("corrupt submatrix structure")
    1424             :                END IF
    1425             : 
    1426       34136 :                transp = .FALSE.
    1427             :                CALL dbcsr_get_stored_coordinates(distr_pattern, &
    1428       34136 :                                                  row, idomain, dest_node)
    1429             : 
    1430       34136 :                send_descriptor(1, dest_node + 1) = send_descriptor(1, dest_node + 1) + 1
    1431             :                send_descriptor(2, dest_node + 1) = send_descriptor(2, dest_node + 1) + &
    1432             :                                                    submatrix(idomain)%size_brow(irow_subm)* &
    1433       41966 :                                                    submatrix(idomain)%size_bcol(1)
    1434             : 
    1435             :             END DO ! loop over submatrix blocks
    1436             : 
    1437             :          END IF
    1438             : 
    1439             :       END DO ! loop over domains
    1440             : 
    1441             :       ! communicate number of blocks and their sizes to the other nodes
    1442        2396 :       CALL group%alltoall(send_descriptor, recv_descriptor, ldesc)
    1443             : 
    1444       11980 :       ALLOCATE (send_size_cpu(nNodes), send_offset_cpu(nNodes))
    1445        2396 :       send_offset_cpu(1) = 0
    1446        2396 :       send_size_cpu(1) = send_descriptor(2, 1)
    1447        4792 :       DO iNode = 2, nNodes
    1448        2396 :          send_size_cpu(iNode) = send_descriptor(2, iNode)
    1449             :          send_offset_cpu(iNode) = send_offset_cpu(iNode - 1) + &
    1450        4792 :                                   send_size_cpu(iNode - 1)
    1451             :       END DO
    1452        2396 :       send_size_total = send_offset_cpu(nNodes) + send_size_cpu(nNodes)
    1453             : 
    1454       11980 :       ALLOCATE (recv_size_cpu(nNodes), recv_offset_cpu(nNodes))
    1455        2396 :       recv_offset_cpu(1) = 0
    1456        2396 :       recv_size_cpu(1) = recv_descriptor(2, 1)
    1457        4792 :       DO iNode = 2, nNodes
    1458        2396 :          recv_size_cpu(iNode) = recv_descriptor(2, iNode)
    1459             :          recv_offset_cpu(iNode) = recv_offset_cpu(iNode - 1) + &
    1460        4792 :                                   recv_size_cpu(iNode - 1)
    1461             :       END DO
    1462        2396 :       recv_size_total = recv_offset_cpu(nNodes) + recv_size_cpu(nNodes)
    1463             : 
    1464       11980 :       ALLOCATE (send_size2_cpu(nNodes), send_offset2_cpu(nNodes))
    1465        2396 :       send_offset2_cpu(1) = 0
    1466        2396 :       send_size2_cpu(1) = 2*send_descriptor(1, 1)
    1467        4792 :       DO iNode = 2, nNodes
    1468        2396 :          send_size2_cpu(iNode) = 2*send_descriptor(1, iNode)
    1469             :          send_offset2_cpu(iNode) = send_offset2_cpu(iNode - 1) + &
    1470        4792 :                                    send_size2_cpu(iNode - 1)
    1471             :       END DO
    1472        2396 :       send_size2_total = send_offset2_cpu(nNodes) + send_size2_cpu(nNodes)
    1473             : 
    1474       11980 :       ALLOCATE (recv_size2_cpu(nNodes), recv_offset2_cpu(nNodes))
    1475        2396 :       recv_offset2_cpu(1) = 0
    1476        2396 :       recv_size2_cpu(1) = 2*recv_descriptor(1, 1)
    1477        4792 :       DO iNode = 2, nNodes
    1478        2396 :          recv_size2_cpu(iNode) = 2*recv_descriptor(1, iNode)
    1479             :          recv_offset2_cpu(iNode) = recv_offset2_cpu(iNode - 1) + &
    1480        4792 :                                    recv_size2_cpu(iNode - 1)
    1481             :       END DO
    1482        2396 :       recv_size2_total = recv_offset2_cpu(nNodes) + recv_size2_cpu(nNodes)
    1483             : 
    1484        2396 :       DEALLOCATE (send_descriptor)
    1485        2396 :       DEALLOCATE (recv_descriptor)
    1486             : 
    1487             :       ! collect data from the matrix into send_data
    1488        7188 :       ALLOCATE (send_data(send_size_total))
    1489        7153 :       ALLOCATE (recv_data(recv_size_total))
    1490        7188 :       ALLOCATE (send_data2(send_size2_total))
    1491        7153 :       ALLOCATE (recv_data2(recv_size2_total))
    1492        7188 :       ALLOCATE (offset_block(nNodes))
    1493        7188 :       ALLOCATE (offset2_block(nNodes))
    1494        7188 :       offset_block(:) = 0
    1495        7188 :       offset2_block(:) = 0
    1496             :       ! loop over domains - collect data to send
    1497       18056 :       DO idomain = 1, ndomains
    1498             : 
    1499       18056 :          IF (submatrix(idomain)%domain .GT. 0) THEN
    1500             : 
    1501        7830 :             smroff = 0
    1502       41966 :             DO irow_subm = 1, submatrix(idomain)%nbrows
    1503             : 
    1504       34136 :                row = submatrix(idomain)%dbcsr_row(irow_subm)
    1505       34136 :                col = submatrix(idomain)%dbcsr_col(1)
    1506             : 
    1507       34136 :                rowsize = submatrix(idomain)%size_brow(irow_subm)
    1508       34136 :                colsize = submatrix(idomain)%size_bcol(1)
    1509             : 
    1510       34136 :                transp = .FALSE.
    1511             :                CALL dbcsr_get_stored_coordinates(distr_pattern, &
    1512       34136 :                                                  row, idomain, dest_node)
    1513             : 
    1514             :                ! place the data appropriately
    1515       34136 :                col_offset = 0
    1516      137454 :                DO icol = 1, colsize
    1517             :                   start_data = send_offset_cpu(dest_node + 1) + &
    1518             :                                offset_block(dest_node + 1) + &
    1519      103318 :                                col_offset
    1520             :                   send_data(start_data + 1:start_data + rowsize) = &
    1521     1452416 :                      submatrix(idomain)%mdata(smroff + 1:smroff + rowsize, icol)
    1522      137454 :                   col_offset = col_offset + rowsize
    1523             :                END DO
    1524             :                offset_block(dest_node + 1) = offset_block(dest_node + 1) + &
    1525       34136 :                                              colsize*rowsize
    1526             :                ! fill out row,col information
    1527             :                send_data2(send_offset2_cpu(dest_node + 1) + &
    1528       34136 :                           offset2_block(dest_node + 1) + 1) = row
    1529             :                send_data2(send_offset2_cpu(dest_node + 1) + &
    1530       34136 :                           offset2_block(dest_node + 1) + 2) = col
    1531       34136 :                offset2_block(dest_node + 1) = offset2_block(dest_node + 1) + 2
    1532             : 
    1533       76102 :                smroff = smroff + rowsize
    1534             : 
    1535             :             END DO
    1536             : 
    1537             :          END IF
    1538             : 
    1539             :       END DO ! loop over domains
    1540             : 
    1541             :       ! send-receive all blocks
    1542             :       CALL group%alltoall(send_data, send_size_cpu, send_offset_cpu, &
    1543        2396 :                           recv_data, recv_size_cpu, recv_offset_cpu)
    1544             :       ! send-receive rows and cols of the blocks
    1545             :       CALL group%alltoall(send_data2, send_size2_cpu, send_offset2_cpu, &
    1546        2396 :                           recv_data2, recv_size2_cpu, recv_offset2_cpu)
    1547             : 
    1548        2396 :       DEALLOCATE (send_size_cpu, send_offset_cpu)
    1549        2396 :       DEALLOCATE (send_size2_cpu, send_offset2_cpu)
    1550        2396 :       DEALLOCATE (send_data)
    1551        2396 :       DEALLOCATE (send_data2)
    1552        2396 :       DEALLOCATE (offset_block)
    1553        2396 :       DEALLOCATE (offset2_block)
    1554             : 
    1555             :       ! copy received data into dbcsr matrix
    1556        2396 :       CALL dbcsr_get_info(matrix, col_blk_size=col_blk_size, row_blk_size=row_blk_size)
    1557        7188 :       DO iNode = 1, nNodes
    1558        4792 :          block_offset = 0
    1559       41324 :          DO iBlock = 1, recv_size2_cpu(iNode)/2
    1560             :             ! read the (row,col) of the block
    1561       34136 :             row = recv_data2(recv_offset2_cpu(iNode) + (iBlock - 1)*2 + 1)
    1562       34136 :             col = recv_data2(recv_offset2_cpu(iNode) + (iBlock - 1)*2 + 2)
    1563             :             !CALL dbcsr_get_block_p(matrix,row,col,block_p,found)
    1564             :             !IF (.NOT.found) THEN
    1565       34136 :             NULLIFY (block_p)
    1566       34136 :             CALL dbcsr_reserve_block2d(matrix, row, col, block_p)
    1567       34136 :             CPASSERT(ASSOCIATED(block_p))
    1568             :             !ENDIF
    1569             :             ! copy data from the received array into the matrix block
    1570       34136 :             start_data = recv_offset_cpu(iNode) + block_offset + 1
    1571      137454 :             DO icol = 1, col_blk_size(col)
    1572             :                block_p(:, icol) = &
    1573     1452416 :                   recv_data(start_data:start_data + row_blk_size(row) - 1)
    1574      137454 :                start_data = start_data + row_blk_size(row)
    1575             :             END DO
    1576       38928 :             block_offset = block_offset + col_blk_size(col)*row_blk_size(row)
    1577             :          END DO
    1578             :       END DO
    1579             : 
    1580        2396 :       DEALLOCATE (recv_size_cpu, recv_offset_cpu)
    1581        2396 :       DEALLOCATE (recv_size2_cpu, recv_offset2_cpu)
    1582        2396 :       DEALLOCATE (recv_data)
    1583        2396 :       DEALLOCATE (recv_data2)
    1584             : 
    1585        2396 :       CALL dbcsr_finalize(matrix)
    1586             : 
    1587        2396 :       CALL timestop(handle)
    1588             : 
    1589        4792 :    END SUBROUTINE construct_dbcsr_from_submatrices
    1590             : 
    1591             : ! **************************************************************************************************
    1592             : !> \brief ...
    1593             : !> \param submatrices ...
    1594             : !> \param mpgroup ...
    1595             : ! **************************************************************************************************
    1596           0 :    SUBROUTINE print_submatrices(submatrices, mpgroup)
    1597             : 
    1598             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    1599             :          INTENT(IN)                                      :: submatrices
    1600             :       TYPE(mp_comm_type), INTENT(IN)                     :: mpgroup
    1601             : 
    1602             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'print_submatrices'
    1603             : 
    1604             :       CHARACTER(len=30)                                  :: colstr, formatstr
    1605             :       INTEGER                                            :: handle, i, irow, n, ncols, nrows
    1606             : 
    1607           0 :       CALL timeset(routineN, handle)
    1608             : 
    1609           0 :       n = SIZE(submatrices)
    1610             : 
    1611           0 :       DO i = 1, n
    1612           0 :          nrows = SIZE(submatrices(i)%mdata, 1)
    1613           0 :          ncols = SIZE(submatrices(i)%mdata, 2)
    1614           0 :          WRITE (colstr, *) ncols
    1615           0 :          formatstr = '('//TRIM(ADJUSTL(colstr))//'F16.9)'
    1616           0 :          IF (submatrices(i)%domain .GT. 0) THEN
    1617           0 :             WRITE (*, *) "SUBMATRIX: ", i, nrows, 'x', ncols
    1618           0 :             nrows = SIZE(submatrices(i)%mdata, 1)
    1619           0 :             DO irow = 1, nrows
    1620           0 :                WRITE (*, formatstr) submatrices(i)%mdata(irow, :)
    1621             :             END DO
    1622             :          END IF
    1623           0 :          CALL mpgroup%sync()
    1624             :       END DO
    1625             : 
    1626           0 :       CALL timestop(handle)
    1627             : 
    1628           0 :    END SUBROUTINE print_submatrices
    1629             : 
    1630             : ! **************************************************************************************************
    1631             : !> \brief Reports whether the DBCSR block (row,col) exists in the quencher
    1632             : !> \param map ...
    1633             : !> \param row ...
    1634             : !> \param col ...
    1635             : !> \return ...
    1636             : !> \par History
    1637             : !>       2013.01 created [Rustam Z. Khaliullin]
    1638             : !> \author Rustam Z. Khaliullin
    1639             : ! **************************************************************************************************
    1640           0 :    FUNCTION qblk_exists(map, row, col)
    1641             : 
    1642             :       TYPE(domain_map_type), INTENT(IN)                  :: map
    1643             :       INTEGER, INTENT(IN)                                :: row, col
    1644             :       LOGICAL                                            :: qblk_exists
    1645             : 
    1646             :       INTEGER                                            :: first, last, mid, ndomains
    1647             : 
    1648             : !CALL timeset(routineN,handle)
    1649             : 
    1650           0 :       ndomains = SIZE(map%index1)
    1651             : 
    1652           0 :       qblk_exists = .FALSE.
    1653           0 :       IF (col .LT. 1 .OR. col .GT. ndomains) RETURN
    1654           0 :       first = 1
    1655           0 :       IF (col .GT. 1) first = map%index1(col - 1)
    1656           0 :       last = map%index1(col) - 1
    1657             : 
    1658             :       ! perform binary search within first-last
    1659           0 :       DO WHILE (last .GE. first)
    1660           0 :          mid = first + (last - first)/2
    1661           0 :          IF (map%pairs(mid, 1) .GT. row) THEN
    1662           0 :             last = mid - 1
    1663           0 :          ELSE IF (map%pairs(mid, 1) .LT. row) THEN
    1664           0 :             first = mid + 1
    1665             :          ELSE
    1666             :             qblk_exists = .TRUE. ! SUCCESS!!
    1667             :             EXIT
    1668             :          END IF
    1669             :       END DO
    1670             : 
    1671             :       !CALL timestop(handle)
    1672             : 
    1673             :       RETURN
    1674             : 
    1675             :    END FUNCTION qblk_exists
    1676             : 
    1677             : END MODULE domain_submatrix_methods
    1678             : 

Generated by: LCOV version 1.15