LCOV - code coverage report
Current view: top level - src - domain_submatrix_methods.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:3130539) Lines: 564 675 83.6 %
Date: 2025-05-14 06:57:18 Functions: 14 20 70.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Subroutines to handle submatrices
      10             : !> \par History
      11             : !>       2013.01 created [Rustam Z Khaliullin]
      12             : !> \author Rustam Z Khaliullin
      13             : ! **************************************************************************************************
      14             : MODULE domain_submatrix_methods
      15             : 
      16             :    USE cp_dbcsr_api,                    ONLY: &
      17             :         dbcsr_distribution_get, dbcsr_distribution_type, dbcsr_filter, dbcsr_finalize, &
      18             :         dbcsr_get_block_p, dbcsr_get_info, dbcsr_get_matrix_type, dbcsr_get_stored_coordinates, &
      19             :         dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, dbcsr_iterator_start, &
      20             :         dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_put_block, dbcsr_type, &
      21             :         dbcsr_type_antisymmetric, dbcsr_type_no_symmetry, dbcsr_type_symmetric, dbcsr_work_create
      22             :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      23             :                                               cp_logger_get_default_unit_nr,&
      24             :                                               cp_logger_type
      25             :    USE domain_submatrix_types,          ONLY: domain_map_type,&
      26             :                                               domain_submatrix_type,&
      27             :                                               select_row_col
      28             :    USE kinds,                           ONLY: dp
      29             :    USE message_passing,                 ONLY: mp_comm_null,&
      30             :                                               mp_comm_type
      31             : #include "./base/base_uses.f90"
      32             : 
      33             :    IMPLICIT NONE
      34             : 
      35             :    PRIVATE
      36             : 
      37             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'domain_submatrix_methods'
      38             : 
      39             :    PUBLIC :: copy_submatrices, copy_submatrix_data, &
      40             :              release_submatrices, multiply_submatrices, add_submatrices, &
      41             :              construct_submatrices, init_submatrices, &
      42             :              construct_dbcsr_from_submatrices, &
      43             :              set_submatrices, &
      44             :              print_submatrices, maxnorm_submatrices
      45             : 
      46             :    INTERFACE init_submatrices
      47             :       MODULE PROCEDURE init_submatrices_0d
      48             :       MODULE PROCEDURE init_submatrices_1d
      49             :       MODULE PROCEDURE init_submatrices_2d
      50             :    END INTERFACE
      51             : 
      52             :    INTERFACE set_submatrices
      53             :       MODULE PROCEDURE set_submatrix_array
      54             :       MODULE PROCEDURE set_submatrix
      55             :    END INTERFACE
      56             : 
      57             :    INTERFACE copy_submatrices
      58             :       MODULE PROCEDURE copy_submatrix_array
      59             :       MODULE PROCEDURE copy_submatrix
      60             :    END INTERFACE
      61             : 
      62             :    INTERFACE release_submatrices
      63             :       MODULE PROCEDURE release_submatrix_array
      64             :       MODULE PROCEDURE release_submatrix
      65             :    END INTERFACE
      66             : 
      67             :    INTERFACE multiply_submatrices
      68             :       MODULE PROCEDURE multiply_submatrices_once
      69             :       MODULE PROCEDURE multiply_submatrices_array
      70             :    END INTERFACE
      71             : 
      72             :    INTERFACE add_submatrices
      73             :       MODULE PROCEDURE add_submatrices_once
      74             :       MODULE PROCEDURE add_submatrices_array
      75             :    END INTERFACE
      76             : 
      77             : CONTAINS
      78             : 
      79             : ! **************************************************************************************************
      80             : !> \brief ...
      81             : !> \param subm ...
      82             : ! **************************************************************************************************
      83           0 :    SUBROUTINE init_submatrices_0d(subm)
      84             : 
      85             :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: subm
      86             : 
      87           0 :       subm%domain = -1
      88           0 :       subm%nbrows = -1
      89           0 :       subm%nbcols = -1
      90           0 :       subm%nrows = -1
      91           0 :       subm%ncols = -1
      92           0 :       subm%nnodes = -1
      93           0 :       subm%group = mp_comm_null
      94             : 
      95           0 :    END SUBROUTINE init_submatrices_0d
      96             : 
      97             : ! **************************************************************************************************
      98             : !> \brief ...
      99             : !> \param subm ...
     100             : ! **************************************************************************************************
     101        7968 :    SUBROUTINE init_submatrices_1d(subm)
     102             : 
     103             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     104             :          INTENT(INOUT)                                   :: subm
     105             : 
     106       58884 :       subm(:)%domain = -1
     107       58884 :       subm(:)%nbrows = -1
     108       58884 :       subm(:)%nbcols = -1
     109       58884 :       subm(:)%nrows = -1
     110       58884 :       subm(:)%ncols = -1
     111       58884 :       subm(:)%nnodes = -1
     112       58884 :       subm(:)%group = mp_comm_null
     113             : 
     114        7968 :    END SUBROUTINE init_submatrices_1d
     115             : 
     116             : ! **************************************************************************************************
     117             : !> \brief ...
     118             : !> \param subm ...
     119             : ! **************************************************************************************************
     120        1142 :    SUBROUTINE init_submatrices_2d(subm)
     121             : 
     122             :       TYPE(domain_submatrix_type), DIMENSION(:, :), &
     123             :          INTENT(INOUT)                                   :: subm
     124             : 
     125       10282 :       subm(:, :)%domain = -1
     126       10282 :       subm(:, :)%nbrows = -1
     127       10282 :       subm(:, :)%nbcols = -1
     128       10282 :       subm(:, :)%nrows = -1
     129       10282 :       subm(:, :)%ncols = -1
     130       10282 :       subm(:, :)%nnodes = -1
     131       10282 :       subm(:, :)%group = mp_comm_null
     132             : 
     133        1142 :    END SUBROUTINE init_submatrices_2d
     134             : 
     135             : ! **************************************************************************************************
     136             : !> \brief ...
     137             : !> \param original ...
     138             : !> \param copy ...
     139             : !> \param copy_data ...
     140             : ! **************************************************************************************************
     141        1164 :    SUBROUTINE copy_submatrix_array(original, copy, copy_data)
     142             : 
     143             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     144             :          INTENT(IN)                                      :: original
     145             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     146             :          INTENT(INOUT)                                   :: copy
     147             :       LOGICAL, INTENT(IN)                                :: copy_data
     148             : 
     149             :       CHARACTER(len=*), PARAMETER :: routineN = 'copy_submatrix_array'
     150             : 
     151             :       INTEGER                                            :: handle, idomain, ndomains, ndomainsB
     152             : 
     153        1164 :       CALL timeset(routineN, handle)
     154             : 
     155        1164 :       ndomains = SIZE(original)
     156        1164 :       ndomainsB = SIZE(copy)
     157        1164 :       CPASSERT(ndomains .EQ. ndomainsB)
     158        8246 :       copy(:)%nnodes = original(:)%nnodes
     159        8246 :       copy(:)%group = original(:)%group
     160        8246 :       DO idomain = 1, ndomains
     161        8246 :          IF (original(idomain)%domain .GT. 0) THEN
     162        3541 :             CALL copy_submatrix(original(idomain), copy(idomain), copy_data)
     163             :          END IF
     164             :       END DO ! loop over domains
     165             : 
     166        1164 :       CALL timestop(handle)
     167             : 
     168        1164 :    END SUBROUTINE copy_submatrix_array
     169             : 
     170             : ! **************************************************************************************************
     171             : !> \brief ...
     172             : !> \param original ...
     173             : !> \param copy ...
     174             : !> \param copy_data ...
     175             : ! **************************************************************************************************
     176        4566 :    SUBROUTINE copy_submatrix(original, copy, copy_data)
     177             : 
     178             :       TYPE(domain_submatrix_type), INTENT(IN)            :: original
     179             :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: copy
     180             :       LOGICAL, INTENT(IN)                                :: copy_data
     181             : 
     182             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'copy_submatrix'
     183             : 
     184             :       INTEGER                                            :: handle, icol, irow
     185             : 
     186        4566 :       CALL timeset(routineN, handle)
     187             : 
     188        4566 :       copy%domain = original%domain
     189        4566 :       copy%nnodes = original%nnodes
     190        4566 :       copy%group = original%group
     191             : 
     192        4566 :       IF (original%domain .GT. 0) THEN
     193             : 
     194        4566 :          copy%nbrows = original%nbrows
     195        4566 :          copy%nbcols = original%nbcols
     196        4566 :          copy%nrows = original%nrows
     197        4566 :          copy%ncols = original%ncols
     198             : 
     199        4566 :          IF (.NOT. ALLOCATED(copy%dbcsr_row)) THEN
     200        9906 :             ALLOCATE (copy%dbcsr_row(original%nbrows))
     201             :          ELSE
     202        1264 :             IF (SIZE(copy%dbcsr_row) .NE. SIZE(original%dbcsr_row)) THEN
     203           0 :                DEALLOCATE (copy%dbcsr_row)
     204           0 :                ALLOCATE (copy%dbcsr_row(original%nbrows))
     205             :             END IF
     206             :          END IF
     207        4566 :          IF (.NOT. ALLOCATED(copy%dbcsr_col)) THEN
     208        9906 :             ALLOCATE (copy%dbcsr_col(original%nbcols))
     209             :          ELSE
     210        1264 :             IF (SIZE(copy%dbcsr_col) .NE. SIZE(original%dbcsr_col)) THEN
     211           0 :                DEALLOCATE (copy%dbcsr_col)
     212           0 :                ALLOCATE (copy%dbcsr_col(original%nbcols))
     213             :             END IF
     214             :          END IF
     215        4566 :          IF (.NOT. ALLOCATED(copy%size_brow)) THEN
     216        9906 :             ALLOCATE (copy%size_brow(original%nbrows))
     217             :          ELSE
     218        1264 :             IF (SIZE(copy%size_brow) .NE. SIZE(original%size_brow)) THEN
     219           0 :                DEALLOCATE (copy%size_brow)
     220           0 :                ALLOCATE (copy%size_brow(original%nbrows))
     221             :             END IF
     222             :          END IF
     223        4566 :          IF (.NOT. ALLOCATED(copy%size_bcol)) THEN
     224        9906 :             ALLOCATE (copy%size_bcol(original%nbcols))
     225             :          ELSE
     226        1264 :             IF (SIZE(copy%size_bcol) .NE. SIZE(original%size_bcol)) THEN
     227           0 :                DEALLOCATE (copy%size_bcol)
     228           0 :                ALLOCATE (copy%size_bcol(original%nbcols))
     229             :             END IF
     230             :          END IF
     231             : 
     232       24024 :          DO irow = 1, original%nbrows
     233       19458 :             copy%dbcsr_row(irow) = original%dbcsr_row(irow)
     234       24024 :             copy%size_brow(irow) = original%size_brow(irow)
     235             :          END DO
     236             : 
     237       15012 :          DO icol = 1, original%nbcols
     238       10446 :             copy%dbcsr_col(icol) = original%dbcsr_col(icol)
     239       15012 :             copy%size_bcol(icol) = original%size_bcol(icol)
     240             :          END DO
     241             : 
     242        4566 :          IF (copy_data) THEN
     243        3541 :             CALL copy_submatrix_data(original%mdata, copy)
     244             :          END IF
     245             : 
     246             :       END IF ! do not copy empty submatrix
     247             : 
     248        4566 :       CALL timestop(handle)
     249             : 
     250        4566 :    END SUBROUTINE copy_submatrix
     251             : 
     252             : ! **************************************************************************************************
     253             : !> \brief ...
     254             : !> \param array ...
     255             : !> \param copy ...
     256             : ! **************************************************************************************************
     257        4566 :    SUBROUTINE copy_submatrix_data(array, copy)
     258             : 
     259             :       REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: array
     260             :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: copy
     261             : 
     262             :       CHARACTER(len=*), PARAMETER :: routineN = 'copy_submatrix_data'
     263             : 
     264             :       INTEGER                                            :: ds1, ds2, handle, ms1, ms2
     265             : 
     266        4566 :       CALL timeset(routineN, handle)
     267             : 
     268        4566 :       CPASSERT(copy%domain .GT. 0)
     269             : 
     270        4566 :       ds1 = SIZE(array, 1)
     271        4566 :       ds2 = SIZE(array, 2)
     272             : 
     273        4566 :       IF (.NOT. ALLOCATED(copy%mdata)) THEN
     274       13208 :          ALLOCATE (copy%mdata(ds1, ds2))
     275             :       ELSE
     276        1264 :          ms1 = SIZE(copy%mdata, 1)
     277        1264 :          ms2 = SIZE(copy%mdata, 2)
     278        1264 :          IF ((ds1 .NE. ms1) .OR. (ds2 .NE. ms2)) THEN
     279           0 :             DEALLOCATE (copy%mdata)
     280           0 :             ALLOCATE (copy%mdata(ds1, ds2))
     281             :          END IF
     282             :       END IF
     283             : 
     284     5710612 :       copy%mdata(:, :) = array(:, :)
     285             : 
     286        4566 :       CALL timestop(handle)
     287             : 
     288        4566 :    END SUBROUTINE copy_submatrix_data
     289             : 
     290             : ! **************************************************************************************************
     291             : !> \brief ...
     292             : !> \param submatrices ...
     293             : !> \param scalar ...
     294             : ! **************************************************************************************************
     295           0 :    SUBROUTINE set_submatrix_array(submatrices, scalar)
     296             : 
     297             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     298             :          INTENT(INOUT)                                   :: submatrices
     299             :       REAL(KIND=dp), INTENT(IN)                          :: scalar
     300             : 
     301             :       CHARACTER(len=*), PARAMETER :: routineN = 'set_submatrix_array'
     302             : 
     303             :       INTEGER                                            :: handle, idomain, ndomains
     304             : 
     305           0 :       CALL timeset(routineN, handle)
     306             : 
     307           0 :       ndomains = SIZE(submatrices)
     308           0 :       DO idomain = 1, ndomains
     309           0 :          IF (submatrices(idomain)%domain .GT. 0) THEN
     310           0 :             CALL set_submatrix(submatrices(idomain), scalar)
     311             :          END IF
     312             :       END DO ! loop over domains
     313             : 
     314           0 :       CALL timestop(handle)
     315             : 
     316           0 :    END SUBROUTINE set_submatrix_array
     317             : 
     318             : ! **************************************************************************************************
     319             : !> \brief ...
     320             : !> \param submatrix ...
     321             : !> \param scalar ...
     322             : ! **************************************************************************************************
     323           0 :    SUBROUTINE set_submatrix(submatrix, scalar)
     324             : 
     325             :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: submatrix
     326             :       REAL(KIND=dp), INTENT(IN)                          :: scalar
     327             : 
     328             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'set_submatrix'
     329             : 
     330             :       INTEGER                                            :: ds1, ds2, handle, ms1, ms2
     331             : 
     332           0 :       CALL timeset(routineN, handle)
     333             : 
     334           0 :       CPASSERT(submatrix%domain .GT. 0)
     335           0 :       CPASSERT(submatrix%nrows .GT. 0)
     336           0 :       CPASSERT(submatrix%ncols .GT. 0)
     337             : 
     338           0 :       ds1 = submatrix%nrows
     339           0 :       ds2 = submatrix%ncols
     340             : 
     341           0 :       IF (.NOT. ALLOCATED(submatrix%mdata)) THEN
     342           0 :          ALLOCATE (submatrix%mdata(ds1, ds2))
     343             :       ELSE
     344           0 :          ms1 = SIZE(submatrix%mdata, 1)
     345           0 :          ms2 = SIZE(submatrix%mdata, 2)
     346           0 :          IF ((ds1 .NE. ms1) .OR. (ds2 .NE. ms2)) THEN
     347           0 :             DEALLOCATE (submatrix%mdata)
     348           0 :             ALLOCATE (submatrix%mdata(ds1, ds2))
     349             :          END IF
     350             :       END IF
     351             : 
     352           0 :       submatrix%mdata(:, :) = scalar
     353             : 
     354           0 :       CALL timestop(handle)
     355             : 
     356           0 :    END SUBROUTINE set_submatrix
     357             : 
     358             : ! **************************************************************************************************
     359             : !> \brief ...
     360             : !> \param subm ...
     361             : ! **************************************************************************************************
     362       12118 :    SUBROUTINE release_submatrix_array(subm)
     363             : 
     364             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     365             :          INTENT(INOUT)                                   :: subm
     366             : 
     367             :       CHARACTER(len=*), PARAMETER :: routineN = 'release_submatrix_array'
     368             : 
     369             :       INTEGER                                            :: handle, idomain, ndomains
     370             : 
     371       12118 :       CALL timeset(routineN, handle)
     372             : 
     373       12118 :       ndomains = SIZE(subm)
     374       90314 :       DO idomain = 1, ndomains
     375       90314 :          CALL release_submatrix(subm(idomain))
     376             :       END DO ! loop over domains
     377             : 
     378       12118 :       CALL timestop(handle)
     379             : 
     380       12118 :    END SUBROUTINE release_submatrix_array
     381             : 
     382             : ! **************************************************************************************************
     383             : !> \brief ...
     384             : !> \param subm ...
     385             : ! **************************************************************************************************
     386       78196 :    SUBROUTINE release_submatrix(subm)
     387             : 
     388             :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: subm
     389             : 
     390             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'release_submatrix'
     391             : 
     392             :       INTEGER                                            :: handle
     393             : 
     394       78196 :       CALL timeset(routineN, handle)
     395             : 
     396       78196 :       subm%domain = -1
     397       78196 :       subm%nbrows = -1
     398       78196 :       subm%nbcols = -1
     399       78196 :       subm%nrows = -1
     400       78196 :       subm%ncols = -1
     401       78196 :       subm%nnodes = -1
     402       78196 :       subm%group = mp_comm_null
     403             : 
     404       78196 :       IF (ALLOCATED(subm%dbcsr_row)) THEN
     405       20999 :          DEALLOCATE (subm%dbcsr_row)
     406             :       END IF
     407       78196 :       IF (ALLOCATED(subm%dbcsr_col)) THEN
     408       20999 :          DEALLOCATE (subm%dbcsr_col)
     409             :       END IF
     410       78196 :       IF (ALLOCATED(subm%size_brow)) THEN
     411       20999 :          DEALLOCATE (subm%size_brow)
     412             :       END IF
     413       78196 :       IF (ALLOCATED(subm%size_bcol)) THEN
     414       20999 :          DEALLOCATE (subm%size_bcol)
     415             :       END IF
     416       78196 :       IF (ALLOCATED(subm%mdata)) THEN
     417       21004 :          DEALLOCATE (subm%mdata)
     418             :       END IF
     419             : 
     420       78196 :       CALL timestop(handle)
     421             : 
     422       78196 :    END SUBROUTINE release_submatrix
     423             : 
     424             :    ! more complex routine might be necessary if submatrices are distributed
     425             : ! **************************************************************************************************
     426             : !> \brief ...
     427             : !> \param transA ...
     428             : !> \param transB ...
     429             : !> \param alpha ...
     430             : !> \param A ...
     431             : !> \param B ...
     432             : !> \param beta ...
     433             : !> \param C ...
     434             : ! **************************************************************************************************
     435       10733 :    SUBROUTINE multiply_submatrices_once(transA, transB, alpha, A, B, beta, C)
     436             : 
     437             :       CHARACTER, INTENT(IN)                              :: transA, transB
     438             :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     439             :       TYPE(domain_submatrix_type), INTENT(IN)            :: A, B
     440             :       REAL(KIND=dp), INTENT(IN)                          :: beta
     441             :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: C
     442             : 
     443             :       CHARACTER(len=*), PARAMETER :: routineN = 'multiply_submatrices_once'
     444             : 
     445             :       INTEGER                                            :: cs1, cs2, handle, icol, irow, K, K1, &
     446             :                                                             LDA, LDB, LDC, M, Mblocks, N, Nblocks
     447             :       LOGICAL                                            :: NOTA, NOTB
     448             : 
     449       10733 :       CALL timeset(routineN, handle)
     450             : 
     451       10733 :       CPASSERT(A%domain .GT. 0)
     452       10733 :       CPASSERT(B%domain .GT. 0)
     453       10733 :       CPASSERT(C%domain .GT. 0)
     454             : 
     455       10733 :       LDA = SIZE(A%mdata, 1)
     456       10733 :       LDB = SIZE(B%mdata, 1)
     457             : 
     458       10733 :       NOTA = (transA .EQ. 'N') .OR. (transA .EQ. 'n')
     459       10733 :       NOTB = (transB .EQ. 'N') .OR. (transB .EQ. 'n')
     460             : 
     461       10733 :       IF (NOTA) THEN
     462       10733 :          M = A%nrows
     463       10733 :          K = A%ncols
     464       10733 :          Mblocks = A%nbrows
     465             :       ELSE
     466           0 :          M = A%ncols
     467           0 :          K = A%nrows
     468           0 :          Mblocks = A%nbcols
     469             :       END IF
     470             : 
     471       10733 :       IF (NOTB) THEN
     472       10638 :          K1 = B%nrows
     473       10638 :          N = B%ncols
     474       10638 :          Nblocks = B%nbcols
     475             :       ELSE
     476          95 :          K1 = B%ncols
     477          95 :          N = B%nrows
     478          95 :          Nblocks = B%nbrows
     479             :       END IF
     480             : 
     481             :       ! these checks are for debugging only
     482       10733 :       CPASSERT(K .EQ. K1)
     483             : 
     484             :       ! conform C matrix
     485       10733 :       C%nrows = M
     486       10733 :       C%ncols = N
     487       10733 :       C%nbrows = Mblocks
     488       10733 :       C%nbcols = Nblocks
     489       10733 :       IF (ALLOCATED(C%dbcsr_row)) THEN
     490        2723 :          DEALLOCATE (C%dbcsr_row)
     491             :       END IF
     492       32199 :       ALLOCATE (C%dbcsr_row(C%nbrows))
     493       10733 :       IF (ALLOCATED(C%dbcsr_col)) THEN
     494        2723 :          DEALLOCATE (C%dbcsr_col)
     495             :       END IF
     496       32199 :       ALLOCATE (C%dbcsr_col(C%nbcols))
     497       10733 :       IF (ALLOCATED(C%size_brow)) THEN
     498        2723 :          DEALLOCATE (C%size_brow)
     499             :       END IF
     500       32199 :       ALLOCATE (C%size_brow(C%nbrows))
     501       10733 :       IF (ALLOCATED(C%size_bcol)) THEN
     502        2723 :          DEALLOCATE (C%size_bcol)
     503             :       END IF
     504       32199 :       ALLOCATE (C%size_bcol(C%nbcols))
     505             : 
     506       57666 :       DO irow = 1, C%nbrows
     507       57666 :          IF (NOTA) THEN
     508       46933 :             C%dbcsr_row(irow) = A%dbcsr_row(irow)
     509       46933 :             C%size_brow(irow) = A%size_brow(irow)
     510             :          ELSE
     511           0 :             C%dbcsr_row(irow) = A%dbcsr_col(irow)
     512           0 :             C%size_brow(irow) = A%size_bcol(irow)
     513             :          END IF
     514             :       END DO
     515             : 
     516       22360 :       DO icol = 1, C%nbcols
     517       22360 :          IF (NOTB) THEN
     518       11234 :             C%dbcsr_col(icol) = B%dbcsr_col(icol)
     519       11234 :             C%size_bcol(icol) = B%size_bcol(icol)
     520             :          ELSE
     521         393 :             C%dbcsr_col(icol) = B%dbcsr_row(icol)
     522         393 :             C%size_bcol(icol) = B%size_brow(icol)
     523             :          END IF
     524             :       END DO
     525             : 
     526       10733 :       IF (.NOT. ALLOCATED(C%mdata)) THEN
     527             :          !!! cannot use non-zero beta if C is not allocated
     528        8010 :          CPASSERT(beta .EQ. 0.0_dp)
     529       32040 :          ALLOCATE (C%mdata(C%nrows, C%ncols))
     530             :       ELSE
     531        2723 :          cs1 = SIZE(C%mdata, 1)
     532        2723 :          cs2 = SIZE(C%mdata, 2)
     533        2723 :          IF ((C%nrows .NE. cs1) .OR. (C%ncols .NE. cs2)) THEN
     534             :             !!! cannot deallocate data if beta is non-zero
     535           0 :             CPASSERT(beta .EQ. 0.0_dp)
     536           0 :             DEALLOCATE (C%mdata)
     537           0 :             ALLOCATE (C%mdata(C%nrows, C%ncols))
     538             :          END IF
     539             :       END IF
     540             : 
     541       10733 :       LDC = C%nrows
     542             : 
     543       10733 :       CALL DGEMM(transA, transB, M, N, K, alpha, A%mdata, LDA, B%mdata, LDB, beta, C%mdata, LDC)
     544             : 
     545       10733 :       C%nnodes = A%nnodes
     546       10733 :       C%group = A%group
     547             : 
     548       10733 :       CALL timestop(handle)
     549             : 
     550       10733 :    END SUBROUTINE multiply_submatrices_once
     551             : 
     552             : ! **************************************************************************************************
     553             : !> \brief ...
     554             : !> \param transA ...
     555             : !> \param transB ...
     556             : !> \param alpha ...
     557             : !> \param A ...
     558             : !> \param B ...
     559             : !> \param beta ...
     560             : !> \param C ...
     561             : ! **************************************************************************************************
     562        3328 :    SUBROUTINE multiply_submatrices_array(transA, transB, alpha, A, B, beta, C)
     563             : 
     564             :       CHARACTER, INTENT(IN)                              :: transA, transB
     565             :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     566             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     567             :          INTENT(IN)                                      :: A, B
     568             :       REAL(KIND=dp), INTENT(IN)                          :: beta
     569             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     570             :          INTENT(INOUT)                                   :: C
     571             : 
     572             :       CHARACTER(len=*), PARAMETER :: routineN = 'multiply_submatrices_array'
     573             : 
     574             :       INTEGER                                            :: handle, idomain, idomainA, idomainB, &
     575             :                                                             ndomains, ndomainsB, ndomainsC
     576             : 
     577        3328 :       CALL timeset(routineN, handle)
     578             : 
     579        3328 :       ndomains = SIZE(A)
     580        3328 :       ndomainsB = SIZE(B)
     581        3328 :       ndomainsC = SIZE(C)
     582             : 
     583        3328 :       CPASSERT(ndomains .EQ. ndomainsB)
     584        3328 :       CPASSERT(ndomainsB .EQ. ndomainsC)
     585             : 
     586       24794 :       DO idomain = 1, ndomains
     587             : 
     588       21466 :          idomainA = A(idomain)%domain
     589       21466 :          idomainB = B(idomain)%domain
     590             : 
     591       21466 :          CPASSERT(idomainA .EQ. idomainB)
     592             : 
     593       21466 :          C(idomain)%domain = idomainA
     594             : 
     595             :          ! check if the submatrix exists
     596       24794 :          IF (idomainA .GT. 0) THEN
     597       10733 :             CALL multiply_submatrices_once(transA, transB, alpha, A(idomain), B(idomain), beta, C(idomain))
     598             :          END IF ! submatrix for the domain exists
     599             : 
     600             :       END DO ! loop over domains
     601             : 
     602        3328 :       CALL timestop(handle)
     603             : 
     604        3328 :    END SUBROUTINE multiply_submatrices_array
     605             : 
     606             :    ! more complex routine might be necessary if submatrices are distributed
     607             : ! **************************************************************************************************
     608             : !> \brief ...
     609             : !> \param alpha ...
     610             : !> \param A ...
     611             : !> \param beta ...
     612             : !> \param B ...
     613             : !> \param transB ...
     614             : ! **************************************************************************************************
     615         205 :    SUBROUTINE add_submatrices_once(alpha, A, beta, B, transB)
     616             : 
     617             :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     618             :       TYPE(domain_submatrix_type), INTENT(INOUT)         :: A
     619             :       REAL(KIND=dp), INTENT(IN)                          :: beta
     620             :       TYPE(domain_submatrix_type), INTENT(IN)            :: B
     621             :       CHARACTER, INTENT(IN)                              :: transB
     622             : 
     623             :       CHARACTER(len=*), PARAMETER :: routineN = 'add_submatrices_once'
     624             : 
     625             :       INTEGER                                            :: C1, C2, handle, icol, R1, R2
     626             :       LOGICAL                                            :: NOTB
     627             : 
     628         205 :       CALL timeset(routineN, handle)
     629             : 
     630         205 :       CPASSERT(A%domain .GT. 0)
     631         205 :       CPASSERT(B%domain .GT. 0)
     632             : 
     633         205 :       R1 = A%nrows
     634         205 :       C1 = A%ncols
     635             : 
     636         205 :       NOTB = (transB .EQ. 'N') .OR. (transB .EQ. 'n')
     637             : 
     638         205 :       IF (NOTB) THEN
     639         110 :          R2 = B%nrows
     640         110 :          C2 = B%ncols
     641             :       ELSE
     642          95 :          R2 = B%ncols
     643          95 :          C2 = B%nrows
     644             :       END IF
     645             : 
     646             :       ! these checks are for debugging only
     647         205 :       CPASSERT(C1 .EQ. C2)
     648         205 :       CPASSERT(R1 .EQ. R2)
     649             : 
     650         205 :       IF (NOTB) THEN
     651        7507 :          DO icol = 1, C1
     652      765846 :             A%mdata(:, icol) = alpha*A%mdata(:, icol) + beta*B%mdata(:, icol)
     653             :          END DO
     654             :       ELSE
     655        6690 :          DO icol = 1, C1
     656      701043 :             A%mdata(:, icol) = alpha*A%mdata(:, icol) + beta*B%mdata(icol, :)
     657             :          END DO
     658             :       END IF
     659             : 
     660         205 :       CALL timestop(handle)
     661             : 
     662         205 :    END SUBROUTINE add_submatrices_once
     663             : 
     664             : ! **************************************************************************************************
     665             : !> \brief ...
     666             : !> \param alpha ...
     667             : !> \param A ...
     668             : !> \param beta ...
     669             : !> \param B ...
     670             : !> \param transB ...
     671             : ! **************************************************************************************************
     672          66 :    SUBROUTINE add_submatrices_array(alpha, A, beta, B, transB)
     673             : 
     674             :       REAL(KIND=dp), INTENT(IN)                          :: alpha
     675             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     676             :          INTENT(INOUT)                                   :: A
     677             :       REAL(KIND=dp), INTENT(IN)                          :: beta
     678             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     679             :          INTENT(IN)                                      :: B
     680             :       CHARACTER, INTENT(IN)                              :: transB
     681             : 
     682             :       CHARACTER(len=*), PARAMETER :: routineN = 'add_submatrices_array'
     683             : 
     684             :       INTEGER                                            :: handle, idomain, idomainA, idomainB, &
     685             :                                                             ndomains, ndomainsB
     686             : 
     687          66 :       CALL timeset(routineN, handle)
     688             : 
     689          66 :       ndomains = SIZE(A)
     690          66 :       ndomainsB = SIZE(B)
     691             : 
     692          66 :       CPASSERT(ndomains .EQ. ndomainsB)
     693             : 
     694         476 :       DO idomain = 1, ndomains
     695             : 
     696         410 :          idomainA = A(idomain)%domain
     697         410 :          idomainB = B(idomain)%domain
     698             : 
     699         410 :          CPASSERT(idomainA .EQ. idomainB)
     700             : 
     701             :          ! check if the submatrix exists
     702         476 :          IF (idomainA .GT. 0) THEN
     703         205 :             CALL add_submatrices_once(alpha, A(idomain), beta, B(idomain), transB)
     704             :          END IF ! submatrix for the domain exists
     705             : 
     706             :       END DO ! loop over domains
     707             : 
     708          66 :       CALL timestop(handle)
     709             : 
     710          66 :    END SUBROUTINE add_submatrices_array
     711             : 
     712             : ! **************************************************************************************************
     713             : !> \brief Computes the max norm of the collection of submatrices
     714             : !> \param submatrices ...
     715             : !> \param norm ...
     716             : !> \par History
     717             : !>       2013.03 created [Rustam Z. Khaliullin]
     718             : !> \author Rustam Z. Khaliullin
     719             : ! **************************************************************************************************
     720           2 :    SUBROUTINE maxnorm_submatrices(submatrices, norm)
     721             : 
     722             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     723             :          INTENT(IN)                                      :: submatrices
     724             :       REAL(KIND=dp), INTENT(OUT)                         :: norm
     725             : 
     726             :       CHARACTER(len=*), PARAMETER :: routineN = 'maxnorm_submatrices'
     727             : 
     728             :       INTEGER                                            :: handle, idomain, ndomains
     729             :       REAL(KIND=dp)                                      :: curr_norm, send_norm
     730             :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: recv_norm
     731             : 
     732           2 :       CALL timeset(routineN, handle)
     733             : 
     734           2 :       send_norm = 0.0_dp
     735             : 
     736           2 :       ndomains = SIZE(submatrices)
     737             : 
     738          12 :       DO idomain = 1, ndomains
     739             : 
     740             :          ! check if the submatrix is local
     741          12 :          IF (submatrices(idomain)%domain .GT. 0) THEN
     742       31607 :             curr_norm = MAXVAL(ABS(submatrices(idomain)%mdata))
     743           5 :             IF (curr_norm .GT. send_norm) send_norm = curr_norm
     744             :          END IF
     745             : 
     746             :       END DO ! loop over domains
     747             : 
     748             :       ! communicate local norm to the other nodes
     749           6 :       ALLOCATE (recv_norm(submatrices(1)%nnodes))
     750           2 :       CALL submatrices(1)%group%allgather(send_norm, recv_norm)
     751             : 
     752           8 :       norm = MAXVAL(recv_norm)
     753             : 
     754           2 :       DEALLOCATE (recv_norm)
     755             : 
     756           2 :       CALL timestop(handle)
     757             : 
     758           2 :    END SUBROUTINE maxnorm_submatrices
     759             : 
     760             : ! **************************************************************************************************
     761             : !> \brief Computes the sum of traces of the submatrix A.tr(B)
     762             : !> \param A ...
     763             : !> \param B ...
     764             : !> \param trace ...
     765             : !> \par History
     766             : !>       2013.03 created [Rustam Z. Khaliullin]
     767             : !> \author Rustam Z. Khaliullin
     768             : ! **************************************************************************************************
     769           0 :    SUBROUTINE trace_submatrices(A, B, trace)
     770             : 
     771             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     772             :          INTENT(IN)                                      :: A, B
     773             :       REAL(KIND=dp), INTENT(OUT)                         :: trace
     774             : 
     775             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'trace_submatrices'
     776             : 
     777             :       INTEGER                                            :: domainA, domainB, handle, idomain, &
     778             :                                                             ndomainsA, ndomainsB
     779             :       REAL(KIND=dp)                                      :: curr_trace, send_trace
     780           0 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: recv_trace
     781             : 
     782           0 :       CALL timeset(routineN, handle)
     783             : 
     784           0 :       send_trace = 0.0_dp
     785             : 
     786           0 :       ndomainsA = SIZE(A)
     787           0 :       ndomainsB = SIZE(B)
     788             : 
     789           0 :       CPASSERT(ndomainsA .EQ. ndomainsB)
     790             : 
     791           0 :       DO idomain = 1, ndomainsA
     792             : 
     793           0 :          domainA = A(idomain)%domain
     794           0 :          domainB = B(idomain)%domain
     795             : 
     796           0 :          CPASSERT(domainA .EQ. domainB)
     797             : 
     798             :          ! check if the submatrix is local
     799           0 :          IF (domainA .GT. 0) THEN
     800             : 
     801           0 :             CPASSERT(A(idomain)%nrows .EQ. B(idomain)%nrows)
     802           0 :             CPASSERT(A(idomain)%ncols .EQ. B(idomain)%ncols)
     803             : 
     804           0 :             curr_trace = SUM(A(idomain)%mdata(:, :)*B(idomain)%mdata(:, :))
     805           0 :             send_trace = send_trace + curr_trace
     806             : 
     807             :          END IF
     808             : 
     809             :       END DO ! loop over domains
     810             : 
     811             :       ! communicate local norm to the other nodes
     812           0 :       ALLOCATE (recv_trace(A(1)%nnodes))
     813           0 :       CALL A(1)%group%allgather(send_trace, recv_trace)
     814             : 
     815           0 :       trace = SUM(recv_trace)
     816             : 
     817           0 :       DEALLOCATE (recv_trace)
     818             : 
     819           0 :       CALL timestop(handle)
     820             : 
     821           0 :    END SUBROUTINE trace_submatrices
     822             : 
     823             : ! **************************************************************************************************
     824             : !> \brief Constructs submatrices for each ALMO domain by collecting distributed
     825             : !>        DBCSR blocks to local arrays
     826             : !> \param matrix ...
     827             : !> \param submatrix ...
     828             : !> \param distr_pattern ...
     829             : !> \param domain_map ...
     830             : !> \param node_of_domain ...
     831             : !> \param job_type ...
     832             : !> \par History
     833             : !>       2013.01 created [Rustam Z. Khaliullin]
     834             : !> \author Rustam Z. Khaliullin
     835             : ! **************************************************************************************************
     836        9240 :    SUBROUTINE construct_submatrices(matrix, submatrix, distr_pattern, domain_map, &
     837        3080 :                                     node_of_domain, job_type)
     838             : 
     839             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
     840             :       TYPE(domain_submatrix_type), DIMENSION(:), &
     841             :          INTENT(INOUT)                                   :: submatrix
     842             :       TYPE(dbcsr_type), INTENT(IN)                       :: distr_pattern
     843             :       TYPE(domain_map_type), INTENT(IN)                  :: domain_map
     844             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: node_of_domain
     845             :       INTEGER, INTENT(IN)                                :: job_type
     846             : 
     847             :       CHARACTER(len=*), PARAMETER :: routineN = 'construct_submatrices'
     848             : 
     849             :       CHARACTER                                          :: matrix_type
     850             :       INTEGER :: block_node, block_offset, col, col_offset, col_size, dest_node, GroupID, handle, &
     851             :          iBlock, icol, idomain, index_col, index_ec, index_er, index_row, index_sc, index_sr, &
     852             :          iNode, ldesc, myNode, nblkcols_tot, nblkrows_tot, ndomains, ndomains2, nNodes, &
     853             :          recv_size2_total, recv_size_total, row, row_size, send_size2_total, send_size_total, &
     854             :          smcol, smrow, start_data
     855        3080 :       INTEGER, ALLOCATABLE, DIMENSION(:) :: first_col, first_row, offset2_block, offset_block, &
     856        3080 :          recv_data2, recv_offset2_cpu, recv_offset_cpu, recv_size2_cpu, recv_size_cpu, send_data2, &
     857        3080 :          send_offset2_cpu, send_offset_cpu, send_size2_cpu, send_size_cpu
     858             :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: recv_descriptor, send_descriptor
     859        3080 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, row_blk_size
     860             :       LOGICAL                                            :: found, transp
     861             :       REAL(KIND=dp)                                      :: antifactor
     862        3080 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: recv_data, send_data
     863        3080 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block_p
     864             :       TYPE(dbcsr_distribution_type)                      :: pattern_dist
     865             :       TYPE(mp_comm_type)                                 :: group
     866             : 
     867             : !INTEGER, PARAMETER                         :: select_row_col = 1
     868             : !INTEGER, PARAMETER                         :: select_row = 2
     869             : !                                                  subm_row_size,&
     870             : !                                                  subm_col_size,&
     871             : 
     872        3080 :       CALL timeset(routineN, handle)
     873             : 
     874        3080 :       CALL dbcsr_get_info(matrix, nblkrows_total=nblkrows_tot, nblkcols_total=nblkcols_tot)
     875        3080 :       ndomains = nblkcols_tot ! RZK-warning not true for atomic distributions
     876        3080 :       CALL dbcsr_get_info(distr_pattern, distribution=pattern_dist)
     877        3080 :       CALL dbcsr_distribution_get(pattern_dist, numnodes=nNodes, group=GroupID, mynode=myNode)
     878             : 
     879        3080 :       CALL group%set_handle(groupid)
     880             : 
     881        3080 :       matrix_type = dbcsr_get_matrix_type(matrix)
     882             : 
     883        3080 :       ldesc = 2
     884        9240 :       ALLOCATE (send_descriptor(ldesc, nNodes))
     885        6160 :       ALLOCATE (recv_descriptor(ldesc, nNodes))
     886       21560 :       send_descriptor(:, :) = 0
     887             : 
     888             :       ! find: the number of blocks and their sizes that must be sent to each cpu
     889             :       ! loop over all domains
     890       22454 :       DO idomain = 1, ndomains
     891             : 
     892       19374 :          dest_node = node_of_domain(idomain)
     893             : 
     894             :          ! loop over those rows that have non-zero quencher
     895       19374 :          index_sr = 1 ! index start row
     896       19374 :          IF (idomain .GT. 1) index_sr = domain_map%index1(idomain - 1)
     897       19374 :          index_er = domain_map%index1(idomain) - 1 ! index end row
     898             : 
     899      105464 :          DO index_row = index_sr, index_er
     900             : 
     901       83010 :             row = domain_map%pairs(index_row, 1)
     902             : 
     903       83010 :             IF (job_type == select_row_col) THEN
     904             :                ! loop over those columns that have non-zero quencher
     905       14602 :                index_sc = 1 ! index start col
     906       14602 :                IF (idomain .GT. 1) index_sc = domain_map%index1(idomain - 1)
     907       14602 :                index_ec = domain_map%index1(idomain) - 1 ! index end col
     908             :             ELSE
     909             :                ! fake loop
     910             :                index_sc = 1 ! index start col
     911             :                index_ec = 1 ! index end col
     912             :             END IF
     913             : 
     914      233570 :             DO index_col = index_sc, index_ec
     915             : 
     916      131186 :                IF (job_type == select_row_col) THEN
     917       62778 :                   col = domain_map%pairs(index_col, 1)
     918             :                ELSE
     919       68408 :                   col = idomain
     920             :                END IF
     921             : 
     922      131186 :                transp = .FALSE.
     923             :                CALL dbcsr_get_stored_coordinates(matrix, &
     924      131186 :                                                  row, col, block_node)
     925      214196 :                IF (block_node .EQ. myNode) THEN
     926       65593 :                   CALL dbcsr_get_block_p(matrix, row, col, block_p, found, row_size, col_size)
     927       65593 :                   IF (found) THEN
     928       50286 :                      send_descriptor(1, dest_node + 1) = send_descriptor(1, dest_node + 1) + 1
     929             :                      send_descriptor(2, dest_node + 1) = send_descriptor(2, dest_node + 1) + &
     930       50286 :                                                          row_size*col_size
     931             :                   END IF
     932             :                END IF
     933             : 
     934             :             END DO ! loop over columns
     935             : 
     936             :          END DO ! loop over rows
     937             : 
     938             :       END DO
     939             : 
     940             :       ! simple but quadratically scaling procedure
     941             :       ! loop over local blocks
     942             :       !CALL dbcsr_iterator_start(iter,matrix)
     943             :       !DO WHILE (dbcsr_iterator_blocks_left(iter))
     944             :       !   CALL dbcsr_iterator_next_block(iter,row,col,data_p,&
     945             :       !           row_size=row_size,col_size=col_size)
     946             :       !   DO idomain = 1, ndomains
     947             :       !      IF (job_type==select_row_col) THEN
     948             :       !         domain_needs_block=(qblk_exists(domain_map,col,idomain)&
     949             :       !            .AND.qblk_exists(domain_map,row,idomain))
     950             :       !      ELSE
     951             :       !         domain_needs_block=(idomain==col&
     952             :       !            .AND.qblk_exists(domain_map,row,idomain))
     953             :       !      ENDIF
     954             :       !      IF (domain_needs_block) THEN
     955             :       !         transp=.FALSE.
     956             :       !         dest_node=node_of_domain(idomain)
     957             :       !         !CALL dbcsr_get_stored_coordinates(distr_pattern,&
     958             :       !         !        idomain, idomain, transp, dest_node)
     959             :       !         send_descriptor(1,dest_node+1)=send_descriptor(1,dest_node+1)+1
     960             :       !         send_descriptor(2,dest_node+1)=send_descriptor(2,dest_node+1)+&
     961             :       !            row_size*col_size
     962             :       !      ENDIF
     963             :       !   ENDDO
     964             :       !ENDDO
     965             :       !CALL dbcsr_iterator_stop(iter)
     966             : 
     967             :       ! communicate number of blocks and their sizes to the other nodes
     968        3080 :       CALL group%alltoall(send_descriptor, recv_descriptor, ldesc)
     969             : 
     970       12320 :       ALLOCATE (send_size_cpu(nNodes), send_offset_cpu(nNodes))
     971        3080 :       send_offset_cpu(1) = 0
     972        3080 :       send_size_cpu(1) = send_descriptor(2, 1)
     973        6160 :       DO iNode = 2, nNodes
     974        3080 :          send_size_cpu(iNode) = send_descriptor(2, iNode)
     975             :          send_offset_cpu(iNode) = send_offset_cpu(iNode - 1) + &
     976        6160 :                                   send_size_cpu(iNode - 1)
     977             :       END DO
     978        3080 :       send_size_total = send_offset_cpu(nNodes) + send_size_cpu(nNodes)
     979             : 
     980        9240 :       ALLOCATE (recv_size_cpu(nNodes), recv_offset_cpu(nNodes))
     981        3080 :       recv_offset_cpu(1) = 0
     982        3080 :       recv_size_cpu(1) = recv_descriptor(2, 1)
     983        6160 :       DO iNode = 2, nNodes
     984        3080 :          recv_size_cpu(iNode) = recv_descriptor(2, iNode)
     985             :          recv_offset_cpu(iNode) = recv_offset_cpu(iNode - 1) + &
     986        6160 :                                   recv_size_cpu(iNode - 1)
     987             :       END DO
     988        3080 :       recv_size_total = recv_offset_cpu(nNodes) + recv_size_cpu(nNodes)
     989             : 
     990        9240 :       ALLOCATE (send_size2_cpu(nNodes), send_offset2_cpu(nNodes))
     991        3080 :       send_offset2_cpu(1) = 0
     992        3080 :       send_size2_cpu(1) = 2*send_descriptor(1, 1)
     993        6160 :       DO iNode = 2, nNodes
     994        3080 :          send_size2_cpu(iNode) = 2*send_descriptor(1, iNode)
     995             :          send_offset2_cpu(iNode) = send_offset2_cpu(iNode - 1) + &
     996        6160 :                                    send_size2_cpu(iNode - 1)
     997             :       END DO
     998        3080 :       send_size2_total = send_offset2_cpu(nNodes) + send_size2_cpu(nNodes)
     999             : 
    1000        9240 :       ALLOCATE (recv_size2_cpu(nNodes), recv_offset2_cpu(nNodes))
    1001        3080 :       recv_offset2_cpu(1) = 0
    1002        3080 :       recv_size2_cpu(1) = 2*recv_descriptor(1, 1)
    1003        6160 :       DO iNode = 2, nNodes
    1004        3080 :          recv_size2_cpu(iNode) = 2*recv_descriptor(1, iNode)
    1005             :          recv_offset2_cpu(iNode) = recv_offset2_cpu(iNode - 1) + &
    1006        6160 :                                    recv_size2_cpu(iNode - 1)
    1007             :       END DO
    1008        3080 :       recv_size2_total = recv_offset2_cpu(nNodes) + recv_size2_cpu(nNodes)
    1009             : 
    1010        3080 :       DEALLOCATE (send_descriptor)
    1011        3080 :       DEALLOCATE (recv_descriptor)
    1012             : 
    1013             :       ! collect data from the matrix into send_data
    1014        9182 :       ALLOCATE (send_data(send_size_total))
    1015        9220 :       ALLOCATE (recv_data(recv_size_total))
    1016        9182 :       ALLOCATE (send_data2(send_size2_total))
    1017        9220 :       ALLOCATE (recv_data2(recv_size2_total))
    1018        6160 :       ALLOCATE (offset_block(nNodes))
    1019        6160 :       ALLOCATE (offset2_block(nNodes))
    1020        9240 :       offset_block(:) = 0
    1021        9240 :       offset2_block(:) = 0
    1022             :       ! loop over all domains
    1023       22454 :       DO idomain = 1, ndomains
    1024             : 
    1025       19374 :          dest_node = node_of_domain(idomain)
    1026             : 
    1027             :          ! loop over those rows that have non-zero quencher
    1028       19374 :          index_sr = 1 ! index start row
    1029       19374 :          IF (idomain .GT. 1) index_sr = domain_map%index1(idomain - 1)
    1030       19374 :          index_er = domain_map%index1(idomain) - 1 ! index end row
    1031             : 
    1032      105464 :          DO index_row = index_sr, index_er
    1033             : 
    1034       83010 :             row = domain_map%pairs(index_row, 1)
    1035             : 
    1036       83010 :             IF (job_type == select_row_col) THEN
    1037             :                ! loop over those columns that have non-zero quencher
    1038       14602 :                index_sc = 1 ! index start col
    1039       14602 :                IF (idomain .GT. 1) index_sc = domain_map%index1(idomain - 1)
    1040       14602 :                index_ec = domain_map%index1(idomain) - 1 ! index end col
    1041             :             ELSE
    1042             :                ! fake loop
    1043             :                index_sc = 1 ! index start col
    1044             :                index_ec = 1 ! index end col
    1045             :             END IF
    1046             : 
    1047      233570 :             DO index_col = index_sc, index_ec
    1048             : 
    1049      131186 :                IF (job_type == select_row_col) THEN
    1050       62778 :                   col = domain_map%pairs(index_col, 1)
    1051             :                ELSE
    1052       68408 :                   col = idomain
    1053             :                END IF
    1054             : 
    1055      131186 :                transp = .FALSE.
    1056             :                CALL dbcsr_get_stored_coordinates(matrix, &
    1057      131186 :                                                  row, col, block_node)
    1058      214196 :                IF (block_node .EQ. myNode) THEN
    1059       65593 :                   CALL dbcsr_get_block_p(matrix, row, col, block_p, found, row_size, col_size)
    1060       65593 :                   IF (found) THEN
    1061             :                      !col_offset=0
    1062             :                      !DO icol=1,col_size
    1063             :                      !   start_data=send_offset_cpu(dest_node+1)+&
    1064             :                      !              offset_block(dest_node+1)+&
    1065             :                      !              col_offset
    1066             :                      !   send_data(start_data+1:start_data+row_size)=&
    1067             :                      !      data_p(1:row_size,icol)
    1068             :                      !   col_offset=col_offset+row_size
    1069             :                      !ENDDO
    1070       50286 :                      col_offset = row_size*col_size
    1071             :                      start_data = send_offset_cpu(dest_node + 1) + &
    1072       50286 :                                   offset_block(dest_node + 1)
    1073      100572 :                      send_data(start_data + 1:start_data + col_offset) = RESHAPE(block_p, [col_offset])
    1074       50286 :                      offset_block(dest_node + 1) = offset_block(dest_node + 1) + col_offset
    1075             :                      ! fill out row,col information
    1076             :                      send_data2(send_offset2_cpu(dest_node + 1) + &
    1077       50286 :                                 offset2_block(dest_node + 1) + 1) = row
    1078             :                      send_data2(send_offset2_cpu(dest_node + 1) + &
    1079       50286 :                                 offset2_block(dest_node + 1) + 2) = col
    1080       50286 :                      offset2_block(dest_node + 1) = offset2_block(dest_node + 1) + 2
    1081             :                   END IF
    1082             :                END IF
    1083             : 
    1084             :             END DO ! loop over columns
    1085             : 
    1086             :          END DO ! loop over rows
    1087             : 
    1088             :       END DO
    1089             :       ! more simple but quadratically scaling version
    1090             :       !CALL dbcsr_iterator_start(iter,matrix)
    1091             :       !DO WHILE (dbcsr_iterator_blocks_left(iter))
    1092             :       !   CALL dbcsr_iterator_next_block(iter,row,col,data_p,&
    1093             :       !           row_size=row_size,col_size=col_size)
    1094             :       !   DO idomain = 1, ndomains
    1095             :       !      IF (job_type==select_row_col) THEN
    1096             :       !         domain_needs_block=(qblk_exists(domain_map,col,idomain)&
    1097             :       !            .AND.qblk_exists(domain_map,row,idomain))
    1098             :       !      ELSE
    1099             :       !         domain_needs_block=(idomain==col&
    1100             :       !            .AND.qblk_exists(domain_map,row,idomain))
    1101             :       !      ENDIF
    1102             :       !      IF (domain_needs_block) THEN
    1103             :       !         transp=.FALSE.
    1104             :       !         dest_node=node_of_domain(idomain)
    1105             :       !         !CALL dbcsr_get_stored_coordinates(distr_pattern,&
    1106             :       !         !        idomain, idomain, transp, dest_node)
    1107             :       !         ! place the data appropriately
    1108             :       !         col_offset=0
    1109             :       !         DO icol=1,col_size
    1110             :       !            start_data=send_offset_cpu(dest_node+1)+&
    1111             :       !                       offset_block(dest_node+1)+&
    1112             :       !                       col_offset
    1113             :       !            send_data(start_data+1:start_data+row_size)=&
    1114             :       !               data_p(1:row_size,icol)
    1115             :       !            col_offset=col_offset+row_size
    1116             :       !         ENDDO
    1117             :       !         offset_block(dest_node+1)=offset_block(dest_node+1)+col_size*row_size
    1118             :       !         ! fill out row,col information
    1119             :       !         send_data2(send_offset2_cpu(dest_node+1)+&
    1120             :       !               offset2_block(dest_node+1)+1)=row
    1121             :       !         send_data2(send_offset2_cpu(dest_node+1)+&
    1122             :       !               offset2_block(dest_node+1)+2)=col
    1123             :       !         offset2_block(dest_node+1)=offset2_block(dest_node+1)+2
    1124             :       !      ENDIF
    1125             :       !   ENDDO
    1126             :       !ENDDO
    1127             :       !CALL dbcsr_iterator_stop(iter)
    1128             : 
    1129             :       ! send-receive all blocks
    1130             :       CALL group%alltoall(send_data, send_size_cpu, send_offset_cpu, &
    1131        3080 :                           recv_data, recv_size_cpu, recv_offset_cpu)
    1132             :       ! send-receive rows and cols of the blocks
    1133             :       CALL group%alltoall(send_data2, send_size2_cpu, send_offset2_cpu, &
    1134        3080 :                           recv_data2, recv_size2_cpu, recv_offset2_cpu)
    1135             : 
    1136        3080 :       DEALLOCATE (send_size_cpu, send_offset_cpu)
    1137        3080 :       DEALLOCATE (send_size2_cpu, send_offset2_cpu)
    1138        3080 :       DEALLOCATE (send_data)
    1139        3080 :       DEALLOCATE (send_data2)
    1140        3080 :       DEALLOCATE (offset_block)
    1141        3080 :       DEALLOCATE (offset2_block)
    1142             : 
    1143             :       ! copy blocks into submatrices
    1144        3080 :       CALL dbcsr_get_info(matrix, col_blk_size=col_blk_size, row_blk_size=row_blk_size)
    1145             : !    ALLOCATE(subm_row_size(ndomains),subm_col_size(ndomains))
    1146             : !    subm_row_size(:)=0
    1147             : !    subm_col_size(:)=0
    1148        3080 :       ndomains2 = SIZE(submatrix)
    1149        3080 :       IF (ndomains2 .NE. ndomains) THEN
    1150           0 :          CPABORT("wrong submatrix size")
    1151             :       END IF
    1152        3080 :       CALL release_submatrices(submatrix)
    1153       22454 :       submatrix(:)%nnodes = nNodes
    1154       22454 :       submatrix(:)%group = Group
    1155       22454 :       submatrix(:)%nrows = 0
    1156       22454 :       submatrix(:)%ncols = 0
    1157             : 
    1158       15400 :       ALLOCATE (first_row(nblkrows_tot), first_col(nblkcols_tot))
    1159       22454 :       submatrix(:)%domain = -1
    1160       22454 :       DO idomain = 1, ndomains
    1161       19374 :          dest_node = node_of_domain(idomain)
    1162             :          !transp=.FALSE.
    1163             :          !CALL dbcsr_get_stored_coordinates(distr_pattern,&
    1164             :          !        idomain, idomain, transp, dest_node)
    1165       22454 :          IF (dest_node .EQ. mynode) THEN
    1166        9687 :             submatrix(idomain)%domain = idomain
    1167        9687 :             submatrix(idomain)%nbrows = 0
    1168        9687 :             submatrix(idomain)%nbcols = 0
    1169             : 
    1170             :             ! loop over those rows that have non-zero quencher
    1171       90582 :             first_row(:) = -1
    1172        9687 :             index_sr = 1 ! index start row
    1173        9687 :             IF (idomain .GT. 1) index_sr = domain_map%index1(idomain - 1)
    1174        9687 :             index_er = domain_map%index1(idomain) - 1 ! index end row
    1175       51192 :             DO index_row = index_sr, index_er
    1176       41505 :                row = domain_map%pairs(index_row, 1)
    1177             :                !DO row = 1, nblkrows_tot
    1178             :                !   IF (qblk_exists(domain_map,row,idomain)) THEN
    1179       41505 :                first_row(row) = submatrix(idomain)%nrows + 1
    1180       41505 :                submatrix(idomain)%nrows = submatrix(idomain)%nrows + row_blk_size(row)
    1181       51192 :                submatrix(idomain)%nbrows = submatrix(idomain)%nbrows + 1
    1182             :                !   ENDIF
    1183             :             END DO
    1184       29061 :             ALLOCATE (submatrix(idomain)%dbcsr_row(submatrix(idomain)%nbrows))
    1185       19374 :             ALLOCATE (submatrix(idomain)%size_brow(submatrix(idomain)%nbrows))
    1186        9687 :             smrow = 1
    1187             :             ! again loop over those rows that have non-zero quencher
    1188        9687 :             index_sr = 1 ! index start row
    1189        9687 :             IF (idomain .GT. 1) index_sr = domain_map%index1(idomain - 1)
    1190        9687 :             index_er = domain_map%index1(idomain) - 1 ! index end row
    1191       51192 :             DO index_row = index_sr, index_er
    1192       41505 :                row = domain_map%pairs(index_row, 1)
    1193             :                !DO row = 1, nblkrows_tot
    1194             :                !   IF (first_row(row).ne.-1) THEN
    1195       41505 :                submatrix(idomain)%dbcsr_row(smrow) = row
    1196       41505 :                submatrix(idomain)%size_brow(smrow) = row_blk_size(row)
    1197       51192 :                smrow = smrow + 1
    1198             :                !   ENDIF
    1199             :             END DO
    1200             : 
    1201             :             ! loop over the necessary columns
    1202       90582 :             first_col(:) = -1
    1203        9687 :             IF (job_type == select_row_col) THEN
    1204             :                ! loop over those columns that have non-zero quencher
    1205        1837 :                index_sc = 1 ! index start col
    1206        1837 :                IF (idomain .GT. 1) index_sc = domain_map%index1(idomain - 1)
    1207        1837 :                index_ec = domain_map%index1(idomain) - 1 ! index end col
    1208             :             ELSE
    1209             :                ! fake loop
    1210             :                index_sc = 1 ! index start col
    1211             :                index_ec = 1 ! index end col
    1212             :             END IF
    1213       24838 :             DO index_col = index_sc, index_ec
    1214       15151 :                IF (job_type == select_row_col) THEN
    1215        7301 :                   col = domain_map%pairs(index_col, 1)
    1216             :                ELSE
    1217        7850 :                   col = idomain
    1218             :                END IF
    1219             :                !DO col = 1, nblkcols_tot
    1220             :                !   IF (job_type==select_row_col) THEN
    1221             :                !      domain_needs_block=(qblk_exists(domain_map,col,idomain))
    1222             :                !   ELSE
    1223             :                !      domain_needs_block=(col==idomain) ! RZK-warning col belongs to the domain
    1224             :                !   ENDIF
    1225             :                !   IF (domain_needs_block) THEN
    1226       15151 :                first_col(col) = submatrix(idomain)%ncols + 1
    1227       15151 :                submatrix(idomain)%ncols = submatrix(idomain)%ncols + col_blk_size(col)
    1228       24838 :                submatrix(idomain)%nbcols = submatrix(idomain)%nbcols + 1
    1229             :                !   ENDIF
    1230             :             END DO
    1231             : 
    1232       29061 :             ALLOCATE (submatrix(idomain)%dbcsr_col(submatrix(idomain)%nbcols))
    1233       19374 :             ALLOCATE (submatrix(idomain)%size_bcol(submatrix(idomain)%nbcols))
    1234             : 
    1235             :             ! loop over the necessary columns again
    1236        9687 :             smcol = 1
    1237        9687 :             IF (job_type == select_row_col) THEN
    1238             :                ! loop over those columns that have non-zero quencher
    1239        1837 :                index_sc = 1 ! index start col
    1240        1837 :                IF (idomain .GT. 1) index_sc = domain_map%index1(idomain - 1)
    1241        1837 :                index_ec = domain_map%index1(idomain) - 1 ! index end col
    1242             :             ELSE
    1243             :                ! fake loop
    1244             :                index_sc = 1 ! index start col
    1245             :                index_ec = 1 ! index end col
    1246             :             END IF
    1247       24838 :             DO index_col = index_sc, index_ec
    1248       15151 :                IF (job_type == select_row_col) THEN
    1249        7301 :                   col = domain_map%pairs(index_col, 1)
    1250             :                ELSE
    1251        7850 :                   col = idomain
    1252             :                END IF
    1253             :                !DO col = 1, nblkcols_tot
    1254             :                !   IF (first_col(col).ne.-1) THEN
    1255       15151 :                submatrix(idomain)%dbcsr_col(smcol) = col
    1256       15151 :                submatrix(idomain)%size_bcol(smcol) = col_blk_size(col)
    1257       24838 :                smcol = smcol + 1
    1258             :                !   ENDIF
    1259             :             END DO
    1260             : 
    1261           0 :             ALLOCATE (submatrix(idomain)%mdata( &
    1262             :                       submatrix(idomain)%nrows, &
    1263       38748 :                       submatrix(idomain)%ncols))
    1264     6331606 :             submatrix(idomain)%mdata(:, :) = 0.0_dp
    1265       29061 :             DO iNode = 1, nNodes
    1266       19374 :                block_offset = 0
    1267      238010 :                DO iBlock = 1, recv_size2_cpu(iNode)/2
    1268             :                   ! read the (row,col) of the block
    1269      208949 :                   row = recv_data2(recv_offset2_cpu(iNode) + (iBlock - 1)*2 + 1)
    1270      208949 :                   col = recv_data2(recv_offset2_cpu(iNode) + (iBlock - 1)*2 + 2)
    1271             :                   ! check if this block should be in the submatrix of this domain
    1272      208949 :                   IF ((first_col(col) .NE. -1) .AND. (first_row(row) .NE. -1)) THEN
    1273             :                      ! copy data from the received array into submatrix
    1274       73934 :                      start_data = recv_offset_cpu(iNode) + block_offset + 1
    1275      580303 :                      DO icol = 0, col_blk_size(col) - 1
    1276             :                         submatrix(idomain)%mdata(first_row(row): &
    1277             :                                                  first_row(row) + row_blk_size(row) - 1, &
    1278             :                                                  first_col(col) + icol) = &
    1279     8468182 :                            recv_data(start_data:start_data + row_blk_size(row) - 1)
    1280      580303 :                         start_data = start_data + row_blk_size(row)
    1281             :                      END DO
    1282       73934 :                      IF (job_type == select_row_col) THEN
    1283       51225 :                         IF (matrix_type == dbcsr_type_symmetric .OR. &
    1284             :                             matrix_type == dbcsr_type_antisymmetric) THEN
    1285             :                            ! copy data into the transposed block as well
    1286       10229 :                            antifactor = 1.0_dp
    1287       10229 :                            IF (matrix_type == dbcsr_type_antisymmetric) THEN
    1288           0 :                               antifactor = -1.0_dp
    1289             :                            END IF
    1290       10229 :                            start_data = recv_offset_cpu(iNode) + block_offset + 1
    1291      104710 :                            DO icol = 0, col_blk_size(col) - 1
    1292             :                               submatrix(idomain)%mdata(first_row(col) + icol, &
    1293             :                                                        first_col(row): &
    1294             :                                                        first_col(row) + row_blk_size(row) - 1) = &
    1295             :                                  antifactor*recv_data(start_data: &
    1296     1981842 :                                                       start_data + row_blk_size(row) - 1)
    1297      104710 :                               start_data = start_data + row_blk_size(row)
    1298             :                            END DO
    1299       40996 :                         ELSE IF (matrix_type == dbcsr_type_no_symmetry) THEN
    1300             :                         ELSE
    1301           0 :                            CPABORT("matrix type is NYI")
    1302             :                         END IF
    1303             :                      END IF
    1304             :                   END IF
    1305      228323 :                   block_offset = block_offset + col_blk_size(col)*row_blk_size(row)
    1306             :                END DO
    1307             :             END DO
    1308             :          END IF ! mynode.eq.dest_node
    1309             :       END DO ! loop over domains
    1310             : 
    1311        3080 :       DEALLOCATE (recv_size_cpu, recv_offset_cpu)
    1312        3080 :       DEALLOCATE (recv_size2_cpu, recv_offset2_cpu)
    1313        3080 :       DEALLOCATE (recv_data)
    1314        3080 :       DEALLOCATE (recv_data2)
    1315             : !    DEALLOCATE(subm_row_size,subm_col_size)
    1316        3080 :       DEALLOCATE (first_row, first_col)
    1317             : 
    1318        3080 :       CALL timestop(handle)
    1319             : 
    1320        3080 :    END SUBROUTINE construct_submatrices
    1321             : 
    1322             : ! **************************************************************************************************
    1323             : !> \brief Constructs a DBCSR matrix from submatrices
    1324             : !> \param matrix ...
    1325             : !> \param submatrix ...
    1326             : !> \param distr_pattern ...
    1327             : !> \par History
    1328             : !>       2013.01 created [Rustam Z. Khaliullin]
    1329             : !> \author Rustam Z. Khaliullin
    1330             : ! **************************************************************************************************
    1331        2396 :    SUBROUTINE construct_dbcsr_from_submatrices(matrix, submatrix, distr_pattern)
    1332             : 
    1333             :       TYPE(dbcsr_type), INTENT(INOUT)                    :: matrix
    1334             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    1335             :          INTENT(IN)                                      :: submatrix
    1336             :       TYPE(dbcsr_type), INTENT(IN)                       :: distr_pattern
    1337             : 
    1338             :       CHARACTER(len=*), PARAMETER :: routineN = 'construct_dbcsr_from_submatrices'
    1339             : 
    1340             :       CHARACTER                                          :: matrix_type
    1341             :       INTEGER :: block_offset, col, col_offset, colsize, dest_node, GroupID, handle, iBlock, icol, &
    1342             :          idomain, iNode, irow_subm, ldesc, myNode, nblkcols_tot, nblkrows_tot, ndomains, &
    1343             :          ndomains2, nNodes, recv_size2_total, recv_size_total, row, rowsize, send_size2_total, &
    1344             :          send_size_total, smroff, start_data, unit_nr
    1345        2396 :       INTEGER, ALLOCATABLE, DIMENSION(:) :: offset2_block, offset_block, recv_data2, &
    1346        2396 :          recv_offset2_cpu, recv_offset_cpu, recv_size2_cpu, recv_size_cpu, send_data2, &
    1347        2396 :          send_offset2_cpu, send_offset_cpu, send_size2_cpu, send_size_cpu
    1348        2396 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: recv_descriptor, send_descriptor
    1349        2396 :       INTEGER, DIMENSION(:), POINTER                     :: col_blk_size, row_blk_size
    1350             :       LOGICAL                                            :: transp
    1351        2396 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: recv_data, send_data
    1352        2396 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: new_block
    1353        2396 :       REAL(KIND=dp), DIMENSION(:, :), POINTER            :: block_p
    1354             :       TYPE(cp_logger_type), POINTER                      :: logger
    1355             :       TYPE(dbcsr_distribution_type)                      :: pattern_dist
    1356             :       TYPE(dbcsr_iterator_type)                          :: iter
    1357             :       TYPE(mp_comm_type)                                 :: group
    1358             : 
    1359        2396 :       CALL timeset(routineN, handle)
    1360             : 
    1361             :       ! get a useful output_unit
    1362        2396 :       logger => cp_get_default_logger()
    1363        2396 :       IF (logger%para_env%is_source()) THEN
    1364        1198 :          unit_nr = cp_logger_get_default_unit_nr(logger, local=.TRUE.)
    1365             :       ELSE
    1366             :          unit_nr = -1
    1367             :       END IF
    1368             : 
    1369        2396 :       CALL dbcsr_get_info(matrix, nblkrows_total=nblkrows_tot, nblkcols_total=nblkcols_tot)
    1370        2396 :       ndomains = nblkcols_tot ! RZK-warning not true for atomic distributions
    1371        2396 :       ndomains2 = SIZE(submatrix)
    1372             : 
    1373        2396 :       IF (ndomains .NE. ndomains2) THEN
    1374           0 :          CPABORT("domain mismatch")
    1375             :       END IF
    1376             : 
    1377        2396 :       CALL dbcsr_get_info(distr_pattern, distribution=pattern_dist)
    1378        2396 :       CALL dbcsr_distribution_get(pattern_dist, numnodes=nNodes, group=GroupID, mynode=myNode)
    1379             : 
    1380        2396 :       CALL group%set_handle(GroupID)
    1381             : 
    1382        2396 :       matrix_type = dbcsr_get_matrix_type(matrix)
    1383        2396 :       IF (matrix_type .NE. dbcsr_type_no_symmetry) THEN
    1384           0 :          CPABORT("only non-symmetric matrices so far")
    1385             :       END IF
    1386             : 
    1387             :       ! remove all blocks from the dbcsr matrix
    1388        2396 :       CALL dbcsr_iterator_start(iter, matrix)
    1389       21295 :       DO WHILE (dbcsr_iterator_blocks_left(iter))
    1390       18899 :          CALL dbcsr_iterator_next_block(iter, row, col, block_p)
    1391     1211147 :          block_p(:, :) = 0.0_dp
    1392             :       END DO
    1393        2396 :       CALL dbcsr_iterator_stop(iter)
    1394        2396 :       CALL dbcsr_filter(matrix, 0.1_dp)
    1395             : 
    1396        2396 :       CALL dbcsr_work_create(matrix, work_mutable=.TRUE.)
    1397             : 
    1398        2396 :       ldesc = 2
    1399        7188 :       ALLOCATE (send_descriptor(ldesc, nNodes))
    1400        4792 :       ALLOCATE (recv_descriptor(ldesc, nNodes))
    1401       16772 :       send_descriptor(:, :) = 0
    1402             : 
    1403             :       ! loop over domains - find how much data to send
    1404       18056 :       DO idomain = 1, ndomains
    1405             : 
    1406       18056 :          IF (submatrix(idomain)%domain .GT. 0) THEN
    1407             : 
    1408       41966 :             DO irow_subm = 1, submatrix(idomain)%nbrows
    1409             : 
    1410       34136 :                IF (submatrix(idomain)%nbcols .NE. 1) THEN
    1411           0 :                   CPABORT("corrupt submatrix structure")
    1412             :                END IF
    1413             : 
    1414       34136 :                row = submatrix(idomain)%dbcsr_row(irow_subm)
    1415       34136 :                col = submatrix(idomain)%dbcsr_col(1)
    1416             : 
    1417       34136 :                IF (col .NE. idomain) THEN
    1418           0 :                   CPABORT("corrupt submatrix structure")
    1419             :                END IF
    1420             : 
    1421       34136 :                transp = .FALSE.
    1422             :                CALL dbcsr_get_stored_coordinates(distr_pattern, &
    1423       34136 :                                                  row, idomain, dest_node)
    1424             : 
    1425       34136 :                send_descriptor(1, dest_node + 1) = send_descriptor(1, dest_node + 1) + 1
    1426             :                send_descriptor(2, dest_node + 1) = send_descriptor(2, dest_node + 1) + &
    1427             :                                                    submatrix(idomain)%size_brow(irow_subm)* &
    1428       41966 :                                                    submatrix(idomain)%size_bcol(1)
    1429             : 
    1430             :             END DO ! loop over submatrix blocks
    1431             : 
    1432             :          END IF
    1433             : 
    1434             :       END DO ! loop over domains
    1435             : 
    1436             :       ! communicate number of blocks and their sizes to the other nodes
    1437        2396 :       CALL group%alltoall(send_descriptor, recv_descriptor, ldesc)
    1438             : 
    1439        9584 :       ALLOCATE (send_size_cpu(nNodes), send_offset_cpu(nNodes))
    1440        2396 :       send_offset_cpu(1) = 0
    1441        2396 :       send_size_cpu(1) = send_descriptor(2, 1)
    1442        4792 :       DO iNode = 2, nNodes
    1443        2396 :          send_size_cpu(iNode) = send_descriptor(2, iNode)
    1444             :          send_offset_cpu(iNode) = send_offset_cpu(iNode - 1) + &
    1445        4792 :                                   send_size_cpu(iNode - 1)
    1446             :       END DO
    1447        2396 :       send_size_total = send_offset_cpu(nNodes) + send_size_cpu(nNodes)
    1448             : 
    1449        7188 :       ALLOCATE (recv_size_cpu(nNodes), recv_offset_cpu(nNodes))
    1450        2396 :       recv_offset_cpu(1) = 0
    1451        2396 :       recv_size_cpu(1) = recv_descriptor(2, 1)
    1452        4792 :       DO iNode = 2, nNodes
    1453        2396 :          recv_size_cpu(iNode) = recv_descriptor(2, iNode)
    1454             :          recv_offset_cpu(iNode) = recv_offset_cpu(iNode - 1) + &
    1455        4792 :                                   recv_size_cpu(iNode - 1)
    1456             :       END DO
    1457        2396 :       recv_size_total = recv_offset_cpu(nNodes) + recv_size_cpu(nNodes)
    1458             : 
    1459        7188 :       ALLOCATE (send_size2_cpu(nNodes), send_offset2_cpu(nNodes))
    1460        2396 :       send_offset2_cpu(1) = 0
    1461        2396 :       send_size2_cpu(1) = 2*send_descriptor(1, 1)
    1462        4792 :       DO iNode = 2, nNodes
    1463        2396 :          send_size2_cpu(iNode) = 2*send_descriptor(1, iNode)
    1464             :          send_offset2_cpu(iNode) = send_offset2_cpu(iNode - 1) + &
    1465        4792 :                                    send_size2_cpu(iNode - 1)
    1466             :       END DO
    1467        2396 :       send_size2_total = send_offset2_cpu(nNodes) + send_size2_cpu(nNodes)
    1468             : 
    1469        7188 :       ALLOCATE (recv_size2_cpu(nNodes), recv_offset2_cpu(nNodes))
    1470        2396 :       recv_offset2_cpu(1) = 0
    1471        2396 :       recv_size2_cpu(1) = 2*recv_descriptor(1, 1)
    1472        4792 :       DO iNode = 2, nNodes
    1473        2396 :          recv_size2_cpu(iNode) = 2*recv_descriptor(1, iNode)
    1474             :          recv_offset2_cpu(iNode) = recv_offset2_cpu(iNode - 1) + &
    1475        4792 :                                    recv_size2_cpu(iNode - 1)
    1476             :       END DO
    1477        2396 :       recv_size2_total = recv_offset2_cpu(nNodes) + recv_size2_cpu(nNodes)
    1478             : 
    1479        2396 :       DEALLOCATE (send_descriptor)
    1480        2396 :       DEALLOCATE (recv_descriptor)
    1481             : 
    1482             :       ! collect data from the matrix into send_data
    1483        7188 :       ALLOCATE (send_data(send_size_total))
    1484        7153 :       ALLOCATE (recv_data(recv_size_total))
    1485        7188 :       ALLOCATE (send_data2(send_size2_total))
    1486        7153 :       ALLOCATE (recv_data2(recv_size2_total))
    1487        4792 :       ALLOCATE (offset_block(nNodes))
    1488        4792 :       ALLOCATE (offset2_block(nNodes))
    1489        7188 :       offset_block(:) = 0
    1490        7188 :       offset2_block(:) = 0
    1491             :       ! loop over domains - collect data to send
    1492       18056 :       DO idomain = 1, ndomains
    1493             : 
    1494       18056 :          IF (submatrix(idomain)%domain .GT. 0) THEN
    1495             : 
    1496        7830 :             smroff = 0
    1497       41966 :             DO irow_subm = 1, submatrix(idomain)%nbrows
    1498             : 
    1499       34136 :                row = submatrix(idomain)%dbcsr_row(irow_subm)
    1500       34136 :                col = submatrix(idomain)%dbcsr_col(1)
    1501             : 
    1502       34136 :                rowsize = submatrix(idomain)%size_brow(irow_subm)
    1503       34136 :                colsize = submatrix(idomain)%size_bcol(1)
    1504             : 
    1505       34136 :                transp = .FALSE.
    1506             :                CALL dbcsr_get_stored_coordinates(distr_pattern, &
    1507       34136 :                                                  row, idomain, dest_node)
    1508             : 
    1509             :                ! place the data appropriately
    1510       34136 :                col_offset = 0
    1511      137454 :                DO icol = 1, colsize
    1512             :                   start_data = send_offset_cpu(dest_node + 1) + &
    1513             :                                offset_block(dest_node + 1) + &
    1514      103318 :                                col_offset
    1515             :                   send_data(start_data + 1:start_data + rowsize) = &
    1516     1452416 :                      submatrix(idomain)%mdata(smroff + 1:smroff + rowsize, icol)
    1517      137454 :                   col_offset = col_offset + rowsize
    1518             :                END DO
    1519             :                offset_block(dest_node + 1) = offset_block(dest_node + 1) + &
    1520       34136 :                                              colsize*rowsize
    1521             :                ! fill out row,col information
    1522             :                send_data2(send_offset2_cpu(dest_node + 1) + &
    1523       34136 :                           offset2_block(dest_node + 1) + 1) = row
    1524             :                send_data2(send_offset2_cpu(dest_node + 1) + &
    1525       34136 :                           offset2_block(dest_node + 1) + 2) = col
    1526       34136 :                offset2_block(dest_node + 1) = offset2_block(dest_node + 1) + 2
    1527             : 
    1528       76102 :                smroff = smroff + rowsize
    1529             : 
    1530             :             END DO
    1531             : 
    1532             :          END IF
    1533             : 
    1534             :       END DO ! loop over domains
    1535             : 
    1536             :       ! send-receive all blocks
    1537             :       CALL group%alltoall(send_data, send_size_cpu, send_offset_cpu, &
    1538        2396 :                           recv_data, recv_size_cpu, recv_offset_cpu)
    1539             :       ! send-receive rows and cols of the blocks
    1540             :       CALL group%alltoall(send_data2, send_size2_cpu, send_offset2_cpu, &
    1541        2396 :                           recv_data2, recv_size2_cpu, recv_offset2_cpu)
    1542             : 
    1543        2396 :       DEALLOCATE (send_size_cpu, send_offset_cpu)
    1544        2396 :       DEALLOCATE (send_size2_cpu, send_offset2_cpu)
    1545        2396 :       DEALLOCATE (send_data)
    1546        2396 :       DEALLOCATE (send_data2)
    1547        2396 :       DEALLOCATE (offset_block)
    1548        2396 :       DEALLOCATE (offset2_block)
    1549             : 
    1550             :       ! copy received data into dbcsr matrix
    1551        2396 :       CALL dbcsr_get_info(matrix, col_blk_size=col_blk_size, row_blk_size=row_blk_size)
    1552        7188 :       DO iNode = 1, nNodes
    1553        4792 :          block_offset = 0
    1554       41324 :          DO iBlock = 1, recv_size2_cpu(iNode)/2
    1555             :             ! read the (row,col) of the block
    1556       34136 :             row = recv_data2(recv_offset2_cpu(iNode) + (iBlock - 1)*2 + 1)
    1557       34136 :             col = recv_data2(recv_offset2_cpu(iNode) + (iBlock - 1)*2 + 2)
    1558             :             ! copy data from the received array into the matrix block
    1559       34136 :             start_data = recv_offset_cpu(iNode) + block_offset + 1
    1560      136544 :             ALLOCATE (new_block(row_blk_size(row), col_blk_size(col)))
    1561      137454 :             DO icol = 1, col_blk_size(col)
    1562             :                new_block(:, icol) = &
    1563     1452416 :                   recv_data(start_data:start_data + row_blk_size(row) - 1)
    1564      137454 :                start_data = start_data + row_blk_size(row)
    1565             :             END DO
    1566       34136 :             CALL dbcsr_put_block(matrix, row, col, new_block)
    1567       34136 :             DEALLOCATE (new_block)
    1568       38928 :             block_offset = block_offset + col_blk_size(col)*row_blk_size(row)
    1569             :          END DO
    1570             :       END DO
    1571             : 
    1572        2396 :       DEALLOCATE (recv_size_cpu, recv_offset_cpu)
    1573        2396 :       DEALLOCATE (recv_size2_cpu, recv_offset2_cpu)
    1574        2396 :       DEALLOCATE (recv_data)
    1575        2396 :       DEALLOCATE (recv_data2)
    1576             : 
    1577        2396 :       CALL dbcsr_finalize(matrix)
    1578             : 
    1579        2396 :       CALL timestop(handle)
    1580             : 
    1581       11980 :    END SUBROUTINE construct_dbcsr_from_submatrices
    1582             : 
    1583             : ! **************************************************************************************************
    1584             : !> \brief ...
    1585             : !> \param submatrices ...
    1586             : !> \param mpgroup ...
    1587             : ! **************************************************************************************************
    1588           0 :    SUBROUTINE print_submatrices(submatrices, mpgroup)
    1589             : 
    1590             :       TYPE(domain_submatrix_type), DIMENSION(:), &
    1591             :          INTENT(IN)                                      :: submatrices
    1592             :       TYPE(mp_comm_type), INTENT(IN)                     :: mpgroup
    1593             : 
    1594             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'print_submatrices'
    1595             : 
    1596             :       CHARACTER(len=30)                                  :: colstr, formatstr
    1597             :       INTEGER                                            :: handle, i, irow, n, ncols, nrows
    1598             : 
    1599           0 :       CALL timeset(routineN, handle)
    1600             : 
    1601           0 :       n = SIZE(submatrices)
    1602             : 
    1603           0 :       DO i = 1, n
    1604           0 :          nrows = SIZE(submatrices(i)%mdata, 1)
    1605           0 :          ncols = SIZE(submatrices(i)%mdata, 2)
    1606           0 :          WRITE (colstr, *) ncols
    1607           0 :          formatstr = '('//TRIM(ADJUSTL(colstr))//'F16.9)'
    1608           0 :          IF (submatrices(i)%domain .GT. 0) THEN
    1609           0 :             WRITE (*, *) "SUBMATRIX: ", i, nrows, 'x', ncols
    1610           0 :             nrows = SIZE(submatrices(i)%mdata, 1)
    1611           0 :             DO irow = 1, nrows
    1612           0 :                WRITE (*, formatstr) submatrices(i)%mdata(irow, :)
    1613             :             END DO
    1614             :          END IF
    1615           0 :          CALL mpgroup%sync()
    1616             :       END DO
    1617             : 
    1618           0 :       CALL timestop(handle)
    1619             : 
    1620           0 :    END SUBROUTINE print_submatrices
    1621             : 
    1622             : ! **************************************************************************************************
    1623             : !> \brief Reports whether the DBCSR block (row,col) exists in the quencher
    1624             : !> \param map ...
    1625             : !> \param row ...
    1626             : !> \param col ...
    1627             : !> \return ...
    1628             : !> \par History
    1629             : !>       2013.01 created [Rustam Z. Khaliullin]
    1630             : !> \author Rustam Z. Khaliullin
    1631             : ! **************************************************************************************************
    1632           0 :    FUNCTION qblk_exists(map, row, col)
    1633             : 
    1634             :       TYPE(domain_map_type), INTENT(IN)                  :: map
    1635             :       INTEGER, INTENT(IN)                                :: row, col
    1636             :       LOGICAL                                            :: qblk_exists
    1637             : 
    1638             :       INTEGER                                            :: first, last, mid, ndomains
    1639             : 
    1640             : !CALL timeset(routineN,handle)
    1641             : 
    1642           0 :       ndomains = SIZE(map%index1)
    1643             : 
    1644           0 :       qblk_exists = .FALSE.
    1645           0 :       IF (col .LT. 1 .OR. col .GT. ndomains) RETURN
    1646           0 :       first = 1
    1647           0 :       IF (col .GT. 1) first = map%index1(col - 1)
    1648           0 :       last = map%index1(col) - 1
    1649             : 
    1650             :       ! perform binary search within first-last
    1651           0 :       DO WHILE (last .GE. first)
    1652           0 :          mid = first + (last - first)/2
    1653           0 :          IF (map%pairs(mid, 1) .GT. row) THEN
    1654           0 :             last = mid - 1
    1655           0 :          ELSE IF (map%pairs(mid, 1) .LT. row) THEN
    1656           0 :             first = mid + 1
    1657             :          ELSE
    1658             :             qblk_exists = .TRUE. ! SUCCESS!!
    1659             :             EXIT
    1660             :          END IF
    1661             :       END DO
    1662             : 
    1663             :       !CALL timestop(handle)
    1664             : 
    1665             :       RETURN
    1666             : 
    1667             :    END FUNCTION qblk_exists
    1668             : 
    1669             : END MODULE domain_submatrix_methods
    1670             : 

Generated by: LCOV version 1.15