LCOV - code coverage report
Current view: top level - src/dbt - dbt_split.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:9843133) Lines: 193 202 95.5 %
Date: 2024-05-10 06:53:45 Functions: 5 5 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Routines to split blocks and to convert between tensors with different block sizes.
      10             : !> \author Patrick Seewald
      11             : ! **************************************************************************************************
      12             : MODULE dbt_split
      13             : 
      14             :    #:include "dbt_macros.fypp"
      15             :    #:set maxdim = maxrank
      16             :    #:set ndims = range(2,maxdim+1)
      17             : 
      18             :    USE dbt_allocate_wrap, ONLY: allocate_any
      19             :    USE dbt_array_list_methods, ONLY: get_ith_array
      20             :    USE dbt_block, ONLY: dbt_iterator_type, &
      21             :                         dbt_get_block, &
      22             :                         dbt_put_block, &
      23             :                         dbt_iterator_start, &
      24             :                         dbt_iterator_num_blocks, &
      25             :                         dbt_iterator_blocks_left, &
      26             :                         dbt_iterator_stop, &
      27             :                         dbt_iterator_next_block, &
      28             :                         dbt_reserve_blocks
      29             :    USE dbt_index, ONLY: dbt_get_mapping_info, &
      30             :                         dbt_inverse_order
      31             :    USE dbt_types, ONLY: dbt_create, &
      32             :                         dbt_type, &
      33             :                         ndims_tensor, &
      34             :                         dbt_distribution_type, &
      35             :                         dbt_distribution, &
      36             :                         dbt_distribution_destroy, &
      37             :                         dbt_distribution_new_expert, &
      38             :                         dbt_clear, &
      39             :                         dbt_finalize, &
      40             :                         dbt_get_num_blocks, &
      41             :                         dbt_blk_offsets, &
      42             :                         dbt_blk_sizes, &
      43             :                         ndims_matrix_row, &
      44             :                         ndims_matrix_column, &
      45             :                         dbt_filter, &
      46             :                         dbt_copy_contraction_storage
      47             :    USE kinds, ONLY: dp, dp
      48             : 
      49             : #include "../base/base_uses.f90"
      50             :    IMPLICIT NONE
      51             :    PRIVATE
      52             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'dbt_split'
      53             :    PUBLIC :: &
      54             :       dbt_make_compatible_blocks, &
      55             :       dbt_split_blocks, &
      56             :       dbt_split_blocks_generic, &
      57             :       dbt_split_copyback, &
      58             :       dbt_crop
      59             : 
      60             : CONTAINS
      61             : 
      62             : ! **************************************************************************************************
      63             : !> \brief Split tensor blocks into smaller blocks
      64             : !> \param tensor_in Input tensor
      65             : !> \param tensor_out Output tensor (splitted blocks)
      66             : !> \param blk_size_i block sizes for each of the tensor dimensions
      67             : !> \param nodata don't copy data from tensor_in to tensor_out
      68             : !> \author Patrick Seewald
      69             : ! **************************************************************************************************
      70     1333008 :    SUBROUTINE dbt_split_blocks_generic(tensor_in, tensor_out, ${varlist("blk_size")}$, nodata)
      71             :       TYPE(dbt_type), INTENT(INOUT)               :: tensor_in
      72             :       TYPE(dbt_type), INTENT(OUT)                 :: tensor_out
      73             :       INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL     :: ${varlist("blk_size")}$
      74             :       LOGICAL, INTENT(IN), OPTIONAL                   :: nodata
      75             : 
      76     1925456 :       TYPE(dbt_distribution_type)                 :: dist_old, dist_split
      77             :       TYPE(dbt_iterator_type)                     :: iter
      78      148112 :       INTEGER, DIMENSION(:), ALLOCATABLE              :: ${varlist("nd_dist_split")}$
      79      148112 :       INTEGER, DIMENSION(:), ALLOCATABLE              :: ${varlist("nd_blk_size_split")}$
      80      148112 :       INTEGER, DIMENSION(:), ALLOCATABLE              :: ${varlist("index_split_offset")}$
      81      148112 :       INTEGER, DIMENSION(:), ALLOCATABLE              :: ${varlist("inblock_offset")}$
      82      148112 :       INTEGER, DIMENSION(:), ALLOCATABLE              :: ${varlist("blk_nsplit")}$
      83             :       INTEGER                                         :: ${varlist("split_blk")}$
      84             :       INTEGER :: idim, i, isplit_sum, nsplit, handle, splitsum, bcount
      85      148112 :       INTEGER, DIMENSION(:, :), ALLOCATABLE           :: blks_to_allocate
      86      296224 :       INTEGER, DIMENSION(:), ALLOCATABLE :: dist_d, blk_size_d, blk_size_split_d, dist_split_d
      87      296224 :       INTEGER, DIMENSION(ndims_matrix_row(tensor_in)) :: map1_2d
      88      296224 :       INTEGER, DIMENSION(ndims_matrix_column(tensor_in)) :: map2_2d
      89      148112 :       INTEGER, DIMENSION(ndims_tensor(tensor_in)) :: blk_index, blk_size, blk_offset, &
      90      148112 :                                                      blk_shape
      91             :       INTEGER, DIMENSION(${maxdim}$) :: bi_split, inblock_offset
      92             :       LOGICAL :: found
      93             : 
      94             :       #:for ndim in ndims
      95      148112 :          REAL(dp), DIMENSION(${shape_colon(n=ndim)}$), ALLOCATABLE :: block_${ndim}$d
      96             :       #:endfor
      97             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_split_blocks_generic'
      98             : 
      99      148112 :       CALL timeset(routineN, handle)
     100             : 
     101      148112 :       dist_old = dbt_distribution(tensor_in)
     102             : 
     103      521712 :       DO idim = 1, ndims_tensor(tensor_in)
     104      373600 :          CALL get_ith_array(dist_old%nd_dist, idim, dist_d)
     105      373600 :          CALL get_ith_array(tensor_in%blk_sizes, idim, blk_size_d)
     106             : 
     107             :          #:for idim in range(1, maxdim+1)
     108      747200 :             IF (idim == ${idim}$) THEN
     109             :                ! split block index offset for each normal block index
     110     1120800 :                ALLOCATE (index_split_offset_${idim}$ (SIZE(dist_d)))
     111             :                ! how many split blocks for each normal block index
     112      747200 :                ALLOCATE (blk_nsplit_${idim}$ (SIZE(dist_d)))
     113             :                ! data offset of split blocks w.r.t. corresponding normal block
     114     1120800 :                ALLOCATE (inblock_offset_${idim}$ (SIZE(blk_size_${idim}$)))
     115     4554400 :                ALLOCATE (blk_size_split_d, source=blk_size_${idim}$)
     116             :             END IF
     117             :          #:endfor
     118             : 
     119             :          ! distribution vector for split blocks
     120     1120800 :          ALLOCATE (dist_split_d(SIZE(blk_size_split_d)))
     121             : 
     122      373600 :          isplit_sum = 0 ! counting splits
     123     2905488 :          DO i = 1, SIZE(blk_size_d)
     124             :             nsplit = 0 ! number of splits for current normal block
     125             :             splitsum = 0 ! summing split block sizes for current normal block
     126     5965488 :             DO WHILE (splitsum < blk_size_d(i))
     127     3433600 :                nsplit = nsplit + 1
     128     3433600 :                isplit_sum = isplit_sum + 1
     129             :                #:for idim in range(1, maxdim+1)
     130     3433600 :                   IF (idim == ${idim}$) inblock_offset_${idim}$ (isplit_sum) = splitsum
     131             :                #:endfor
     132     3433600 :                dist_split_d(isplit_sum) = dist_d(i)
     133     3433600 :                splitsum = splitsum + blk_size_split_d(isplit_sum)
     134             :             END DO
     135     2531888 :             CPASSERT(splitsum == blk_size_d(i))
     136             :             #:for idim in range(1, maxdim+1)
     137     5437376 :                IF (idim == ${idim}$) THEN
     138     2531888 :                   blk_nsplit_${idim}$ (i) = nsplit
     139     2531888 :                   index_split_offset_${idim}$ (i) = isplit_sum - nsplit
     140             :                END IF
     141             :             #:endfor
     142             :          END DO
     143             : 
     144             :          #:for idim in range(1, maxdim+1)
     145      747200 :             IF (idim == ${idim}$) THEN
     146     4554400 :                ALLOCATE (nd_dist_split_${idim}$, source=dist_split_d)
     147     4554400 :                ALLOCATE (nd_blk_size_split_${idim}$, source=blk_size_split_d)
     148             :             END IF
     149             :          #:endfor
     150      373600 :          DEALLOCATE (dist_split_d)
     151     1268912 :          DEALLOCATE (blk_size_split_d)
     152             : 
     153             :       END DO
     154             : 
     155      148112 :       CALL dbt_get_mapping_info(tensor_in%nd_index_blk, map1_2d=map1_2d, map2_2d=map2_2d)
     156             : 
     157             :       #:for ndim in ndims
     158      296224 :          IF (ndims_tensor(tensor_in) == ${ndim}$) THEN
     159             :             CALL dbt_distribution_new_expert(dist_split, tensor_in%pgrid, map1_2d, map2_2d, &
     160      148112 :                                              ${varlist("nd_dist_split", nmax=ndim)}$)
     161             :             CALL dbt_create(tensor_out, tensor_in%name, dist_split, map1_2d, map2_2d, &
     162      148112 :                             ${varlist("nd_blk_size_split", nmax=ndim)}$)
     163             :          END IF
     164             :       #:endfor
     165             : 
     166      148112 :       CALL dbt_distribution_destroy(dist_split)
     167             : 
     168      148112 :       IF (PRESENT(nodata)) THEN
     169       73783 :          IF (nodata) THEN
     170       59609 :             CALL timestop(handle)
     171       59609 :             RETURN
     172             :          END IF
     173             :       END IF
     174             : 
     175             : !$OMP PARALLEL DEFAULT(NONE) &
     176             : !$OMP SHARED(tensor_in,tensor_out) &
     177             : !$OMP SHARED(${varlist("blk_nsplit", nmax=ndim)}$) &
     178             : !$OMP SHARED(${varlist("inblock_offset", nmax=ndim)}$) &
     179             : !$OMP SHARED(${varlist("blk_size", nmax=ndim)}$) &
     180             : !$OMP SHARED(${varlist("index_split_offset", nmax=ndim)}$) &
     181             : !$OMP PRIVATE(iter,found,bcount,blks_to_allocate,bi_split,inblock_offset) &
     182             : !$OMP PRIVATE(blk_index,blk_size,blk_offset,blk_shape) &
     183       88503 : !$OMP PRIVATE(block_2d,block_3d,block_4d)
     184             :       CALL dbt_iterator_start(iter, tensor_in)
     185             : 
     186             :       bcount = 0
     187             :       DO WHILE (dbt_iterator_blocks_left(iter))
     188             :          CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size)
     189             :          #:for ndim in ndims
     190             :             IF (ndims_tensor(tensor_in) == ${ndim}$) THEN
     191             :                #:for idim in range(1,ndim+1)
     192             :                   DO split_blk_${idim}$ = 1, blk_nsplit_${idim}$ (blk_index(${idim}$))
     193             :                      #:endfor
     194             :                      bcount = bcount + 1
     195             :                      #:for idim in range(1,ndim+1)
     196             :                         END DO
     197             :                      #:endfor
     198             :                   END IF
     199             :                #:endfor
     200             :             END DO
     201             :             CALL dbt_iterator_stop(iter)
     202             : 
     203             :             ALLOCATE (blks_to_allocate(bcount, ndims_tensor(tensor_in)))
     204             : 
     205             :             CALL dbt_iterator_start(iter, tensor_in)
     206             : 
     207             :             bcount = 0
     208             :             DO WHILE (dbt_iterator_blocks_left(iter))
     209             :                CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size, blk_offset=blk_offset)
     210             : 
     211             :                #:for ndim in ndims
     212             :                   IF (ndims_tensor(tensor_in) == ${ndim}$) THEN
     213             :                      #:for idim in range(1,ndim+1)
     214             :                         DO split_blk_${idim}$ = 1, blk_nsplit_${idim}$ (blk_index(${idim}$))
     215             :                            bi_split(${idim}$) = index_split_offset_${idim}$ (blk_index(${idim}$)) + split_blk_${idim}$
     216             :                            #:endfor
     217             :                            bcount = bcount + 1
     218             :                            blks_to_allocate(bcount, :) = bi_split(1:ndims_tensor(tensor_in))
     219             :                            #:for idim in range(1,ndim+1)
     220             :                               END DO
     221             :                            #:endfor
     222             :                         END IF
     223             :                      #:endfor
     224             :                   END DO
     225             : 
     226             :                   CALL dbt_iterator_stop(iter)
     227             : 
     228             :                   CALL dbt_reserve_blocks(tensor_out, blks_to_allocate)
     229             : 
     230             :                   CALL dbt_iterator_start(iter, tensor_in)
     231             : 
     232             :                   DO WHILE (dbt_iterator_blocks_left(iter))
     233             :                      CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size, blk_offset=blk_offset)
     234             :                      #:for ndim in ndims
     235             :                         IF (ndims_tensor(tensor_in) == ${ndim}$) THEN
     236             :                            CALL dbt_get_block(tensor_in, blk_index, block_${ndim}$d, found)
     237             :                            CPASSERT(found)
     238             :                         END IF
     239             :                      #:endfor
     240             :                      #:for ndim in ndims
     241             :                         IF (ndims_tensor(tensor_in) == ${ndim}$) THEN
     242             :                            #:for idim in range(1,ndim+1)
     243             :                               DO split_blk_${idim}$ = 1, blk_nsplit_${idim}$ (blk_index(${idim}$))
     244             :                                  ! split block index
     245             :                                  bi_split(${idim}$) = index_split_offset_${idim}$ (blk_index(${idim}$)) + split_blk_${idim}$
     246             :                                  blk_shape(${idim}$) = blk_size_${idim}$ (bi_split(${idim}$))
     247             :                                  #:endfor
     248             : 
     249             :                                  #:for idim in range(1,ndim+1)
     250             :                                     inblock_offset(${idim}$) = inblock_offset_${idim}$ (bi_split(${idim}$))
     251             :                                  #:endfor
     252             :                                  CALL dbt_put_block(tensor_out, bi_split(1:${ndim}$), &
     253             :                                                     blk_shape, &
     254             :                                                     block_${ndim}$d( &
     255             :                                       ${", ".join(["inblock_offset("+str(idim)+") + 1:inblock_offset("+str(idim)+") + blk_shape("+str(idim)+")" for idim in range(1, ndim+1)])}$))
     256             : 
     257             :                                  #:for idim in range(1,ndim+1)
     258             :                                     END DO
     259             :                                  #:endfor
     260             : 
     261             :                                  DEALLOCATE (block_${ndim}$d)
     262             :                               END IF
     263             :                            #:endfor
     264             :                         END DO
     265             :                         CALL dbt_iterator_stop(iter)
     266             : !$OMP END PARALLEL
     267             : 
     268       88503 :                         CALL dbt_finalize(tensor_out)
     269             : 
     270             :                         ! remove blocks that are exactly 0, these can occur if a cropping operation was performed before splitting
     271       88503 :                         CALL dbt_filter(tensor_out, TINY(0.0_dp))
     272             : 
     273       88503 :                         CALL timestop(handle)
     274             : 
     275      444336 :                      END SUBROUTINE
     276             : 
     277             : ! **************************************************************************************************
     278             : !> \brief Split tensor blocks into smaller blocks of maximum size PRODUCT(block_sizes).
     279             : !> \param tensor_in Input tensor
     280             : !> \param tensor_out Output tensor (split blocks)
     281             : !> \param block_sizes block sizes for each of the tensor dimensions
     282             : !> \param nodata don't copy data from tensor_in to tensor_out
     283             : !> \author Patrick Seewald
     284             : ! **************************************************************************************************
     285        4368 :                      SUBROUTINE dbt_split_blocks(tensor_in, tensor_out, block_sizes, nodata)
     286             : 
     287             :                         TYPE(dbt_type), INTENT(INOUT)               :: tensor_in
     288             :                         TYPE(dbt_type), INTENT(OUT)                 :: tensor_out
     289             :                         INTEGER, DIMENSION(ndims_tensor(tensor_in)), &
     290             :                            INTENT(IN)                                   :: block_sizes
     291             :                         LOGICAL, INTENT(IN), OPTIONAL                   :: nodata
     292             : 
     293         546 :                         INTEGER, DIMENSION(:), ALLOCATABLE              :: ${varlist("nd_blk_size_split")}$
     294             :                         INTEGER :: idim, i, isplit_sum, blk_remainder, nsplit, isplit
     295         546 :                         INTEGER, DIMENSION(:), ALLOCATABLE :: blk_size_d, blk_size_split_d
     296             : 
     297        2002 :                         DO idim = 1, ndims_tensor(tensor_in)
     298        1456 :                            CALL get_ith_array(tensor_in%blk_sizes, idim, blk_size_d)
     299             : 
     300        1456 :                            isplit_sum = 0
     301        3628 :                            DO i = 1, SIZE(blk_size_d)
     302        2172 :                               nsplit = (blk_size_d(i) + block_sizes(idim) - 1)/block_sizes(idim)
     303        3628 :                               isplit_sum = isplit_sum + nsplit
     304             :                            END DO
     305             : 
     306        4368 :                            ALLOCATE (blk_size_split_d(isplit_sum))
     307             : 
     308        1456 :                            isplit_sum = 0
     309        3628 :                            DO i = 1, SIZE(blk_size_d)
     310        2172 :                               nsplit = (blk_size_d(i) + block_sizes(idim) - 1)/block_sizes(idim)
     311        2172 :                               blk_remainder = blk_size_d(i)
     312        8968 :                               DO isplit = 1, nsplit
     313        5340 :                                  isplit_sum = isplit_sum + 1
     314        5340 :                                  blk_size_split_d(isplit_sum) = MIN(block_sizes(idim), blk_remainder)
     315        7512 :                                  blk_remainder = blk_remainder - block_sizes(idim)
     316             :                               END DO
     317             : 
     318             :                            END DO
     319             : 
     320             :                            #:for idim in range(1, maxdim+1)
     321        2912 :                               IF (idim == ${idim}$) THEN
     322        9708 :                                  ALLOCATE (nd_blk_size_split_${idim}$, source=blk_size_split_d)
     323             :                               END IF
     324             :                            #:endfor
     325        3458 :                            DEALLOCATE (blk_size_split_d)
     326             :                         END DO
     327             : 
     328             :                         #:for ndim in ndims
     329        1092 :                            IF (ndims_tensor(tensor_in) == ${ndim}$) CALL dbt_split_blocks_generic(tensor_in, tensor_out, &
     330             :                                                                                       ${varlist("nd_blk_size_split", nmax=ndim)}$, &
     331         546 :                                                                                                   nodata=nodata)
     332             :                         #:endfor
     333             : 
     334         546 :                      END SUBROUTINE
     335             : 
     336             : ! **************************************************************************************************
     337             : !> \brief Copy tensor with split blocks to tensor with original block sizes.
     338             : !> \param tensor_split_in tensor with smaller blocks
     339             : !> \param tensor_out original tensor
     340             : !> \author Patrick Seewald
     341             : ! **************************************************************************************************
     342       73783 :                      SUBROUTINE dbt_split_copyback(tensor_split_in, tensor_out, summation)
     343             :                         TYPE(dbt_type), INTENT(INOUT)               :: tensor_split_in
     344             :                         TYPE(dbt_type), INTENT(INOUT)               :: tensor_out
     345             :                         LOGICAL, INTENT(IN), OPTIONAL                   :: summation
     346       73783 :                         INTEGER, DIMENSION(:), ALLOCATABLE              :: first_split_d, last_split_d
     347       73783 :                         INTEGER, DIMENSION(:), ALLOCATABLE              :: blk_size_split_d, blk_size_d
     348       73783 :                         INTEGER, DIMENSION(:), ALLOCATABLE              :: ${varlist("last_split")}$, &
     349       73783 :                                                                            ${varlist("first_split")}$, &
     350       73783 :                                                                            ${varlist("split")}$
     351       73783 :                      INTEGER, DIMENSION(:), ALLOCATABLE              :: ${varlist("inblock_offset")}$, ${varlist("blk_size_split")}$
     352       73783 :                         INTEGER, DIMENSION(:, :), ALLOCATABLE            :: blks_to_allocate
     353             :                         INTEGER                                         :: idim, iblk, bcount
     354             :                         INTEGER                                         :: ${varlist("iblk")}$, isplit_sum, splitsum
     355             :                         TYPE(dbt_iterator_type)                     :: iter
     356       73783 :                         INTEGER, DIMENSION(ndims_tensor(tensor_out)) :: blk_index, blk_size, blk_offset, blk_shape, blk_index_n
     357             :                         LOGICAL                                         :: found
     358             : 
     359             :                         INTEGER, DIMENSION(${maxdim}$)                  :: inblock_offset
     360             :                         INTEGER                                            :: handle
     361             :                         CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_split_copyback'
     362             :                         #:for ndim in ndims
     363       73783 :                            REAL(dp), DIMENSION(${shape_colon(n=ndim)}$), ALLOCATABLE :: block_${ndim}$d
     364       73783 :                            REAL(dp), DIMENSION(${shape_colon(n=ndim)}$), ALLOCATABLE :: block_split_${ndim}$d
     365             :                         #:endfor
     366             : 
     367       73783 :                         CALL timeset(routineN, handle)
     368       73783 :                         CPASSERT(tensor_out%valid)
     369       73783 :                         IF (PRESENT(summation)) THEN
     370       14174 :                            IF (.NOT. summation) CALL dbt_clear(tensor_out)
     371             :                         ELSE
     372       59609 :                            CALL dbt_clear(tensor_out)
     373             :                         END IF
     374             : 
     375      259855 :                         DO idim = 1, ndims_tensor(tensor_split_in)
     376      186072 :                            CALL get_ith_array(tensor_split_in%blk_sizes, idim, blk_size_split_d)
     377      186072 :                            CALL get_ith_array(tensor_out%blk_sizes, idim, blk_size_d)
     378             : 
     379             :                            #:for idim in range(1, maxdim+1)
     380      372144 :                               IF (idim == ${idim}$) THEN
     381             :                                  ! data offset of split blocks w.r.t. corresponding normal block
     382      558216 :                                  ALLOCATE (inblock_offset_${idim}$ (SIZE(blk_size_split_d)))
     383             :                                  ! normal block index for each split block
     384      372144 :                                  ALLOCATE (split_${idim}$ (SIZE(blk_size_split_d)))
     385             :                               END IF
     386             :                            #:endfor
     387             : 
     388      558216 :                            ALLOCATE (last_split_d(SIZE(blk_size_d)))
     389      372144 :                            ALLOCATE (first_split_d(SIZE(blk_size_d)))
     390      186072 :                            first_split_d(1) = 1
     391      186072 :                            isplit_sum = 0
     392     1589152 :                            DO iblk = 1, SIZE(blk_size_d)
     393     1403080 :                               splitsum = 0
     394     1403080 :                               IF (iblk .GT. 1) first_split_d(iblk) = last_split_d(iblk - 1) + 1
     395     3117210 :                               DO WHILE (splitsum < blk_size_d(iblk))
     396     1714130 :                                  isplit_sum = isplit_sum + 1
     397             :                                  #:for idim in range(1, maxdim+1)
     398     3428260 :                                     IF (idim == ${idim}$) THEN
     399     1714130 :                                        inblock_offset_${idim}$ (isplit_sum) = splitsum
     400     1714130 :                                        split_${idim}$ (isplit_sum) = iblk
     401             :                                     END IF
     402             :                                  #:endfor
     403     1714130 :                                  splitsum = splitsum + blk_size_split_d(isplit_sum)
     404             :                               END DO
     405     1403080 :                               CPASSERT(splitsum == blk_size_d(iblk))
     406     1589152 :                               last_split_d(iblk) = isplit_sum
     407             :                            END DO
     408             :                            #:for idim in range(1, maxdim+1)
     409      372144 :                               IF (idim == ${idim}$) THEN
     410     1961296 :                                  ALLOCATE (first_split_${idim}$, source=first_split_d)
     411     1775224 :                                  ALLOCATE (last_split_${idim}$, source=last_split_d)
     412     2272346 :                                  ALLOCATE (blk_size_split_${idim}$, source=blk_size_split_d)
     413             :                               END IF
     414             :                            #:endfor
     415      186072 :                            DEALLOCATE (first_split_d, last_split_d)
     416      631999 :                            DEALLOCATE (blk_size_split_d, blk_size_d)
     417             :                         END DO
     418             : 
     419             : !$OMP PARALLEL DEFAULT(NONE) &
     420             : !$OMP SHARED(tensor_split_in,tensor_out,summation) &
     421             : !$OMP SHARED(${varlist("split", nmax=ndim)}$) &
     422             : !$OMP SHARED(${varlist("first_split", nmax=ndim)}$) &
     423             : !$OMP SHARED(${varlist("last_split", nmax=ndim)}$) &
     424             : !$OMP SHARED(${varlist("inblock_offset", nmax=ndim)}$) &
     425             : !$OMP PRIVATE(iter,blks_to_allocate,bcount,blk_index_n) &
     426             : !$OMP PRIVATE(blk_index,blk_size,blk_shape,blk_offset,inblock_offset,found) &
     427       73783 : !$OMP PRIVATE(block_2d,block_3d,block_4d,block_split_2d,block_split_3d,block_split_4d)
     428             :                         CALL dbt_iterator_start(iter, tensor_split_in)
     429             :                         ALLOCATE (blks_to_allocate(dbt_iterator_num_blocks(iter), ndims_tensor(tensor_split_in)))
     430             :                         bcount = 0
     431             :                         DO WHILE (dbt_iterator_blocks_left(iter))
     432             :                            CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size)
     433             :                            #:for ndim in ndims
     434             :                               IF (ndims_tensor(tensor_out) == ${ndim}$) THEN
     435             :                                  #:for idim in range(1,ndim+1)
     436             :                                     blk_index_n(${idim}$) = split_${idim}$ (blk_index(${idim}$))
     437             :                                  #:endfor
     438             :                               END IF
     439             :                            #:endfor
     440             :                            blks_to_allocate(bcount + 1, :) = blk_index_n
     441             :                            bcount = bcount + 1
     442             :                         END DO
     443             :                         CALL dbt_iterator_stop(iter)
     444             :                         CALL dbt_reserve_blocks(tensor_out, blks_to_allocate)
     445             : 
     446             :                         CALL dbt_iterator_start(iter, tensor_out)
     447             :                         DO WHILE (dbt_iterator_blocks_left(iter))
     448             :                            CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size, blk_offset=blk_offset)
     449             : 
     450             :                            #:for ndim in ndims
     451             :                               IF (ndims_tensor(tensor_out) == ${ndim}$) THEN
     452             :                                  CALL allocate_any(block_${ndim}$d, blk_size)
     453             :                                  block_${ndim}$d = 0.0_dp
     454             :                                  #:for idim in range(1,ndim+1)
     455             :                             DO iblk_${idim}$ = first_split_${idim}$ (blk_index(${idim}$)), last_split_${idim}$ (blk_index(${idim}$))
     456             :                                        #:endfor
     457             :                                        #:for idim in range(1,ndim+1)
     458             :                                           inblock_offset(${idim}$) = inblock_offset_${idim}$ (iblk_${idim}$)
     459             :                                        #:endfor
     460             : 
     461             :                             CALL dbt_get_block(tensor_split_in, [${", ".join(["iblk_"+str(idim) for idim in range(1, ndim+1)])}$], &
     462             :                                                           block_split_${ndim}$d, found)
     463             :                                        IF (found) THEN
     464             :                                           blk_shape(1:${ndim}$) = SHAPE(block_split_${ndim}$d)
     465             :                                           block_${ndim}$d( &
     466             :                      ${", ".join(["inblock_offset("+str(idim)+") + 1:inblock_offset("+str(idim)+") + blk_shape("+str(idim)+")" for idim in range(1, ndim+1)])}$) = &
     467             :                                              block_split_${ndim}$d
     468             :                                        END IF
     469             : 
     470             :                                        #:for idim in range(1,ndim+1)
     471             :                                           END DO
     472             :                                        #:endfor
     473             :                                        CALL dbt_put_block(tensor_out, blk_index, blk_size, block_${ndim}$d, summation=summation)
     474             :                                        DEALLOCATE (block_${ndim}$d)
     475             :                                     END IF
     476             :                                  #:endfor
     477             :                               END DO
     478             :                               CALL dbt_iterator_stop(iter)
     479             : !$OMP END PARALLEL
     480             : 
     481       73783 :                               CALL timestop(handle)
     482             : 
     483      147566 :                            END SUBROUTINE
     484             : 
     485             : ! **************************************************************************************************
     486             : !> \brief split two tensors with same total sizes but different block sizes such that they have
     487             : !>        equal block sizes
     488             : !> \param move_data memory optimization: transfer data s.t. tensor1 and tensor2 may be empty on return
     489             : !> \param tensor1_split tensor 1 with split blocks
     490             : !> \param tensor2_split tensor 2 with split blocks
     491             : !> \param nodata1 don't copy data of tensor 1
     492             : !> \param nodata2 don't copy data of tensor 2
     493             : !> \param
     494             : !> \param
     495             : !> \param
     496             : !> \param
     497             : !> \author Patrick Seewald
     498             : ! **************************************************************************************************
     499     1106745 :                            SUBROUTINE dbt_make_compatible_blocks(tensor1, tensor2, tensor1_split, tensor2_split, &
     500       18482 :                                                                  order, nodata1, nodata2, move_data)
     501             :                               TYPE(dbt_type), INTENT(INOUT) :: tensor1, tensor2
     502             :                               TYPE(dbt_type), INTENT(OUT)   :: tensor1_split, tensor2_split
     503             :                               INTEGER, DIMENSION(ndims_tensor(tensor1)), &
     504             :                                  INTENT(IN), OPTIONAL                        :: order
     505             :                               LOGICAL, INTENT(IN), OPTIONAL     :: nodata1, nodata2, move_data
     506       73783 :                           INTEGER, DIMENSION(:), ALLOCATABLE  :: ${varlist("blk_size_split_1")}$, ${varlist("blk_size_split_2")}$, &
     507       73783 :                                                                      blk_size_d_1, blk_size_d_2, blk_size_d_split
     508             :                               INTEGER :: size_sum_1, size_sum_2, size_sum, bind_1, bind_2, isplit, bs, idim, i
     509             :                               LOGICAL :: move_prv, nodata1_prv, nodata2_prv
     510       73783 :                               INTEGER, DIMENSION(ndims_tensor(tensor1)) :: order_prv
     511             : 
     512       73783 :                               IF (PRESENT(move_data)) THEN
     513       73783 :                                  move_prv = move_data
     514             :                               ELSE
     515             :                                  move_prv = .FALSE.
     516             :                               END IF
     517             : 
     518       73783 :                               IF (PRESENT(nodata1)) THEN
     519           0 :                                  nodata1_prv = nodata1
     520             :                               ELSE
     521             :                                  nodata1_prv = .FALSE.
     522             :                               END IF
     523       73783 :                               IF (PRESENT(nodata2)) THEN
     524       73783 :                                  nodata2_prv = nodata2
     525             :                               ELSE
     526             :                                  nodata2_prv = .FALSE.
     527             :                               END IF
     528             : 
     529       73783 :                               IF (PRESENT(order)) THEN
     530       18482 :                                  order_prv(:) = dbt_inverse_order(order)
     531             :                               ELSE
     532      326177 :                                  order_prv(:) = (/(i, i=1, ndims_tensor(tensor1))/)
     533             :                               END IF
     534             : 
     535      259855 :                               DO idim = 1, ndims_tensor(tensor2)
     536      186072 :                                  CALL get_ith_array(tensor1%blk_sizes, order_prv(idim), blk_size_d_1)
     537      186072 :                                  CALL get_ith_array(tensor2%blk_sizes, idim, blk_size_d_2)
     538      558216 :                                  ALLOCATE (blk_size_d_split(SIZE(blk_size_d_1) + SIZE(blk_size_d_2)))
     539             :                                  size_sum_1 = 0
     540             :                                  size_sum_2 = 0
     541             :                                  size_sum = 0
     542     1900202 :                                  bind_1 = 0
     543     1900202 :                                  bind_2 = 0
     544     1900202 :                                  isplit = 0
     545             : 
     546     1900202 :                                  DO WHILE (bind_1 < SIZE(blk_size_d_1) .AND. bind_2 < SIZE(blk_size_d_2))
     547     1900202 :                                     IF (blk_size_d_1(bind_1 + 1) < blk_size_d_2(bind_2 + 1)) THEN
     548      311050 :                                        bind_1 = bind_1 + 1
     549      311050 :                                        bs = blk_size_d_1(bind_1)
     550      311050 :                                        blk_size_d_2(bind_2 + 1) = blk_size_d_2(bind_2 + 1) - bs
     551             :                                        size_sum = size_sum + bs
     552      311050 :                                        isplit = isplit + 1
     553      311050 :                                        blk_size_d_split(isplit) = bs
     554     1403080 :                                     ELSEIF (blk_size_d_1(bind_1 + 1) > blk_size_d_2(bind_2 + 1)) THEN
     555      587494 :                                        bind_2 = bind_2 + 1
     556      587494 :                                        bs = blk_size_d_2(bind_2)
     557      587494 :                                        blk_size_d_1(bind_1 + 1) = blk_size_d_1(bind_1 + 1) - bs
     558             :                                        size_sum = size_sum + bs
     559      587494 :                                        isplit = isplit + 1
     560      587494 :                                        blk_size_d_split(isplit) = bs
     561             :                                     ELSE
     562      815586 :                                        bind_1 = bind_1 + 1
     563      815586 :                                        bind_2 = bind_2 + 1
     564      815586 :                                        bs = blk_size_d_1(bind_1)
     565             :                                        size_sum = size_sum + bs
     566      815586 :                                        isplit = isplit + 1
     567      815586 :                                        blk_size_d_split(isplit) = bs
     568             :                                     END IF
     569             :                                  END DO
     570             : 
     571      186072 :                                  IF (bind_1 < SIZE(blk_size_d_1)) THEN
     572           0 :                                     bind_1 = bind_1 + 1
     573           0 :                                     bs = blk_size_d_1(bind_1)
     574             :                                     size_sum = size_sum + bs
     575           0 :                                     isplit = isplit + 1
     576           0 :                                     blk_size_d_split(isplit) = bs
     577             :                                  END IF
     578             : 
     579      186072 :                                  IF (bind_2 < SIZE(blk_size_d_2)) THEN
     580           0 :                                     bind_2 = bind_2 + 1
     581           0 :                                     bs = blk_size_d_2(bind_2)
     582             :                                     size_sum = size_sum + bs
     583           0 :                                     isplit = isplit + 1
     584           0 :                                     blk_size_d_split(isplit) = bs
     585             :                                  END IF
     586             : 
     587             :                                  #:for idim in range(1, maxdim+1)
     588      372144 :                                     IF (order_prv(idim) == ${idim}$) THEN
     589     2272346 :                                        ALLOCATE (blk_size_split_1_${idim}$, source=blk_size_d_split(:isplit))
     590             :                                     END IF
     591             :                                  #:endfor
     592             : 
     593             :                                  #:for idim in range(1, maxdim+1)
     594      372144 :                                     IF (idim == ${idim}$) THEN
     595     2272346 :                                        ALLOCATE (blk_size_split_2_${idim}$, source=blk_size_d_split(:isplit))
     596             :                                     END IF
     597             :                                  #:endfor
     598             : 
     599      631999 :                                  DEALLOCATE (blk_size_d_split, blk_size_d_1, blk_size_d_2)
     600             :                               END DO
     601             : 
     602             :                               #:for ndim in ndims
     603      147566 :                                  IF (ndims_tensor(tensor1) == ${ndim}$) THEN
     604       73783 :                    CALL dbt_split_blocks_generic(tensor1, tensor1_split, ${varlist("blk_size_split_1", nmax=ndim)}$, nodata=nodata1)
     605       73783 :                                     IF (move_prv .AND. .NOT. nodata1_prv) CALL dbt_clear(tensor1)
     606       73783 :                    CALL dbt_split_blocks_generic(tensor2, tensor2_split, ${varlist("blk_size_split_2", nmax=ndim)}$, nodata=nodata2)
     607       73783 :                                     IF (move_prv .AND. .NOT. nodata2_prv) CALL dbt_clear(tensor2)
     608             :                                  END IF
     609             :                               #:endfor
     610             : 
     611      147566 :                            END SUBROUTINE
     612             : 
     613             : ! **************************************************************************************************
     614             : !> \author Patrick Seewald
     615             : ! **************************************************************************************************
     616     1124624 :                            SUBROUTINE dbt_crop(tensor_in, tensor_out, bounds, move_data)
     617             :                               TYPE(dbt_type), INTENT(INOUT) :: tensor_in
     618             :                               TYPE(dbt_type), INTENT(OUT) :: tensor_out
     619             :                               INTEGER, DIMENSION(2, ndims_tensor(tensor_in)), INTENT(IN) :: bounds
     620             :                               LOGICAL, INTENT(IN), OPTIONAL :: move_data
     621             : 
     622             :                               CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_crop'
     623             : 
     624      140578 :                               INTEGER, DIMENSION(2, ndims_tensor(tensor_in)) :: blk_bounds
     625             :                               TYPE(dbt_iterator_type)                     :: iter
     626      140578 :                               INTEGER, DIMENSION(ndims_tensor(tensor_in)) :: blk_index, blk_size, blk_offset
     627             :                               LOGICAL :: found, move_data_prv
     628             :                               INTEGER :: handle, idim, iblk_out
     629      140578 :                               INTEGER, DIMENSION(:, :), ALLOCATABLE :: blk_ind_out
     630             :                               #:for ndim in ndims
     631      140578 :                                  REAL(dp), DIMENSION(${shape_colon(n=ndim)}$), ALLOCATABLE :: block_${ndim}$d, block_put_${ndim}$d
     632             :                               #:endfor
     633             : 
     634      140578 :                               CALL timeset(routineN, handle)
     635             : 
     636      140578 :                               IF (PRESENT(move_data)) THEN
     637      140578 :                                  move_data_prv = move_data
     638             :                               ELSE
     639             :                                  move_data_prv = .FALSE.
     640             :                               END IF
     641             : 
     642      140578 :                               CALL dbt_create(tensor_in, tensor_out)
     643             : 
     644             : !$OMP PARALLEL DEFAULT(NONE) SHARED(tensor_in,tensor_out,bounds) &
     645             : !$OMP PRIVATE(iter,blk_ind_out,iblk_out,blk_index,blk_size,blk_offset,found,blk_bounds) &
     646      140578 : !$OMP PRIVATE(block_2d,block_put_2d,block_3d,block_put_3d,block_4d,block_put_4d)
     647             : 
     648             :                               ! reserve blocks inside bounds
     649             :                               CALL dbt_iterator_start(iter, tensor_in)
     650             :                               ALLOCATE (blk_ind_out(dbt_iterator_num_blocks(iter), ndims_tensor(tensor_in)))
     651             :                               blk_ind_out(:, :) = 0
     652             :                               iblk_out = 0
     653             :                               blk_loop: DO WHILE (dbt_iterator_blocks_left(iter))
     654             :                                  CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size, blk_offset=blk_offset)
     655             :                                  DO idim = 1, ndims_tensor(tensor_in)
     656             :                                     IF (bounds(1, idim) > blk_offset(idim) - 1 + blk_size(idim)) CYCLE blk_loop
     657             :                                     IF (bounds(2, idim) < blk_offset(idim)) CYCLE blk_loop
     658             :                                  END DO
     659             :                                  iblk_out = iblk_out + 1
     660             :                                  blk_ind_out(iblk_out, :) = blk_index
     661             :                               END DO blk_loop
     662             :                               CALL dbt_iterator_stop(iter)
     663             : 
     664             :                               CALL dbt_reserve_blocks(tensor_out, blk_ind_out(1:iblk_out, :))
     665             :                               DEALLOCATE (blk_ind_out)
     666             : 
     667             :                               ! copy blocks
     668             :                               CALL dbt_iterator_start(iter, tensor_out)
     669             :                               iter_loop: DO WHILE (dbt_iterator_blocks_left(iter))
     670             :                                  CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size, blk_offset=blk_offset)
     671             : 
     672             :                                  DO idim = 1, ndims_tensor(tensor_in)
     673             :                                     blk_bounds(1, idim) = MAX(bounds(1, idim) - blk_offset(idim) + 1, 1)
     674             :                                     blk_bounds(2, idim) = MIN(bounds(2, idim) - blk_offset(idim) + 1, blk_size(idim))
     675             :                                  END DO
     676             : 
     677             :                                  #:for ndim in ndims
     678             :                                     IF (ndims_tensor(tensor_in) == ${ndim}$) THEN
     679             :                                        CALL dbt_get_block(tensor_in, blk_index, block_${ndim}$d, found)
     680             : 
     681             :                              ALLOCATE (block_put_${ndim}$d(${", ".join(["blk_size("+str(idim)+")" for idim in range(1, ndim+1)])}$))
     682             :                                        block_put_${ndim}$d = 0.0_dp
     683             :    block_put_${ndim}$d(${", ".join(["blk_bounds(1, "+str(idim)+"):blk_bounds(2,"+str(idim)+")" for idim in range(1, ndim+1)])}$) = &
     684             :            block_${ndim}$d(${", ".join(["blk_bounds(1, "+str(idim)+"):blk_bounds(2,"+str(idim)+")" for idim in range(1, ndim+1)])}$)
     685             :                                        CALL dbt_put_block(tensor_out, blk_index, blk_size, block_put_${ndim}$d)
     686             :                                        DEALLOCATE (block_${ndim}$d)
     687             :                                        DEALLOCATE (block_put_${ndim}$d)
     688             :                                     END IF
     689             :                                  #:endfor
     690             :                               END DO iter_loop
     691             :                               CALL dbt_iterator_stop(iter)
     692             : !$OMP END PARALLEL
     693      140578 :                               CALL dbt_finalize(tensor_out)
     694             : 
     695      140578 :                               IF (move_data_prv) CALL dbt_clear(tensor_in)
     696             : 
     697             :                               ! transfer data for batched contraction
     698      140578 :                               CALL dbt_copy_contraction_storage(tensor_in, tensor_out)
     699             : 
     700      140578 :                               CALL timestop(handle)
     701      281156 :                            END SUBROUTINE
     702             : 
     703      148112 :                         END MODULE

Generated by: LCOV version 1.15