LCOV - code coverage report
Current view: top level - src/dbt - dbt_split.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:42dac4a) Lines: 95.5 % 202 193
Test Date: 2025-07-25 12:55:17 Functions: 100.0 % 5 5

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Routines to split blocks and to convert between tensors with different block sizes.
      10              : !> \author Patrick Seewald
      11              : ! **************************************************************************************************
      12              : MODULE dbt_split
      13              : 
      14              :    #:include "dbt_macros.fypp"
      15              :    #:set maxdim = maxrank
      16              :    #:set ndims = range(2,maxdim+1)
      17              : 
      18              :    USE dbt_allocate_wrap, ONLY: allocate_any
      19              :    USE dbt_array_list_methods, ONLY: get_ith_array
      20              :    USE dbt_block, ONLY: dbt_iterator_type, &
      21              :                         dbt_get_block, &
      22              :                         dbt_put_block, &
      23              :                         dbt_iterator_start, &
      24              :                         dbt_iterator_num_blocks, &
      25              :                         dbt_iterator_blocks_left, &
      26              :                         dbt_iterator_stop, &
      27              :                         dbt_iterator_next_block, &
      28              :                         dbt_reserve_blocks
      29              :    USE dbt_index, ONLY: dbt_get_mapping_info, &
      30              :                         dbt_inverse_order
      31              :    USE dbt_types, ONLY: dbt_create, &
      32              :                         dbt_type, &
      33              :                         ndims_tensor, &
      34              :                         dbt_distribution_type, &
      35              :                         dbt_distribution, &
      36              :                         dbt_distribution_destroy, &
      37              :                         dbt_distribution_new_expert, &
      38              :                         dbt_clear, &
      39              :                         dbt_finalize, &
      40              :                         dbt_get_num_blocks, &
      41              :                         dbt_blk_offsets, &
      42              :                         dbt_blk_sizes, &
      43              :                         ndims_matrix_row, &
      44              :                         ndims_matrix_column, &
      45              :                         dbt_filter, &
      46              :                         dbt_copy_contraction_storage
      47              :    USE kinds, ONLY: dp, dp
      48              : 
      49              : #include "../base/base_uses.f90"
      50              :    IMPLICIT NONE
      51              :    PRIVATE
      52              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'dbt_split'
      53              :    PUBLIC :: &
      54              :       dbt_make_compatible_blocks, &
      55              :       dbt_split_blocks, &
      56              :       dbt_split_blocks_generic, &
      57              :       dbt_split_copyback, &
      58              :       dbt_crop
      59              : 
      60              : CONTAINS
      61              : 
      62              : ! **************************************************************************************************
      63              : !> \brief Split tensor blocks into smaller blocks
      64              : !> \param tensor_in Input tensor
      65              : !> \param tensor_out Output tensor (splitted blocks)
      66              : !> \param blk_size_i block sizes for each of the tensor dimensions
      67              : !> \param nodata don't copy data from tensor_in to tensor_out
      68              : !> \author Patrick Seewald
      69              : ! **************************************************************************************************
      70      1341216 :    SUBROUTINE dbt_split_blocks_generic(tensor_in, tensor_out, ${varlist("blk_size")}$, nodata)
      71              :       TYPE(dbt_type), INTENT(INOUT)               :: tensor_in
      72              :       TYPE(dbt_type), INTENT(OUT)                 :: tensor_out
      73              :       INTEGER, DIMENSION(:), INTENT(IN), OPTIONAL     :: ${varlist("blk_size")}$
      74              :       LOGICAL, INTENT(IN), OPTIONAL                   :: nodata
      75              : 
      76      1937312 :       TYPE(dbt_distribution_type)                 :: dist_old, dist_split
      77              :       TYPE(dbt_iterator_type)                     :: iter
      78       149024 :       INTEGER, DIMENSION(:), ALLOCATABLE              :: ${varlist("nd_dist_split")}$
      79       149024 :       INTEGER, DIMENSION(:), ALLOCATABLE              :: ${varlist("nd_blk_size_split")}$
      80       149024 :       INTEGER, DIMENSION(:), ALLOCATABLE              :: ${varlist("index_split_offset")}$
      81       149024 :       INTEGER, DIMENSION(:), ALLOCATABLE              :: ${varlist("inblock_offset")}$
      82       149024 :       INTEGER, DIMENSION(:), ALLOCATABLE              :: ${varlist("blk_nsplit")}$
      83              :       INTEGER                                         :: ${varlist("split_blk")}$
      84              :       INTEGER :: idim, i, isplit_sum, nsplit, handle, splitsum, bcount
      85       149024 :       INTEGER, DIMENSION(:, :), ALLOCATABLE           :: blks_to_allocate
      86       298048 :       INTEGER, DIMENSION(:), ALLOCATABLE :: dist_d, blk_size_d, blk_size_split_d, dist_split_d
      87       298048 :       INTEGER, DIMENSION(ndims_matrix_row(tensor_in)) :: map1_2d
      88       298048 :       INTEGER, DIMENSION(ndims_matrix_column(tensor_in)) :: map2_2d
      89       149024 :       INTEGER, DIMENSION(ndims_tensor(tensor_in)) :: blk_index, blk_size, blk_offset, &
      90       149024 :                                                      blk_shape
      91              :       INTEGER, DIMENSION(${maxdim}$) :: bi_split, inblock_offset
      92              :       LOGICAL :: found
      93              : 
      94              :       #:for ndim in ndims
      95       149024 :          REAL(dp), DIMENSION(${shape_colon(n=ndim)}$), ALLOCATABLE :: block_${ndim}$d
      96              :       #:endfor
      97              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_split_blocks_generic'
      98              : 
      99       149024 :       CALL timeset(routineN, handle)
     100              : 
     101       149024 :       dist_old = dbt_distribution(tensor_in)
     102              : 
     103       521868 :       DO idim = 1, ndims_tensor(tensor_in)
     104       372844 :          CALL get_ith_array(dist_old%nd_dist, idim, dist_d)
     105       372844 :          CALL get_ith_array(tensor_in%blk_sizes, idim, blk_size_d)
     106              : 
     107              :          #:for idim in range(1, maxdim+1)
     108       745688 :             IF (idim == ${idim}$) THEN
     109              :                ! split block index offset for each normal block index
     110      1118532 :                ALLOCATE (index_split_offset_${idim}$ (SIZE(dist_d)))
     111              :                ! how many split blocks for each normal block index
     112       745688 :                ALLOCATE (blk_nsplit_${idim}$ (SIZE(dist_d)))
     113              :                ! data offset of split blocks w.r.t. corresponding normal block
     114      1118532 :                ALLOCATE (inblock_offset_${idim}$ (SIZE(blk_size_${idim}$)))
     115      4779332 :                ALLOCATE (blk_size_split_d, source=blk_size_${idim}$)
     116              :             END IF
     117              :          #:endfor
     118              : 
     119              :          ! distribution vector for split blocks
     120      1118532 :          ALLOCATE (dist_split_d(SIZE(blk_size_split_d)))
     121              : 
     122       372844 :          isplit_sum = 0 ! counting splits
     123      3111740 :          DO i = 1, SIZE(blk_size_d)
     124              :             nsplit = 0 ! number of splits for current normal block
     125              :             splitsum = 0 ! summing split block sizes for current normal block
     126      6399696 :             DO WHILE (splitsum < blk_size_d(i))
     127      3660800 :                nsplit = nsplit + 1
     128      3660800 :                isplit_sum = isplit_sum + 1
     129              :                #:for idim in range(1, maxdim+1)
     130      3660800 :                   IF (idim == ${idim}$) inblock_offset_${idim}$ (isplit_sum) = splitsum
     131              :                #:endfor
     132      3660800 :                dist_split_d(isplit_sum) = dist_d(i)
     133      3660800 :                splitsum = splitsum + blk_size_split_d(isplit_sum)
     134              :             END DO
     135      2738896 :             CPASSERT(splitsum == blk_size_d(i))
     136              :             #:for idim in range(1, maxdim+1)
     137      5850636 :                IF (idim == ${idim}$) THEN
     138      2738896 :                   blk_nsplit_${idim}$ (i) = nsplit
     139      2738896 :                   index_split_offset_${idim}$ (i) = isplit_sum - nsplit
     140              :                END IF
     141              :             #:endfor
     142              :          END DO
     143              : 
     144              :          #:for idim in range(1, maxdim+1)
     145       745688 :             IF (idim == ${idim}$) THEN
     146      4779332 :                ALLOCATE (nd_dist_split_${idim}$, source=dist_split_d)
     147      4779332 :                ALLOCATE (nd_blk_size_split_${idim}$, source=blk_size_split_d)
     148              :             END IF
     149              :          #:endfor
     150       372844 :          DEALLOCATE (dist_split_d)
     151      1267556 :          DEALLOCATE (blk_size_split_d)
     152              : 
     153              :       END DO
     154              : 
     155       149024 :       CALL dbt_get_mapping_info(tensor_in%nd_index_blk, map1_2d=map1_2d, map2_2d=map2_2d)
     156              : 
     157              :       #:for ndim in ndims
     158       298048 :          IF (ndims_tensor(tensor_in) == ${ndim}$) THEN
     159              :             CALL dbt_distribution_new_expert(dist_split, tensor_in%pgrid, map1_2d, map2_2d, &
     160       149024 :                                              ${varlist("nd_dist_split", nmax=ndim)}$)
     161              :             CALL dbt_create(tensor_out, tensor_in%name, dist_split, map1_2d, map2_2d, &
     162       149024 :                             ${varlist("nd_blk_size_split", nmax=ndim)}$)
     163              :          END IF
     164              :       #:endfor
     165              : 
     166       149024 :       CALL dbt_distribution_destroy(dist_split)
     167              : 
     168       149024 :       IF (PRESENT(nodata)) THEN
     169        74065 :          IF (nodata) THEN
     170        59509 :             CALL timestop(handle)
     171        59509 :             RETURN
     172              :          END IF
     173              :       END IF
     174              : 
     175              : !$OMP PARALLEL DEFAULT(NONE) &
     176              : !$OMP SHARED(tensor_in,tensor_out) &
     177              : !$OMP SHARED(${varlist("blk_nsplit", nmax=ndim)}$) &
     178              : !$OMP SHARED(${varlist("inblock_offset", nmax=ndim)}$) &
     179              : !$OMP SHARED(${varlist("blk_size", nmax=ndim)}$) &
     180              : !$OMP SHARED(${varlist("index_split_offset", nmax=ndim)}$) &
     181              : !$OMP PRIVATE(iter,found,bcount,blks_to_allocate,bi_split,inblock_offset) &
     182              : !$OMP PRIVATE(blk_index,blk_size,blk_offset,blk_shape) &
     183        89515 : !$OMP PRIVATE(block_2d,block_3d,block_4d)
     184              :       CALL dbt_iterator_start(iter, tensor_in)
     185              : 
     186              :       bcount = 0
     187              :       DO WHILE (dbt_iterator_blocks_left(iter))
     188              :          CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size)
     189              :          #:for ndim in ndims
     190              :             IF (ndims_tensor(tensor_in) == ${ndim}$) THEN
     191              :                #:for idim in range(1,ndim+1)
     192              :                   DO split_blk_${idim}$ = 1, blk_nsplit_${idim}$ (blk_index(${idim}$))
     193              :                      #:endfor
     194              :                      bcount = bcount + 1
     195              :                      #:for idim in range(1,ndim+1)
     196              :                         END DO
     197              :                      #:endfor
     198              :                   END IF
     199              :                #:endfor
     200              :             END DO
     201              :             CALL dbt_iterator_stop(iter)
     202              : 
     203              :             ALLOCATE (blks_to_allocate(bcount, ndims_tensor(tensor_in)))
     204              : 
     205              :             CALL dbt_iterator_start(iter, tensor_in)
     206              : 
     207              :             bcount = 0
     208              :             DO WHILE (dbt_iterator_blocks_left(iter))
     209              :                CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size, blk_offset=blk_offset)
     210              : 
     211              :                #:for ndim in ndims
     212              :                   IF (ndims_tensor(tensor_in) == ${ndim}$) THEN
     213              :                      #:for idim in range(1,ndim+1)
     214              :                         DO split_blk_${idim}$ = 1, blk_nsplit_${idim}$ (blk_index(${idim}$))
     215              :                            bi_split(${idim}$) = index_split_offset_${idim}$ (blk_index(${idim}$)) + split_blk_${idim}$
     216              :                            #:endfor
     217              :                            bcount = bcount + 1
     218              :                            blks_to_allocate(bcount, :) = bi_split(1:ndims_tensor(tensor_in))
     219              :                            #:for idim in range(1,ndim+1)
     220              :                               END DO
     221              :                            #:endfor
     222              :                         END IF
     223              :                      #:endfor
     224              :                   END DO
     225              : 
     226              :                   CALL dbt_iterator_stop(iter)
     227              : 
     228              :                   CALL dbt_reserve_blocks(tensor_out, blks_to_allocate)
     229              : 
     230              :                   CALL dbt_iterator_start(iter, tensor_in)
     231              : 
     232              :                   DO WHILE (dbt_iterator_blocks_left(iter))
     233              :                      CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size, blk_offset=blk_offset)
     234              :                      #:for ndim in ndims
     235              :                         IF (ndims_tensor(tensor_in) == ${ndim}$) THEN
     236              :                            CALL dbt_get_block(tensor_in, blk_index, block_${ndim}$d, found)
     237              :                            CPASSERT(found)
     238              :                         END IF
     239              :                      #:endfor
     240              :                      #:for ndim in ndims
     241              :                         IF (ndims_tensor(tensor_in) == ${ndim}$) THEN
     242              :                            #:for idim in range(1,ndim+1)
     243              :                               DO split_blk_${idim}$ = 1, blk_nsplit_${idim}$ (blk_index(${idim}$))
     244              :                                  ! split block index
     245              :                                  bi_split(${idim}$) = index_split_offset_${idim}$ (blk_index(${idim}$)) + split_blk_${idim}$
     246              :                                  blk_shape(${idim}$) = blk_size_${idim}$ (bi_split(${idim}$))
     247              :                                  #:endfor
     248              : 
     249              :                                  #:for idim in range(1,ndim+1)
     250              :                                     inblock_offset(${idim}$) = inblock_offset_${idim}$ (bi_split(${idim}$))
     251              :                                  #:endfor
     252              :                                  CALL dbt_put_block(tensor_out, bi_split(1:${ndim}$), &
     253              :                                                     blk_shape, &
     254              :                                                     block_${ndim}$d( &
     255              :                                       ${", ".join(["inblock_offset("+str(idim)+") + 1:inblock_offset("+str(idim)+") + blk_shape("+str(idim)+")" for idim in range(1, ndim+1)])}$))
     256              : 
     257              :                                  #:for idim in range(1,ndim+1)
     258              :                                     END DO
     259              :                                  #:endfor
     260              : 
     261              :                                  DEALLOCATE (block_${ndim}$d)
     262              :                               END IF
     263              :                            #:endfor
     264              :                         END DO
     265              :                         CALL dbt_iterator_stop(iter)
     266              : !$OMP END PARALLEL
     267              : 
     268        89515 :                         CALL dbt_finalize(tensor_out)
     269              : 
     270              :                         ! remove blocks that are exactly 0, these can occur if a cropping operation was performed before splitting
     271        89515 :                         CALL dbt_filter(tensor_out, TINY(0.0_dp))
     272              : 
     273        89515 :                         CALL timestop(handle)
     274              : 
     275       447072 :                      END SUBROUTINE
     276              : 
     277              : ! **************************************************************************************************
     278              : !> \brief Split tensor blocks into smaller blocks of maximum size PRODUCT(block_sizes).
     279              : !> \param tensor_in Input tensor
     280              : !> \param tensor_out Output tensor (split blocks)
     281              : !> \param block_sizes block sizes for each of the tensor dimensions
     282              : !> \param nodata don't copy data from tensor_in to tensor_out
     283              : !> \author Patrick Seewald
     284              : ! **************************************************************************************************
     285         7152 :                      SUBROUTINE dbt_split_blocks(tensor_in, tensor_out, block_sizes, nodata)
     286              : 
     287              :                         TYPE(dbt_type), INTENT(INOUT)               :: tensor_in
     288              :                         TYPE(dbt_type), INTENT(OUT)                 :: tensor_out
     289              :                         INTEGER, DIMENSION(ndims_tensor(tensor_in)), &
     290              :                            INTENT(IN)                                   :: block_sizes
     291              :                         LOGICAL, INTENT(IN), OPTIONAL                   :: nodata
     292              : 
     293          894 :                         INTEGER, DIMENSION(:), ALLOCATABLE              :: ${varlist("nd_blk_size_split")}$
     294              :                         INTEGER :: idim, i, isplit_sum, blk_remainder, nsplit, isplit
     295          894 :                         INTEGER, DIMENSION(:), ALLOCATABLE :: blk_size_d, blk_size_split_d
     296              : 
     297         3278 :                         DO idim = 1, ndims_tensor(tensor_in)
     298         2384 :                            CALL get_ith_array(tensor_in%blk_sizes, idim, blk_size_d)
     299              : 
     300         2384 :                            isplit_sum = 0
     301         5948 :                            DO i = 1, SIZE(blk_size_d)
     302         3564 :                               nsplit = (blk_size_d(i) + block_sizes(idim) - 1)/block_sizes(idim)
     303         5948 :                               isplit_sum = isplit_sum + nsplit
     304              :                            END DO
     305              : 
     306         7152 :                            ALLOCATE (blk_size_split_d(isplit_sum))
     307              : 
     308         2384 :                            isplit_sum = 0
     309         5948 :                            DO i = 1, SIZE(blk_size_d)
     310         3564 :                               nsplit = (blk_size_d(i) + block_sizes(idim) - 1)/block_sizes(idim)
     311         3564 :                               blk_remainder = blk_size_d(i)
     312        15376 :                               DO isplit = 1, nsplit
     313         9428 :                                  isplit_sum = isplit_sum + 1
     314         9428 :                                  blk_size_split_d(isplit_sum) = MIN(block_sizes(idim), blk_remainder)
     315        12992 :                                  blk_remainder = blk_remainder - block_sizes(idim)
     316              :                               END DO
     317              : 
     318              :                            END DO
     319              : 
     320              :                            #:for idim in range(1, maxdim+1)
     321         4768 :                               IF (idim == ${idim}$) THEN
     322        16580 :                                  ALLOCATE (nd_blk_size_split_${idim}$, source=blk_size_split_d)
     323              :                               END IF
     324              :                            #:endfor
     325         5662 :                            DEALLOCATE (blk_size_split_d)
     326              :                         END DO
     327              : 
     328              :                         #:for ndim in ndims
     329         1788 :                            IF (ndims_tensor(tensor_in) == ${ndim}$) CALL dbt_split_blocks_generic(tensor_in, tensor_out, &
     330              :                                                                                       ${varlist("nd_blk_size_split", nmax=ndim)}$, &
     331          894 :                                                                                                   nodata=nodata)
     332              :                         #:endfor
     333              : 
     334          894 :                      END SUBROUTINE
     335              : 
     336              : ! **************************************************************************************************
     337              : !> \brief Copy tensor with split blocks to tensor with original block sizes.
     338              : !> \param tensor_split_in tensor with smaller blocks
     339              : !> \param tensor_out original tensor
     340              : !> \author Patrick Seewald
     341              : ! **************************************************************************************************
     342        74065 :                      SUBROUTINE dbt_split_copyback(tensor_split_in, tensor_out, summation)
     343              :                         TYPE(dbt_type), INTENT(INOUT)               :: tensor_split_in
     344              :                         TYPE(dbt_type), INTENT(INOUT)               :: tensor_out
     345              :                         LOGICAL, INTENT(IN), OPTIONAL                   :: summation
     346        74065 :                         INTEGER, DIMENSION(:), ALLOCATABLE              :: first_split_d, last_split_d
     347        74065 :                         INTEGER, DIMENSION(:), ALLOCATABLE              :: blk_size_split_d, blk_size_d
     348        74065 :                         INTEGER, DIMENSION(:), ALLOCATABLE              :: ${varlist("last_split")}$, &
     349        74065 :                                                                            ${varlist("first_split")}$, &
     350        74065 :                                                                            ${varlist("split")}$
     351        74065 :                      INTEGER, DIMENSION(:), ALLOCATABLE              :: ${varlist("inblock_offset")}$, ${varlist("blk_size_split")}$
     352        74065 :                         INTEGER, DIMENSION(:, :), ALLOCATABLE            :: blks_to_allocate
     353              :                         INTEGER                                         :: idim, iblk, bcount
     354              :                         INTEGER                                         :: ${varlist("iblk")}$, isplit_sum, splitsum
     355              :                         TYPE(dbt_iterator_type)                     :: iter
     356        74065 :                         INTEGER, DIMENSION(ndims_tensor(tensor_out)) :: blk_index, blk_size, blk_offset, blk_shape, blk_index_n
     357              :                         LOGICAL                                         :: found
     358              : 
     359              :                         INTEGER, DIMENSION(${maxdim}$)                  :: inblock_offset
     360              :                         INTEGER                                            :: handle
     361              :                         CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_split_copyback'
     362              :                         #:for ndim in ndims
     363        74065 :                            REAL(dp), DIMENSION(${shape_colon(n=ndim)}$), ALLOCATABLE :: block_${ndim}$d
     364        74065 :                            REAL(dp), DIMENSION(${shape_colon(n=ndim)}$), ALLOCATABLE :: block_split_${ndim}$d
     365              :                         #:endfor
     366              : 
     367        74065 :                         CALL timeset(routineN, handle)
     368        74065 :                         CPASSERT(tensor_out%valid)
     369        74065 :                         IF (PRESENT(summation)) THEN
     370        14556 :                            IF (.NOT. summation) CALL dbt_clear(tensor_out)
     371              :                         ELSE
     372        59509 :                            CALL dbt_clear(tensor_out)
     373              :                         END IF
     374              : 
     375       259295 :                         DO idim = 1, ndims_tensor(tensor_split_in)
     376       185230 :                            CALL get_ith_array(tensor_split_in%blk_sizes, idim, blk_size_split_d)
     377       185230 :                            CALL get_ith_array(tensor_out%blk_sizes, idim, blk_size_d)
     378              : 
     379              :                            #:for idim in range(1, maxdim+1)
     380       370460 :                               IF (idim == ${idim}$) THEN
     381              :                                  ! data offset of split blocks w.r.t. corresponding normal block
     382       555690 :                                  ALLOCATE (inblock_offset_${idim}$ (SIZE(blk_size_split_d)))
     383              :                                  ! normal block index for each split block
     384       370460 :                                  ALLOCATE (split_${idim}$ (SIZE(blk_size_split_d)))
     385              :                               END IF
     386              :                            #:endfor
     387              : 
     388       555690 :                            ALLOCATE (last_split_d(SIZE(blk_size_d)))
     389       370460 :                            ALLOCATE (first_split_d(SIZE(blk_size_d)))
     390       185230 :                            first_split_d(1) = 1
     391       185230 :                            isplit_sum = 0
     392      1706644 :                            DO iblk = 1, SIZE(blk_size_d)
     393      1521414 :                               splitsum = 0
     394      1521414 :                               IF (iblk .GT. 1) first_split_d(iblk) = last_split_d(iblk - 1) + 1
     395      3347100 :                               DO WHILE (splitsum < blk_size_d(iblk))
     396      1825686 :                                  isplit_sum = isplit_sum + 1
     397              :                                  #:for idim in range(1, maxdim+1)
     398      3651372 :                                     IF (idim == ${idim}$) THEN
     399      1825686 :                                        inblock_offset_${idim}$ (isplit_sum) = splitsum
     400      1825686 :                                        split_${idim}$ (isplit_sum) = iblk
     401              :                                     END IF
     402              :                                  #:endfor
     403      1825686 :                                  splitsum = splitsum + blk_size_split_d(isplit_sum)
     404              :                               END DO
     405      1521414 :                               CPASSERT(splitsum == blk_size_d(iblk))
     406      1706644 :                               last_split_d(iblk) = isplit_sum
     407              :                            END DO
     408              :                            #:for idim in range(1, maxdim+1)
     409       370460 :                               IF (idim == ${idim}$) THEN
     410      2077104 :                                  ALLOCATE (first_split_${idim}$, source=first_split_d)
     411      1891874 :                                  ALLOCATE (last_split_${idim}$, source=last_split_d)
     412      2381376 :                                  ALLOCATE (blk_size_split_${idim}$, source=blk_size_split_d)
     413              :                               END IF
     414              :                            #:endfor
     415       185230 :                            DEALLOCATE (first_split_d, last_split_d)
     416       629755 :                            DEALLOCATE (blk_size_split_d, blk_size_d)
     417              :                         END DO
     418              : 
     419              : !$OMP PARALLEL DEFAULT(NONE) &
     420              : !$OMP SHARED(tensor_split_in,tensor_out,summation) &
     421              : !$OMP SHARED(${varlist("split", nmax=ndim)}$) &
     422              : !$OMP SHARED(${varlist("first_split", nmax=ndim)}$) &
     423              : !$OMP SHARED(${varlist("last_split", nmax=ndim)}$) &
     424              : !$OMP SHARED(${varlist("inblock_offset", nmax=ndim)}$) &
     425              : !$OMP PRIVATE(iter,blks_to_allocate,bcount,blk_index_n) &
     426              : !$OMP PRIVATE(blk_index,blk_size,blk_shape,blk_offset,inblock_offset,found) &
     427        74065 : !$OMP PRIVATE(block_2d,block_3d,block_4d,block_split_2d,block_split_3d,block_split_4d)
     428              :                         CALL dbt_iterator_start(iter, tensor_split_in)
     429              :                         ALLOCATE (blks_to_allocate(dbt_iterator_num_blocks(iter), ndims_tensor(tensor_split_in)))
     430              :                         bcount = 0
     431              :                         DO WHILE (dbt_iterator_blocks_left(iter))
     432              :                            CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size)
     433              :                            #:for ndim in ndims
     434              :                               IF (ndims_tensor(tensor_out) == ${ndim}$) THEN
     435              :                                  #:for idim in range(1,ndim+1)
     436              :                                     blk_index_n(${idim}$) = split_${idim}$ (blk_index(${idim}$))
     437              :                                  #:endfor
     438              :                               END IF
     439              :                            #:endfor
     440              :                            blks_to_allocate(bcount + 1, :) = blk_index_n
     441              :                            bcount = bcount + 1
     442              :                         END DO
     443              :                         CALL dbt_iterator_stop(iter)
     444              :                         CALL dbt_reserve_blocks(tensor_out, blks_to_allocate)
     445              : 
     446              :                         CALL dbt_iterator_start(iter, tensor_out)
     447              :                         DO WHILE (dbt_iterator_blocks_left(iter))
     448              :                            CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size, blk_offset=blk_offset)
     449              : 
     450              :                            #:for ndim in ndims
     451              :                               IF (ndims_tensor(tensor_out) == ${ndim}$) THEN
     452              :                                  CALL allocate_any(block_${ndim}$d, blk_size)
     453              :                                  block_${ndim}$d = 0.0_dp
     454              :                                  #:for idim in range(1,ndim+1)
     455              :                             DO iblk_${idim}$ = first_split_${idim}$ (blk_index(${idim}$)), last_split_${idim}$ (blk_index(${idim}$))
     456              :                                        #:endfor
     457              :                                        #:for idim in range(1,ndim+1)
     458              :                                           inblock_offset(${idim}$) = inblock_offset_${idim}$ (iblk_${idim}$)
     459              :                                        #:endfor
     460              : 
     461              :                             CALL dbt_get_block(tensor_split_in, [${", ".join(["iblk_"+str(idim) for idim in range(1, ndim+1)])}$], &
     462              :                                                           block_split_${ndim}$d, found)
     463              :                                        IF (found) THEN
     464              :                                           blk_shape(1:${ndim}$) = SHAPE(block_split_${ndim}$d)
     465              :                                           block_${ndim}$d( &
     466              :                      ${", ".join(["inblock_offset("+str(idim)+") + 1:inblock_offset("+str(idim)+") + blk_shape("+str(idim)+")" for idim in range(1, ndim+1)])}$) = &
     467              :                                              block_split_${ndim}$d
     468              :                                        END IF
     469              : 
     470              :                                        #:for idim in range(1,ndim+1)
     471              :                                           END DO
     472              :                                        #:endfor
     473              :                                        CALL dbt_put_block(tensor_out, blk_index, blk_size, block_${ndim}$d, summation=summation)
     474              :                                        DEALLOCATE (block_${ndim}$d)
     475              :                                     END IF
     476              :                                  #:endfor
     477              :                               END DO
     478              :                               CALL dbt_iterator_stop(iter)
     479              : !$OMP END PARALLEL
     480              : 
     481        74065 :                               CALL timestop(handle)
     482              : 
     483       148130 :                            END SUBROUTINE
     484              : 
     485              : ! **************************************************************************************************
     486              : !> \brief split two tensors with same total sizes but different block sizes such that they have
     487              : !>        equal block sizes
     488              : !> \param move_data memory optimization: transfer data s.t. tensor1 and tensor2 may be empty on return
     489              : !> \param tensor1_split tensor 1 with split blocks
     490              : !> \param tensor2_split tensor 2 with split blocks
     491              : !> \param nodata1 don't copy data of tensor 1
     492              : !> \param nodata2 don't copy data of tensor 2
     493              : !> \param
     494              : !> \param
     495              : !> \param
     496              : !> \param
     497              : !> \author Patrick Seewald
     498              : ! **************************************************************************************************
     499      1110975 :                            SUBROUTINE dbt_make_compatible_blocks(tensor1, tensor2, tensor1_split, tensor2_split, &
     500        20000 :                                                                  order, nodata1, nodata2, move_data)
     501              :                               TYPE(dbt_type), INTENT(INOUT) :: tensor1, tensor2
     502              :                               TYPE(dbt_type), INTENT(OUT)   :: tensor1_split, tensor2_split
     503              :                               INTEGER, DIMENSION(ndims_tensor(tensor1)), &
     504              :                                  INTENT(IN), OPTIONAL                        :: order
     505              :                               LOGICAL, INTENT(IN), OPTIONAL     :: nodata1, nodata2, move_data
     506        74065 :                           INTEGER, DIMENSION(:), ALLOCATABLE  :: ${varlist("blk_size_split_1")}$, ${varlist("blk_size_split_2")}$, &
     507        74065 :                                                                      blk_size_d_1, blk_size_d_2, blk_size_d_split
     508              :                               INTEGER :: size_sum_1, size_sum_2, size_sum, bind_1, bind_2, isplit, bs, idim, i
     509              :                               LOGICAL :: move_prv, nodata1_prv, nodata2_prv
     510        74065 :                               INTEGER, DIMENSION(ndims_tensor(tensor1)) :: order_prv
     511              : 
     512        74065 :                               IF (PRESENT(move_data)) THEN
     513        74065 :                                  move_prv = move_data
     514              :                               ELSE
     515              :                                  move_prv = .FALSE.
     516              :                               END IF
     517              : 
     518        74065 :                               IF (PRESENT(nodata1)) THEN
     519            0 :                                  nodata1_prv = nodata1
     520              :                               ELSE
     521              :                                  nodata1_prv = .FALSE.
     522              :                               END IF
     523        74065 :                               IF (PRESENT(nodata2)) THEN
     524        74065 :                                  nodata2_prv = nodata2
     525              :                               ELSE
     526              :                                  nodata2_prv = .FALSE.
     527              :                               END IF
     528              : 
     529        74065 :                               IF (PRESENT(order)) THEN
     530        20000 :                                  order_prv(:) = dbt_inverse_order(order)
     531              :                               ELSE
     532       313861 :                                  order_prv(:) = (/(i, i=1, ndims_tensor(tensor1))/)
     533              :                               END IF
     534              : 
     535       259295 :                               DO idim = 1, ndims_tensor(tensor2)
     536       185230 :                                  CALL get_ith_array(tensor1%blk_sizes, order_prv(idim), blk_size_d_1)
     537       185230 :                                  CALL get_ith_array(tensor2%blk_sizes, idim, blk_size_d_2)
     538       555690 :                                  ALLOCATE (blk_size_d_split(SIZE(blk_size_d_1) + SIZE(blk_size_d_2)))
     539              :                                  size_sum_1 = 0
     540              :                                  size_sum_2 = 0
     541              :                                  size_sum = 0
     542      2010916 :                                  bind_1 = 0
     543      2010916 :                                  bind_2 = 0
     544      2010916 :                                  isplit = 0
     545              : 
     546      2010916 :                                  DO WHILE (bind_1 < SIZE(blk_size_d_1) .AND. bind_2 < SIZE(blk_size_d_2))
     547      2010916 :                                     IF (blk_size_d_1(bind_1 + 1) < blk_size_d_2(bind_2 + 1)) THEN
     548       304272 :                                        bind_1 = bind_1 + 1
     549       304272 :                                        bs = blk_size_d_1(bind_1)
     550       304272 :                                        blk_size_d_2(bind_2 + 1) = blk_size_d_2(bind_2 + 1) - bs
     551              :                                        size_sum = size_sum + bs
     552       304272 :                                        isplit = isplit + 1
     553       304272 :                                        blk_size_d_split(isplit) = bs
     554      1521414 :                                     ELSEIF (blk_size_d_1(bind_1 + 1) > blk_size_d_2(bind_2 + 1)) THEN
     555       611768 :                                        bind_2 = bind_2 + 1
     556       611768 :                                        bs = blk_size_d_2(bind_2)
     557       611768 :                                        blk_size_d_1(bind_1 + 1) = blk_size_d_1(bind_1 + 1) - bs
     558              :                                        size_sum = size_sum + bs
     559       611768 :                                        isplit = isplit + 1
     560       611768 :                                        blk_size_d_split(isplit) = bs
     561              :                                     ELSE
     562       909646 :                                        bind_1 = bind_1 + 1
     563       909646 :                                        bind_2 = bind_2 + 1
     564       909646 :                                        bs = blk_size_d_1(bind_1)
     565              :                                        size_sum = size_sum + bs
     566       909646 :                                        isplit = isplit + 1
     567       909646 :                                        blk_size_d_split(isplit) = bs
     568              :                                     END IF
     569              :                                  END DO
     570              : 
     571       185230 :                                  IF (bind_1 < SIZE(blk_size_d_1)) THEN
     572            0 :                                     bind_1 = bind_1 + 1
     573            0 :                                     bs = blk_size_d_1(bind_1)
     574              :                                     size_sum = size_sum + bs
     575            0 :                                     isplit = isplit + 1
     576            0 :                                     blk_size_d_split(isplit) = bs
     577              :                                  END IF
     578              : 
     579       185230 :                                  IF (bind_2 < SIZE(blk_size_d_2)) THEN
     580            0 :                                     bind_2 = bind_2 + 1
     581            0 :                                     bs = blk_size_d_2(bind_2)
     582              :                                     size_sum = size_sum + bs
     583            0 :                                     isplit = isplit + 1
     584            0 :                                     blk_size_d_split(isplit) = bs
     585              :                                  END IF
     586              : 
     587              :                                  #:for idim in range(1, maxdim+1)
     588       370460 :                                     IF (order_prv(idim) == ${idim}$) THEN
     589      2381376 :                                        ALLOCATE (blk_size_split_1_${idim}$, source=blk_size_d_split(:isplit))
     590              :                                     END IF
     591              :                                  #:endfor
     592              : 
     593              :                                  #:for idim in range(1, maxdim+1)
     594       370460 :                                     IF (idim == ${idim}$) THEN
     595      2381376 :                                        ALLOCATE (blk_size_split_2_${idim}$, source=blk_size_d_split(:isplit))
     596              :                                     END IF
     597              :                                  #:endfor
     598              : 
     599       629755 :                                  DEALLOCATE (blk_size_d_split, blk_size_d_1, blk_size_d_2)
     600              :                               END DO
     601              : 
     602              :                               #:for ndim in ndims
     603       148130 :                                  IF (ndims_tensor(tensor1) == ${ndim}$) THEN
     604        74065 :                    CALL dbt_split_blocks_generic(tensor1, tensor1_split, ${varlist("blk_size_split_1", nmax=ndim)}$, nodata=nodata1)
     605        74065 :                                     IF (move_prv .AND. .NOT. nodata1_prv) CALL dbt_clear(tensor1)
     606        74065 :                    CALL dbt_split_blocks_generic(tensor2, tensor2_split, ${varlist("blk_size_split_2", nmax=ndim)}$, nodata=nodata2)
     607        74065 :                                     IF (move_prv .AND. .NOT. nodata2_prv) CALL dbt_clear(tensor2)
     608              :                                  END IF
     609              :                               #:endfor
     610              : 
     611       148130 :                            END SUBROUTINE
     612              : 
     613              : ! **************************************************************************************************
     614              : !> \author Patrick Seewald
     615              : ! **************************************************************************************************
     616      1189456 :                            SUBROUTINE dbt_crop(tensor_in, tensor_out, bounds, move_data)
     617              :                               TYPE(dbt_type), INTENT(INOUT) :: tensor_in
     618              :                               TYPE(dbt_type), INTENT(OUT) :: tensor_out
     619              :                               INTEGER, DIMENSION(2, ndims_tensor(tensor_in)), INTENT(IN) :: bounds
     620              :                               LOGICAL, INTENT(IN), OPTIONAL :: move_data
     621              : 
     622              :                               CHARACTER(LEN=*), PARAMETER :: routineN = 'dbt_crop'
     623              : 
     624       148682 :                               INTEGER, DIMENSION(2, ndims_tensor(tensor_in)) :: blk_bounds
     625              :                               TYPE(dbt_iterator_type)                     :: iter
     626       148682 :                               INTEGER, DIMENSION(ndims_tensor(tensor_in)) :: blk_index, blk_size, blk_offset
     627              :                               LOGICAL :: found, move_data_prv
     628              :                               INTEGER :: handle, idim, iblk_out
     629       148682 :                               INTEGER, DIMENSION(:, :), ALLOCATABLE :: blk_ind_out
     630              :                               #:for ndim in ndims
     631       148682 :                                  REAL(dp), DIMENSION(${shape_colon(n=ndim)}$), ALLOCATABLE :: block_${ndim}$d, block_put_${ndim}$d
     632              :                               #:endfor
     633              : 
     634       148682 :                               CALL timeset(routineN, handle)
     635              : 
     636       148682 :                               IF (PRESENT(move_data)) THEN
     637       148682 :                                  move_data_prv = move_data
     638              :                               ELSE
     639              :                                  move_data_prv = .FALSE.
     640              :                               END IF
     641              : 
     642       148682 :                               CALL dbt_create(tensor_in, tensor_out)
     643              : 
     644              : !$OMP PARALLEL DEFAULT(NONE) SHARED(tensor_in,tensor_out,bounds) &
     645              : !$OMP PRIVATE(iter,blk_ind_out,iblk_out,blk_index,blk_size,blk_offset,found,blk_bounds) &
     646       148682 : !$OMP PRIVATE(block_2d,block_put_2d,block_3d,block_put_3d,block_4d,block_put_4d)
     647              : 
     648              :                               ! reserve blocks inside bounds
     649              :                               CALL dbt_iterator_start(iter, tensor_in)
     650              :                               ALLOCATE (blk_ind_out(dbt_iterator_num_blocks(iter), ndims_tensor(tensor_in)))
     651              :                               blk_ind_out(:, :) = 0
     652              :                               iblk_out = 0
     653              :                               blk_loop: DO WHILE (dbt_iterator_blocks_left(iter))
     654              :                                  CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size, blk_offset=blk_offset)
     655              :                                  DO idim = 1, ndims_tensor(tensor_in)
     656              :                                     IF (bounds(1, idim) > blk_offset(idim) - 1 + blk_size(idim)) CYCLE blk_loop
     657              :                                     IF (bounds(2, idim) < blk_offset(idim)) CYCLE blk_loop
     658              :                                  END DO
     659              :                                  iblk_out = iblk_out + 1
     660              :                                  blk_ind_out(iblk_out, :) = blk_index
     661              :                               END DO blk_loop
     662              :                               CALL dbt_iterator_stop(iter)
     663              : 
     664              :                               CALL dbt_reserve_blocks(tensor_out, blk_ind_out(1:iblk_out, :))
     665              :                               DEALLOCATE (blk_ind_out)
     666              : 
     667              :                               ! copy blocks
     668              :                               CALL dbt_iterator_start(iter, tensor_out)
     669              :                               iter_loop: DO WHILE (dbt_iterator_blocks_left(iter))
     670              :                                  CALL dbt_iterator_next_block(iter, blk_index, blk_size=blk_size, blk_offset=blk_offset)
     671              : 
     672              :                                  DO idim = 1, ndims_tensor(tensor_in)
     673              :                                     blk_bounds(1, idim) = MAX(bounds(1, idim) - blk_offset(idim) + 1, 1)
     674              :                                     blk_bounds(2, idim) = MIN(bounds(2, idim) - blk_offset(idim) + 1, blk_size(idim))
     675              :                                  END DO
     676              : 
     677              :                                  #:for ndim in ndims
     678              :                                     IF (ndims_tensor(tensor_in) == ${ndim}$) THEN
     679              :                                        CALL dbt_get_block(tensor_in, blk_index, block_${ndim}$d, found)
     680              : 
     681              :                              ALLOCATE (block_put_${ndim}$d(${", ".join(["blk_size("+str(idim)+")" for idim in range(1, ndim+1)])}$))
     682              :                                        block_put_${ndim}$d = 0.0_dp
     683              :    block_put_${ndim}$d(${", ".join(["blk_bounds(1, "+str(idim)+"):blk_bounds(2,"+str(idim)+")" for idim in range(1, ndim+1)])}$) = &
     684              :            block_${ndim}$d(${", ".join(["blk_bounds(1, "+str(idim)+"):blk_bounds(2,"+str(idim)+")" for idim in range(1, ndim+1)])}$)
     685              :                                        CALL dbt_put_block(tensor_out, blk_index, blk_size, block_put_${ndim}$d)
     686              :                                        DEALLOCATE (block_${ndim}$d)
     687              :                                        DEALLOCATE (block_put_${ndim}$d)
     688              :                                     END IF
     689              :                                  #:endfor
     690              :                               END DO iter_loop
     691              :                               CALL dbt_iterator_stop(iter)
     692              : !$OMP END PARALLEL
     693       148682 :                               CALL dbt_finalize(tensor_out)
     694              : 
     695       148682 :                               IF (move_data_prv) CALL dbt_clear(tensor_in)
     696              : 
     697              :                               ! transfer data for batched contraction
     698       148682 :                               CALL dbt_copy_contraction_storage(tensor_in, tensor_out)
     699              : 
     700       148682 :                               CALL timestop(handle)
     701       297364 :                            END SUBROUTINE
     702              : 
     703       149024 :                         END MODULE
        

Generated by: LCOV version 2.0-1