LCOV - code coverage report
Current view: top level - src - distribution_methods.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 92.3 % 599 553
Test Date: 2025-12-04 06:27:48 Functions: 100.0 % 9 9

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Distribution methods for atoms, particles, or molecules
      10              : !> \par History
      11              : !>      - 1d-distribution of molecules and particles (Sep. 2003, MK)
      12              : !>      - 2d-distribution for Quickstep updated with molecules (Oct. 2003, MK)
      13              : !> \author MK (22.08.2003)
      14              : ! **************************************************************************************************
      15              : MODULE distribution_methods
      16              :    USE atomic_kind_types,               ONLY: atomic_kind_type,&
      17              :                                               get_atomic_kind,&
      18              :                                               get_atomic_kind_set
      19              :    USE basis_set_types,                 ONLY: get_gto_basis_set,&
      20              :                                               gto_basis_set_type
      21              :    USE cell_types,                      ONLY: cell_type,&
      22              :                                               pbc,&
      23              :                                               real_to_scaled,&
      24              :                                               scaled_to_real
      25              :    USE cp_array_utils,                  ONLY: cp_1d_i_p_type
      26              :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      27              :    USE cp_dbcsr_api,                    ONLY: dbcsr_distribution_get_num_images
      28              :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      29              :                                               cp_logger_get_default_io_unit,&
      30              :                                               cp_logger_get_default_unit_nr,&
      31              :                                               cp_logger_type
      32              :    USE cp_min_heap,                     ONLY: cp_heap_fill,&
      33              :                                               cp_heap_get_first,&
      34              :                                               cp_heap_new,&
      35              :                                               cp_heap_release,&
      36              :                                               cp_heap_reset_first,&
      37              :                                               cp_heap_type
      38              :    USE cp_output_handling,              ONLY: cp_p_file,&
      39              :                                               cp_print_key_finished_output,&
      40              :                                               cp_print_key_should_output,&
      41              :                                               cp_print_key_unit_nr
      42              :    USE distribution_1d_types,           ONLY: distribution_1d_create,&
      43              :                                               distribution_1d_type
      44              :    USE distribution_2d_types,           ONLY: distribution_2d_create,&
      45              :                                               distribution_2d_type,&
      46              :                                               distribution_2d_write
      47              :    USE input_constants,                 ONLY: model_block_count,&
      48              :                                               model_block_lmax
      49              :    USE input_section_types,             ONLY: section_vals_get_subs_vals,&
      50              :                                               section_vals_type,&
      51              :                                               section_vals_val_get
      52              :    USE kinds,                           ONLY: dp,&
      53              :                                               int_8
      54              :    USE machine,                         ONLY: m_flush
      55              :    USE mathconstants,                   ONLY: pi
      56              :    USE mathlib,                         ONLY: gcd,&
      57              :                                               lcm
      58              :    USE molecule_kind_types,             ONLY: get_molecule_kind,&
      59              :                                               get_molecule_kind_set,&
      60              :                                               molecule_kind_type
      61              :    USE molecule_types,                  ONLY: molecule_type
      62              :    USE parallel_rng_types,              ONLY: UNIFORM,&
      63              :                                               rng_stream_type
      64              :    USE particle_types,                  ONLY: particle_type
      65              :    USE qs_kind_types,                   ONLY: get_qs_kind,&
      66              :                                               qs_kind_type
      67              :    USE util,                            ONLY: sort
      68              : #include "./base/base_uses.f90"
      69              : 
      70              :    IMPLICIT NONE
      71              : 
      72              :    PRIVATE
      73              : 
      74              : ! *** Global parameters (in this module) ***
      75              : 
      76              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'distribution_methods'
      77              : 
      78              : ! *** Public subroutines ***
      79              : 
      80              :    PUBLIC :: distribute_molecules_1d, &
      81              :              distribute_molecules_2d
      82              : 
      83              : CONTAINS
      84              : 
      85              : ! **************************************************************************************************
      86              : !> \brief Distribute molecules and particles
      87              : !> \param atomic_kind_set particle (atomic) kind information
      88              : !> \param particle_set particle information
      89              : !> \param local_particles distribution of particles created by this routine
      90              : !> \param molecule_kind_set molecule kind information
      91              : !> \param molecule_set molecule information
      92              : !> \param local_molecules distribution of molecules created by this routine
      93              : !> \param force_env_section ...
      94              : !> \param prev_molecule_kind_set previous molecule kind information, used with
      95              : !>        prev_local_molecules
      96              : !> \param prev_local_molecules previous distribution of molecules, new one will
      97              : !>        be identical if all the prev_* arguments are present and associated
      98              : !> \par History
      99              : !>      none
     100              : !> \author MK (Jun. 2003)
     101              : ! **************************************************************************************************
     102        11229 :    SUBROUTINE distribute_molecules_1d(atomic_kind_set, particle_set, &
     103              :                                       local_particles, &
     104              :                                       molecule_kind_set, molecule_set, &
     105              :                                       local_molecules, force_env_section, &
     106              :                                       prev_molecule_kind_set, &
     107              :                                       prev_local_molecules)
     108              : 
     109              :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     110              :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     111              :       TYPE(distribution_1d_type), POINTER                :: local_particles
     112              :       TYPE(molecule_kind_type), DIMENSION(:), POINTER    :: molecule_kind_set
     113              :       TYPE(molecule_type), DIMENSION(:), POINTER         :: molecule_set
     114              :       TYPE(distribution_1d_type), POINTER                :: local_molecules
     115              :       TYPE(section_vals_type), POINTER                   :: force_env_section
     116              :       TYPE(molecule_kind_type), DIMENSION(:), OPTIONAL, &
     117              :          POINTER                                         :: prev_molecule_kind_set
     118              :       TYPE(distribution_1d_type), OPTIONAL, POINTER      :: prev_local_molecules
     119              : 
     120              :       CHARACTER(len=*), PARAMETER :: routineN = 'distribute_molecules_1d'
     121              : 
     122              :       INTEGER :: atom_a, bin, handle, iatom, imolecule, imolecule_kind, imolecule_local, &
     123              :          imolecule_prev_kind, iparticle_kind, ipe, iw, kind_a, molecule_a, n, natom, nbins, nload, &
     124              :          nmolecule, nmolecule_kind, nparticle_kind, nsgf, output_unit
     125              :       INTEGER(int_8)                                     :: bin_price
     126              :       INTEGER(int_8), ALLOCATABLE, DIMENSION(:)          :: workload_count, workload_fill
     127        11229 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nmolecule_local, nparticle_local, work
     128        11229 :       INTEGER, DIMENSION(:), POINTER                     :: molecule_list
     129              :       LOGICAL                                            :: found, has_prev_subsys_info, is_local
     130        11229 :       TYPE(cp_1d_i_p_type), ALLOCATABLE, DIMENSION(:)    :: local_molecule
     131              :       TYPE(cp_heap_type)                                 :: bin_heap_count, bin_heap_fill
     132              :       TYPE(cp_logger_type), POINTER                      :: logger
     133              :       TYPE(molecule_kind_type), POINTER                  :: molecule_kind
     134              : 
     135        11229 :       CALL timeset(routineN, handle)
     136              : 
     137        11229 :       has_prev_subsys_info = .FALSE.
     138        11229 :       IF (PRESENT(prev_local_molecules) .AND. &
     139              :           PRESENT(prev_molecule_kind_set)) THEN
     140         2643 :          IF (ASSOCIATED(prev_local_molecules) .AND. &
     141              :              ASSOCIATED(prev_molecule_kind_set)) THEN
     142           44 :             has_prev_subsys_info = .TRUE.
     143              :          END IF
     144              :       END IF
     145              : 
     146        11229 :       logger => cp_get_default_logger()
     147              : 
     148              :       ASSOCIATE (group => logger%para_env, mype => logger%para_env%mepos + 1, &
     149              :                  npe => logger%para_env%num_pe)
     150              : 
     151        33687 :          ALLOCATE (workload_count(npe))
     152        32642 :          workload_count(:) = 0
     153              : 
     154        22458 :          ALLOCATE (workload_fill(npe))
     155        32642 :          workload_fill(:) = 0
     156              : 
     157        11229 :          nmolecule_kind = SIZE(molecule_kind_set)
     158              : 
     159        33687 :          ALLOCATE (nmolecule_local(nmolecule_kind))
     160       150368 :          nmolecule_local(:) = 0
     161              : 
     162       184055 :          ALLOCATE (local_molecule(nmolecule_kind))
     163              : 
     164        11229 :          nparticle_kind = SIZE(atomic_kind_set)
     165              : 
     166        33687 :          ALLOCATE (nparticle_local(nparticle_kind))
     167        39133 :          nparticle_local(:) = 0
     168              : 
     169        11229 :          nbins = npe
     170              : 
     171        11229 :          CALL cp_heap_new(bin_heap_count, nbins)
     172        11229 :          CALL cp_heap_fill(bin_heap_count, workload_count)
     173              : 
     174        11229 :          CALL cp_heap_new(bin_heap_fill, nbins)
     175        11229 :          CALL cp_heap_fill(bin_heap_fill, workload_fill)
     176              : 
     177       150368 :          DO imolecule_kind = 1, nmolecule_kind
     178              : 
     179       139139 :             molecule_kind => molecule_kind_set(imolecule_kind)
     180              : 
     181       139139 :             NULLIFY (molecule_list)
     182              : 
     183              : !     *** Get the number of molecules and the number of ***
     184              : !     *** atoms in each molecule of that molecular kind ***
     185              : 
     186              :             CALL get_molecule_kind(molecule_kind=molecule_kind, &
     187              :                                    molecule_list=molecule_list, &
     188              :                                    natom=natom, &
     189       139139 :                                    nsgf=nsgf)
     190              : 
     191              : !     *** Consider the number of atoms or basis ***
     192              : !     *** functions which depends on the method ***
     193              : 
     194       139139 :             nload = MAX(natom, nsgf)
     195       139139 :             nmolecule = SIZE(molecule_list)
     196              : 
     197              : !     *** Get the number of local molecules of the current molecule kind ***
     198              : 
     199       447121 :             DO imolecule = 1, nmolecule
     200       447121 :                IF (has_prev_subsys_info) THEN
     201       297660 :                   DO imolecule_prev_kind = 1, SIZE(prev_molecule_kind_set)
     202      3188004 :                      IF (ANY(prev_local_molecules%list(imolecule_prev_kind)%array( &
     203         5060 :                              1:prev_local_molecules%n_el(imolecule_prev_kind)) == molecule_list(imolecule))) THEN
     204              :                         ! molecule used to be local
     205         2530 :                         nmolecule_local(imolecule_kind) = nmolecule_local(imolecule_kind) + 1
     206              :                      END IF
     207              :                   END DO
     208              :                ELSE
     209       302922 :                   CALL cp_heap_get_first(bin_heap_count, bin, bin_price, found)
     210       302922 :                   IF (.NOT. found) &
     211            0 :                      CPABORT("No topmost heap element found.")
     212              : 
     213       302922 :                   ipe = bin
     214       302922 :                   IF (bin_price /= workload_count(ipe)) &
     215            0 :                      CPABORT("inconsistent heap")
     216              : 
     217       302922 :                   workload_count(ipe) = workload_count(ipe) + nload
     218       302922 :                   IF (ipe == mype) THEN
     219       166625 :                      nmolecule_local(imolecule_kind) = nmolecule_local(imolecule_kind) + 1
     220              :                   END IF
     221              : 
     222       302922 :                   bin_price = workload_count(ipe)
     223       302922 :                   CALL cp_heap_reset_first(bin_heap_count, bin_price)
     224              :                END IF
     225              :             END DO
     226              : 
     227              : !     *** Distribute the molecules ***
     228       139139 :             n = nmolecule_local(imolecule_kind)
     229              : 
     230       139139 :             IF (n > 0) THEN
     231       224058 :                ALLOCATE (local_molecule(imolecule_kind)%array(n))
     232              :             ELSE
     233        64453 :                NULLIFY (local_molecule(imolecule_kind)%array)
     234              :             END IF
     235              : 
     236              :             imolecule_local = 0
     237       597489 :             DO imolecule = 1, nmolecule
     238       307982 :                is_local = .FALSE.
     239       307982 :                IF (has_prev_subsys_info) THEN
     240       297660 :                   DO imolecule_prev_kind = 1, SIZE(prev_molecule_kind_set)
     241      3188004 :                      IF (ANY(prev_local_molecules%list(imolecule_prev_kind)%array( &
     242         5060 :                              1:prev_local_molecules%n_el(imolecule_prev_kind)) == molecule_list(imolecule))) THEN
     243         2530 :                         is_local = .TRUE.
     244              :                      END IF
     245              :                   END DO
     246              :                ELSE
     247       302922 :                   CALL cp_heap_get_first(bin_heap_fill, bin, bin_price, found)
     248       302922 :                   IF (.NOT. found) &
     249            0 :                      CPABORT("No topmost heap element found.")
     250              : 
     251       302922 :                   ipe = bin
     252       302922 :                   IF (bin_price /= workload_fill(ipe)) &
     253            0 :                      CPABORT("inconsistent heap")
     254              : 
     255       302922 :                   workload_fill(ipe) = workload_fill(ipe) + nload
     256       302922 :                   is_local = (ipe == mype)
     257              :                END IF
     258       307982 :                IF (is_local) THEN
     259       169155 :                   imolecule_local = imolecule_local + 1
     260       169155 :                   molecule_a = molecule_list(imolecule)
     261       169155 :                   local_molecule(imolecule_kind)%array(imolecule_local) = molecule_a
     262       584481 :                   DO iatom = 1, natom
     263       415326 :                      atom_a = molecule_set(molecule_a)%first_atom + iatom - 1
     264              : 
     265              :                      CALL get_atomic_kind(atomic_kind=particle_set(atom_a)%atomic_kind, &
     266       415326 :                                           kind_number=kind_a)
     267       584481 :                      nparticle_local(kind_a) = nparticle_local(kind_a) + 1
     268              :                   END DO
     269              :                END IF
     270       447121 :                IF (.NOT. has_prev_subsys_info) THEN
     271       302922 :                   bin_price = workload_fill(ipe)
     272       302922 :                   CALL cp_heap_reset_first(bin_heap_fill, bin_price)
     273              :                END IF
     274              :             END DO
     275              : 
     276              :          END DO
     277              : 
     278        32642 :          IF (ANY(workload_fill /= workload_count)) &
     279            0 :             CPABORT("Inconsistent heaps encountered")
     280              : 
     281        11229 :          CALL cp_heap_release(bin_heap_count)
     282        11229 :          CALL cp_heap_release(bin_heap_fill)
     283              : 
     284              : !   *** Create the local molecule structure ***
     285              : 
     286              :          CALL distribution_1d_create(local_molecules, &
     287              :                                      n_el=nmolecule_local, &
     288        11229 :                                      para_env=logger%para_env)
     289              : 
     290              : !   *** Create the local particle structure ***
     291              : 
     292              :          CALL distribution_1d_create(local_particles, &
     293              :                                      n_el=nparticle_local, &
     294        11229 :                                      para_env=logger%para_env)
     295              : 
     296              : !   *** Store the generated local molecule and particle distributions ***
     297              : 
     298        39133 :          nparticle_local(:) = 0
     299              : 
     300       150368 :          DO imolecule_kind = 1, nmolecule_kind
     301              : 
     302       139139 :             IF (nmolecule_local(imolecule_kind) == 0) CYCLE
     303              : 
     304              :             local_molecules%list(imolecule_kind)%array(:) = &
     305       243841 :                local_molecule(imolecule_kind)%array(:)
     306              : 
     307        74686 :             molecule_kind => molecule_kind_set(imolecule_kind)
     308              : 
     309              :             CALL get_molecule_kind(molecule_kind=molecule_kind, &
     310        74686 :                                    natom=natom)
     311              : 
     312       255070 :             DO imolecule = 1, nmolecule_local(imolecule_kind)
     313       169155 :                molecule_a = local_molecule(imolecule_kind)%array(imolecule)
     314       723620 :                DO iatom = 1, natom
     315       415326 :                   atom_a = molecule_set(molecule_a)%first_atom + iatom - 1
     316              :                   CALL get_atomic_kind(atomic_kind=particle_set(atom_a)%atomic_kind, &
     317       415326 :                                        kind_number=kind_a)
     318       415326 :                   nparticle_local(kind_a) = nparticle_local(kind_a) + 1
     319       584481 :                   local_particles%list(kind_a)%array(nparticle_local(kind_a)) = atom_a
     320              :                END DO
     321              :             END DO
     322              : 
     323              :          END DO
     324              : 
     325              : !   *** Print distribution, if requested ***
     326              : 
     327        11229 :          IF (BTEST(cp_print_key_should_output(logger%iter_info, &
     328        33687 :                                               force_env_section, "PRINT%DISTRIBUTION1D"), cp_p_file)) THEN
     329              : 
     330              :             output_unit = cp_print_key_unit_nr(logger, force_env_section, "PRINT%DISTRIBUTION1D", &
     331          130 :                                                extension=".Log")
     332              : 
     333          130 :             iw = output_unit
     334          130 :             IF (output_unit < 0) iw = cp_logger_get_default_unit_nr(logger, LOCAL=.TRUE.)
     335              : 
     336              : !     *** Print molecule distribution ***
     337              : 
     338          390 :             ALLOCATE (work(npe))
     339          390 :             work(:) = 0
     340              : 
     341          416 :             work(mype) = SUM(nmolecule_local)
     342          130 :             CALL group%sum(work)
     343              : 
     344          130 :             IF (output_unit > 0) THEN
     345              :                WRITE (UNIT=output_unit, &
     346              :                       FMT="(/, T2, A, T51, A, /, (T52, I6, T73, I8))") &
     347           65 :                   "DISTRIBUTION OF THE MOLECULES", &
     348           65 :                   "Process    Number of molecules", &
     349          260 :                   (ipe - 1, work(ipe), ipe=1, npe)
     350              :                WRITE (UNIT=output_unit, FMT="(T55, A3, T73, I8)") &
     351          195 :                   "Sum", SUM(work)
     352           65 :                CALL m_flush(output_unit)
     353              :             END IF
     354              : 
     355          130 :             CALL group%sync()
     356              : 
     357          390 :             DO ipe = 1, npe
     358          260 :                IF (ipe == mype) THEN
     359              :                   WRITE (UNIT=iw, FMT="(/, T3, A)") &
     360          130 :                      "Process   Kind   Local molecules (global indices)"
     361          416 :                   DO imolecule_kind = 1, nmolecule_kind
     362          416 :                      IF (imolecule_kind == 1) THEN
     363              :                         WRITE (UNIT=iw, FMT="(T4, I6, 2X, I5, (T21, 10I6))") &
     364          130 :                            ipe - 1, imolecule_kind, &
     365          330 :                            (local_molecules%list(imolecule_kind)%array(imolecule), &
     366          590 :                             imolecule=1, nmolecule_local(imolecule_kind))
     367              :                      ELSE
     368              :                         WRITE (UNIT=iw, FMT="(T12, I5, (T21, 10I6))") &
     369          156 :                            imolecule_kind, &
     370          234 :                            (local_molecules%list(imolecule_kind)%array(imolecule), &
     371          546 :                             imolecule=1, nmolecule_local(imolecule_kind))
     372              :                      END IF
     373              :                   END DO
     374              :                END IF
     375          260 :                CALL m_flush(iw)
     376          390 :                CALL group%sync()
     377              :             END DO
     378              : 
     379              : !     *** Print particle distribution ***
     380              : 
     381          390 :             work(:) = 0
     382              : 
     383          436 :             work(mype) = SUM(nparticle_local)
     384          130 :             CALL group%sum(work)
     385              : 
     386          130 :             IF (output_unit > 0) THEN
     387              :                WRITE (UNIT=output_unit, &
     388              :                       FMT="(/, T2, A, T51, A, /, (T52, I6, T73, I8))") &
     389           65 :                   "DISTRIBUTION OF THE PARTICLES", &
     390           65 :                   "Process    Number of particles", &
     391          260 :                   (ipe - 1, work(ipe), ipe=1, npe)
     392              :                WRITE (UNIT=output_unit, FMT="(T55, A3, T73, I8)") &
     393          195 :                   "Sum", SUM(work)
     394           65 :                CALL m_flush(output_unit)
     395              :             END IF
     396              : 
     397          130 :             CALL group%sync()
     398              : 
     399          390 :             DO ipe = 1, npe
     400          260 :                IF (ipe == mype) THEN
     401              :                   WRITE (UNIT=iw, FMT="(/, T3, A)") &
     402          130 :                      "Process   Kind   Local particles (global indices)"
     403          436 :                   DO iparticle_kind = 1, nparticle_kind
     404          436 :                      IF (iparticle_kind == 1) THEN
     405              :                         WRITE (UNIT=iw, FMT="(T4, I6, 2X, I5, (T20, 10I6))") &
     406          130 :                            ipe - 1, iparticle_kind, &
     407          586 :                            (local_particles%list(iparticle_kind)%array(iatom), &
     408          846 :                             iatom=1, nparticle_local(iparticle_kind))
     409              :                      ELSE
     410              :                         WRITE (UNIT=iw, FMT="(T12, I5, (T20, 10I6))") &
     411          176 :                            iparticle_kind, &
     412         1084 :                            (local_particles%list(iparticle_kind)%array(iatom), &
     413         1436 :                             iatom=1, nparticle_local(iparticle_kind))
     414              :                      END IF
     415              :                   END DO
     416              :                END IF
     417          260 :                CALL m_flush(iw)
     418          390 :                CALL group%sync()
     419              :             END DO
     420          130 :             DEALLOCATE (work)
     421              : 
     422              :             CALL cp_print_key_finished_output(output_unit, logger, force_env_section, &
     423          130 :                                               "PRINT%DISTRIBUTION1D")
     424              :          END IF
     425              :       END ASSOCIATE
     426              : !   *** Release work storage ***
     427              : 
     428        11229 :       DEALLOCATE (workload_count)
     429              : 
     430        11229 :       DEALLOCATE (workload_fill)
     431              : 
     432        11229 :       DEALLOCATE (nmolecule_local)
     433              : 
     434        11229 :       DEALLOCATE (nparticle_local)
     435              : 
     436       150368 :       DO imolecule_kind = 1, nmolecule_kind
     437       150368 :          IF (ASSOCIATED(local_molecule(imolecule_kind)%array)) THEN
     438        74686 :             DEALLOCATE (local_molecule(imolecule_kind)%array)
     439              :          END IF
     440              :       END DO
     441        11229 :       DEALLOCATE (local_molecule)
     442              : 
     443        11229 :       CALL timestop(handle)
     444              : 
     445        22458 :    END SUBROUTINE distribute_molecules_1d
     446              : 
     447              : ! **************************************************************************************************
     448              : !> \brief Distributes the particle pairs creating a 2d distribution optimally
     449              : !>      suited for quickstep
     450              : !> \param cell ...
     451              : !> \param atomic_kind_set ...
     452              : !> \param particle_set ...
     453              : !> \param qs_kind_set ...
     454              : !> \param molecule_kind_set ...
     455              : !> \param molecule_set ...
     456              : !> \param distribution_2d the distribution that will be created by this
     457              : !>                         method
     458              : !> \param blacs_env the parallel environment at the basis of the
     459              : !>                   distribution
     460              : !> \param force_env_section ...
     461              : !> \par History
     462              : !>      - local_rows & cols blocksize optimizations (Aug. 2003, MK)
     463              : !>      - cleanup of distribution_2d (Sep. 2003, fawzi)
     464              : !>      - update for molecules (Oct. 2003, MK)
     465              : !> \author fawzi (Feb. 2003)
     466              : !> \note
     467              : !>      Intermediate generation of a 2d distribution of the molecules, but
     468              : !>      only the corresponding particle (atomic) distribution is currently
     469              : !>      used. The 2d distribution of the molecules is deleted, but may easily
     470              : !>      be recovered (MK).
     471              : ! **************************************************************************************************
     472         8408 :    SUBROUTINE distribute_molecules_2d(cell, atomic_kind_set, particle_set, &
     473              :                                       qs_kind_set, molecule_kind_set, molecule_set, &
     474              :                                       distribution_2d, blacs_env, force_env_section)
     475              :       TYPE(cell_type), POINTER                           :: cell
     476              :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     477              :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     478              :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     479              :       TYPE(molecule_kind_type), DIMENSION(:), POINTER    :: molecule_kind_set
     480              :       TYPE(molecule_type), DIMENSION(:), POINTER         :: molecule_set
     481              :       TYPE(distribution_2d_type), POINTER                :: distribution_2d
     482              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     483              :       TYPE(section_vals_type), POINTER                   :: force_env_section
     484              : 
     485              :       CHARACTER(len=*), PARAMETER :: routineN = 'distribute_molecules_2d'
     486              : 
     487              :       INTEGER :: cluster_price, cost_model, handle, iatom, iatom_mol, iatom_one, ikind, imol, &
     488              :          imolecule, imolecule_kind, iparticle_kind, ipcol, iprow, iw, kind_a, n, natom, natom_mol, &
     489              :          nclusters, nmolecule, nmolecule_kind, nparticle_kind, nsgf, output_unit
     490         8408 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: cluster_list, cluster_prices, &
     491         8408 :                                                             nparticle_local_col, &
     492         8408 :                                                             nparticle_local_row, work
     493         8408 :       INTEGER, DIMENSION(:), POINTER                     :: lmax_basis, molecule_list
     494         8408 :       INTEGER, DIMENSION(:, :), POINTER                  :: cluster_col_distribution, &
     495         8408 :                                                             cluster_row_distribution, &
     496         8408 :                                                             col_distribution, row_distribution
     497              :       LOGICAL :: basic_cluster_optimization, basic_optimization, basic_spatial_optimization, &
     498              :          molecular_distribution, skip_optimization
     499         8408 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: coords, pbc_scaled_coords
     500              :       REAL(KIND=dp), DIMENSION(3)                        :: center
     501         8408 :       TYPE(cp_1d_i_p_type), DIMENSION(:), POINTER        :: local_particle_col, local_particle_row
     502              :       TYPE(cp_logger_type), POINTER                      :: logger
     503              :       TYPE(gto_basis_set_type), POINTER                  :: orb_basis_set
     504              :       TYPE(molecule_kind_type), POINTER                  :: molecule_kind
     505              :       TYPE(section_vals_type), POINTER                   :: distribution_section
     506              : 
     507              : !...
     508              : 
     509         8408 :       CALL timeset(routineN, handle)
     510              : 
     511         8408 :       logger => cp_get_default_logger()
     512              : 
     513         8408 :       distribution_section => section_vals_get_subs_vals(force_env_section, "DFT%QS%DISTRIBUTION")
     514              : 
     515         8408 :       CALL section_vals_val_get(distribution_section, "2D_MOLECULAR_DISTRIBUTION", l_val=molecular_distribution)
     516         8408 :       CALL section_vals_val_get(distribution_section, "SKIP_OPTIMIZATION", l_val=skip_optimization)
     517         8408 :       CALL section_vals_val_get(distribution_section, "BASIC_OPTIMIZATION", l_val=basic_optimization)
     518         8408 :       CALL section_vals_val_get(distribution_section, "BASIC_SPATIAL_OPTIMIZATION", l_val=basic_spatial_optimization)
     519         8408 :       CALL section_vals_val_get(distribution_section, "BASIC_CLUSTER_OPTIMIZATION", l_val=basic_cluster_optimization)
     520              : 
     521         8408 :       CALL section_vals_val_get(distribution_section, "COST_MODEL", i_val=cost_model)
     522              :       !
     523              : 
     524              :       ASSOCIATE (group => blacs_env%para_env, myprow => blacs_env%mepos(1) + 1, mypcol => blacs_env%mepos(2) + 1, &
     525              :                  nprow => blacs_env%num_pe(1), npcol => blacs_env%num_pe(2))
     526              : 
     527         8408 :          nmolecule_kind = SIZE(molecule_kind_set)
     528         8408 :          CALL get_molecule_kind_set(molecule_kind_set, nmolecule=nmolecule)
     529              : 
     530         8408 :          nparticle_kind = SIZE(atomic_kind_set)
     531         8408 :          CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, natom=natom)
     532              : 
     533              :          !
     534              :          ! we need to generate two representations of the distribution, one as a straight array with global particles
     535              :          ! one ordered wrt to kinds and only listing the local particles
     536              :          !
     537        25224 :          ALLOCATE (row_distribution(natom, 2))
     538        16816 :          ALLOCATE (col_distribution(natom, 2))
     539              :          ! Initialize the distributions to -1, as the second dimension only gets set with cluster optimization
     540              :          ! but the information is needed by dbcsr
     541       239356 :          row_distribution = -1; col_distribution = -1
     542              : 
     543        41291 :          ALLOCATE (local_particle_col(nparticle_kind))
     544        32883 :          ALLOCATE (local_particle_row(nparticle_kind))
     545        25224 :          ALLOCATE (nparticle_local_row(nparticle_kind))
     546        16816 :          ALLOCATE (nparticle_local_col(nparticle_kind))
     547              : 
     548         8408 :          IF (basic_optimization .OR. basic_spatial_optimization .OR. basic_cluster_optimization) THEN
     549              : 
     550         8408 :             IF (molecular_distribution) THEN
     551            2 :                nclusters = nmolecule
     552              :             ELSE
     553              :                nclusters = natom
     554              :             END IF
     555              : 
     556        25224 :             ALLOCATE (cluster_list(nclusters))
     557        16816 :             ALLOCATE (cluster_prices(nclusters))
     558        25224 :             ALLOCATE (cluster_row_distribution(nclusters, 2))
     559        16816 :             ALLOCATE (cluster_col_distribution(nclusters, 2))
     560       247732 :             cluster_row_distribution = -1; cluster_col_distribution = -1
     561              : 
     562              :             ! Fill in the clusters and their prices
     563         8408 :             CALL section_vals_val_get(distribution_section, "COST_MODEL", i_val=cost_model)
     564         8408 :             IF (.NOT. molecular_distribution) THEN
     565        57723 :                DO iatom = 1, natom
     566        49317 :                   IF (iatom > nclusters) &
     567            0 :                      CPABORT("Bounds error")
     568        49317 :                   CALL get_atomic_kind(particle_set(iatom)%atomic_kind, kind_number=ikind)
     569        49317 :                   cluster_list(iatom) = iatom
     570        49317 :                   SELECT CASE (cost_model)
     571              :                   CASE (model_block_count)
     572        49317 :                      CALL get_qs_kind(qs_kind_set(ikind), nsgf=nsgf)
     573        49317 :                      cluster_price = nsgf
     574              :                   CASE (model_block_lmax)
     575            0 :                      CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set)
     576            0 :                      CALL get_gto_basis_set(orb_basis_set, lmax=lmax_basis)
     577            0 :                      cluster_price = MAXVAL(lmax_basis)
     578              :                   CASE default
     579            0 :                      CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set)
     580            0 :                      CALL get_gto_basis_set(orb_basis_set, lmax=lmax_basis)
     581        49317 :                      cluster_price = 8 + (MAXVAL(lmax_basis)**2)
     582              :                   END SELECT
     583       107040 :                   cluster_prices(iatom) = cluster_price
     584              :                END DO
     585              :             ELSE
     586              :                imol = 0
     587            4 :                DO imolecule_kind = 1, nmolecule_kind
     588            2 :                   molecule_kind => molecule_kind_set(imolecule_kind)
     589            2 :                   CALL get_molecule_kind(molecule_kind=molecule_kind, molecule_list=molecule_list, natom=natom_mol)
     590            8 :                   DO imolecule = 1, SIZE(molecule_list)
     591            4 :                      imol = imol + 1
     592            4 :                      cluster_list(imol) = imol
     593            4 :                      cluster_price = 0
     594           16 :                      DO iatom_mol = 1, natom_mol
     595           12 :                         iatom = molecule_set(molecule_list(imolecule))%first_atom + iatom_mol - 1
     596           12 :                         CALL get_atomic_kind(particle_set(iatom)%atomic_kind, kind_number=ikind)
     597           16 :                         SELECT CASE (cost_model)
     598              :                         CASE (model_block_count)
     599           12 :                            CALL get_qs_kind(qs_kind_set(ikind), nsgf=nsgf)
     600           12 :                            cluster_price = cluster_price + nsgf
     601              :                         CASE (model_block_lmax)
     602            0 :                            CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set)
     603            0 :                            CALL get_gto_basis_set(orb_basis_set, lmax=lmax_basis)
     604            0 :                            cluster_price = cluster_price + MAXVAL(lmax_basis)
     605              :                         CASE default
     606            0 :                            CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set)
     607            0 :                            CALL get_gto_basis_set(orb_basis_set, lmax=lmax_basis)
     608           12 :                            cluster_price = cluster_price + 8 + (MAXVAL(lmax_basis)**2)
     609              :                         END SELECT
     610              :                      END DO
     611            6 :                      cluster_prices(imol) = cluster_price
     612              :                   END DO
     613              :                END DO
     614              :             END IF
     615              : 
     616              :             ! And distribute
     617         8408 :             IF (basic_optimization) THEN
     618              :                CALL make_basic_distribution(cluster_list, cluster_prices, &
     619         8240 :                                             nprow, cluster_row_distribution(:, 1), npcol, cluster_col_distribution(:, 1))
     620              :             ELSE
     621          168 :                IF (basic_cluster_optimization) THEN
     622            4 :                   IF (molecular_distribution) &
     623            0 :                      CPABORT("clustering and molecular blocking NYI")
     624           16 :                   ALLOCATE (pbc_scaled_coords(3, natom), coords(3, natom))
     625          118 :                   DO iatom = 1, natom
     626          114 :                      CALL real_to_scaled(pbc_scaled_coords(:, iatom), pbc(particle_set(iatom)%r(:), cell), cell)
     627          118 :                      coords(:, iatom) = pbc(particle_set(iatom)%r(:), cell)
     628              :                   END DO
     629              :                   CALL make_cluster_distribution(coords, pbc_scaled_coords, cell, cluster_prices, &
     630            4 :                                                  nprow, cluster_row_distribution, npcol, cluster_col_distribution)
     631              :                ELSE ! basic_spatial_optimization
     632          492 :                   ALLOCATE (pbc_scaled_coords(3, nclusters))
     633          164 :                   IF (.NOT. molecular_distribution) THEN
     634              :                      ! just scaled coords
     635         2658 :                      DO iatom = 1, natom
     636         2658 :                         CALL real_to_scaled(pbc_scaled_coords(:, iatom), pbc(particle_set(iatom)%r(:), cell), cell)
     637              :                      END DO
     638              :                   ELSE
     639              :                      ! use scaled coords of geometric center, folding when appropriate
     640              :                      imol = 0
     641            0 :                      DO imolecule_kind = 1, nmolecule_kind
     642            0 :                         molecule_kind => molecule_kind_set(imolecule_kind)
     643            0 :                         CALL get_molecule_kind(molecule_kind=molecule_kind, molecule_list=molecule_list, natom=natom_mol)
     644            0 :                         DO imolecule = 1, SIZE(molecule_list)
     645            0 :                            imol = imol + 1
     646            0 :                            iatom_one = molecule_set(molecule_list(imolecule))%first_atom
     647            0 :                            center = 0.0_dp
     648            0 :                            DO iatom_mol = 1, natom_mol
     649            0 :                               iatom = molecule_set(molecule_list(imolecule))%first_atom + iatom_mol - 1
     650              :                               center = center + &
     651            0 :                                    pbc(particle_set(iatom)%r(:) - particle_set(iatom_one)%r(:), cell) + particle_set(iatom_one)%r(:)
     652              :                            END DO
     653            0 :                            center = center/natom_mol
     654            0 :                            CALL real_to_scaled(pbc_scaled_coords(:, imol), pbc(center, cell), cell)
     655              :                         END DO
     656              :                      END DO
     657              :                   END IF
     658              : 
     659              :                   CALL make_basic_spatial_distribution(pbc_scaled_coords, cluster_prices, &
     660          164 :                                                        nprow, cluster_row_distribution(:, 1), npcol, cluster_col_distribution(:, 1))
     661              : 
     662          164 :                   DEALLOCATE (pbc_scaled_coords)
     663              :                END IF
     664              :             END IF
     665              : 
     666              :             ! And assign back
     667         8408 :             IF (.NOT. molecular_distribution) THEN
     668       247704 :                row_distribution = cluster_row_distribution
     669       247704 :                col_distribution = cluster_col_distribution
     670              :             ELSE
     671              :                imol = 0
     672            4 :                DO imolecule_kind = 1, nmolecule_kind
     673            2 :                   molecule_kind => molecule_kind_set(imolecule_kind)
     674            2 :                   CALL get_molecule_kind(molecule_kind=molecule_kind, molecule_list=molecule_list, natom=natom_mol)
     675            8 :                   DO imolecule = 1, SIZE(molecule_list)
     676            4 :                      imol = imol + 1
     677           18 :                      DO iatom_mol = 1, natom_mol
     678           12 :                         iatom = molecule_set(molecule_list(imolecule))%first_atom + iatom_mol - 1
     679           72 :                         row_distribution(iatom, :) = cluster_row_distribution(imol, :)
     680           76 :                         col_distribution(iatom, :) = cluster_col_distribution(imol, :)
     681              :                      END DO
     682              :                   END DO
     683              :                END DO
     684              :             END IF
     685              : 
     686              :             ! cleanup
     687         8408 :             DEALLOCATE (cluster_list)
     688         8408 :             DEALLOCATE (cluster_prices)
     689         8408 :             DEALLOCATE (cluster_row_distribution)
     690        16816 :             DEALLOCATE (cluster_col_distribution)
     691              : 
     692              :          ELSE
     693              :             ! expects nothing else
     694            0 :             CPABORT("")
     695              :          END IF
     696              : 
     697              :          ! prepare the lists of local particles
     698              : 
     699              :          ! count local particles of a given kind
     700        24475 :          nparticle_local_col = 0
     701        24475 :          nparticle_local_row = 0
     702        57737 :          DO iatom = 1, natom
     703        49329 :             CALL get_atomic_kind(atomic_kind=particle_set(iatom)%atomic_kind, kind_number=kind_a)
     704        49329 :             IF (row_distribution(iatom, 1) == myprow) nparticle_local_row(kind_a) = nparticle_local_row(kind_a) + 1
     705       107066 :             IF (col_distribution(iatom, 1) == mypcol) nparticle_local_col(kind_a) = nparticle_local_col(kind_a) + 1
     706              :          END DO
     707              : 
     708              :          ! allocate space
     709        24475 :          DO iparticle_kind = 1, nparticle_kind
     710        16067 :             n = nparticle_local_row(iparticle_kind)
     711        43444 :             ALLOCATE (local_particle_row(iparticle_kind)%array(n))
     712              : 
     713        16067 :             n = nparticle_local_col(iparticle_kind)
     714        56609 :             ALLOCATE (local_particle_col(iparticle_kind)%array(n))
     715              :          END DO
     716              : 
     717              :          ! store
     718        24475 :          nparticle_local_col = 0
     719        24475 :          nparticle_local_row = 0
     720        57737 :          DO iatom = 1, natom
     721        49329 :             CALL get_atomic_kind(atomic_kind=particle_set(iatom)%atomic_kind, kind_number=kind_a)
     722        49329 :             IF (row_distribution(iatom, 1) == myprow) THEN
     723        25693 :                nparticle_local_row(kind_a) = nparticle_local_row(kind_a) + 1
     724        25693 :                local_particle_row(kind_a)%array(nparticle_local_row(kind_a)) = iatom
     725              :             END IF
     726       107066 :             IF (col_distribution(iatom, 1) == mypcol) THEN
     727        49329 :                nparticle_local_col(kind_a) = nparticle_local_col(kind_a) + 1
     728        49329 :                local_particle_col(kind_a)%array(nparticle_local_col(kind_a)) = iatom
     729              :             END IF
     730              :          END DO
     731              : 
     732              : !   *** Generate the 2d distribution structure  but take care of the zero offsets required
     733        57737 :          row_distribution(:, 1) = row_distribution(:, 1) - 1
     734        57737 :          col_distribution(:, 1) = col_distribution(:, 1) - 1
     735              :          CALL distribution_2d_create(distribution_2d, &
     736              :                                      row_distribution_ptr=row_distribution, &
     737              :                                      col_distribution_ptr=col_distribution, &
     738              :                                      local_rows_ptr=local_particle_row, &
     739              :                                      local_cols_ptr=local_particle_col, &
     740         8408 :                                      blacs_env=blacs_env)
     741              : 
     742         8408 :          NULLIFY (local_particle_row)
     743         8408 :          NULLIFY (local_particle_col)
     744         8408 :          NULLIFY (row_distribution)
     745         8408 :          NULLIFY (col_distribution)
     746              : 
     747              : !   *** Print distribution, if requested ***
     748         8408 :          IF (BTEST(cp_print_key_should_output(logger%iter_info, &
     749         8408 :                                               force_env_section, "PRINT%DISTRIBUTION"), cp_p_file)) THEN
     750              : 
     751              :             output_unit = cp_print_key_unit_nr(logger, force_env_section, "PRINT%DISTRIBUTION", &
     752           80 :                                                extension=".Log")
     753              : 
     754              : !     *** Print row distribution ***
     755              : 
     756          240 :             ALLOCATE (work(nprow))
     757          236 :             work(:) = 0
     758              : 
     759          228 :             IF (mypcol == 1) work(myprow) = SUM(distribution_2d%n_local_rows)
     760              : 
     761           80 :             CALL group%sum(work)
     762              : 
     763           80 :             IF (output_unit > 0) THEN
     764              :                WRITE (UNIT=output_unit, &
     765              :                       FMT="(/, T2, A, /, T15, A, /, (T16, I10, T41, I10, T71, I10))") &
     766           40 :                   "DISTRIBUTION OF THE PARTICLES (ROWS)", &
     767           40 :                   "Process row      Number of particles         Number of matrix rows", &
     768          158 :                   (iprow - 1, work(iprow), -1, iprow=1, nprow)
     769              :                WRITE (UNIT=output_unit, FMT="(T23, A3, T41, I10, T71, I10)") &
     770          118 :                   "Sum", SUM(work), -1
     771           40 :                CALL m_flush(output_unit)
     772              :             END IF
     773              : 
     774           80 :             DEALLOCATE (work)
     775              : 
     776              : !     *** Print column distribution ***
     777              : 
     778          240 :             ALLOCATE (work(npcol))
     779          160 :             work(:) = 0
     780              : 
     781          156 :             IF (myprow == 1) work(mypcol) = SUM(distribution_2d%n_local_cols)
     782              : 
     783           80 :             CALL group%sum(work)
     784              : 
     785           80 :             IF (output_unit > 0) THEN
     786              :                WRITE (UNIT=output_unit, &
     787              :                       FMT="(/, T2, A, /, T15, A, /, (T16, I10, T41, I10, T71, I10))") &
     788           40 :                   "DISTRIBUTION OF THE PARTICLES (COLUMNS)", &
     789           40 :                   "Process col      Number of particles      Number of matrix columns", &
     790          120 :                   (ipcol - 1, work(ipcol), -1, ipcol=1, npcol)
     791              :                WRITE (UNIT=output_unit, FMT="(T23, A3, T41, I10, T71, I10)") &
     792           80 :                   "Sum", SUM(work), -1
     793           40 :                CALL m_flush(output_unit)
     794              :             END IF
     795              : 
     796           80 :             DEALLOCATE (work)
     797              : 
     798              :             CALL cp_print_key_finished_output(output_unit, logger, force_env_section, &
     799           80 :                                               "PRINT%DISTRIBUTION")
     800              :          END IF
     801              :       END ASSOCIATE
     802              : 
     803         8408 :       IF (BTEST(cp_print_key_should_output(logger%iter_info, &
     804              :                                            force_env_section, "PRINT%DISTRIBUTION2D"), cp_p_file)) THEN
     805              : 
     806           70 :          iw = cp_logger_get_default_unit_nr(logger, LOCAL=.TRUE.)
     807              :          CALL distribution_2d_write(distribution_2d, &
     808              :                                     unit_nr=iw, &
     809              :                                     local=.TRUE., &
     810           70 :                                     long_description=.TRUE.)
     811              : 
     812              :       END IF
     813              : 
     814              : !   *** Release work storage ***
     815              : 
     816         8408 :       DEALLOCATE (nparticle_local_row)
     817              : 
     818         8408 :       DEALLOCATE (nparticle_local_col)
     819              : 
     820         8408 :       CALL timestop(handle)
     821              : 
     822        25224 :    END SUBROUTINE distribute_molecules_2d
     823              : 
     824              : ! **************************************************************************************************
     825              : !> \brief Creates a basic distribution
     826              : !> \param cluster_list ...
     827              : !> \param cluster_prices ...
     828              : !> \param nprows ...
     829              : !> \param row_distribution ...
     830              : !> \param npcols ...
     831              : !> \param col_distribution ...
     832              : !> \par History
     833              : !> - Created 2010-08-06 UB
     834              : ! **************************************************************************************************
     835         8240 :    SUBROUTINE make_basic_distribution(cluster_list, cluster_prices, &
     836         8240 :                                       nprows, row_distribution, npcols, col_distribution)
     837              :       INTEGER, DIMENSION(:), INTENT(INOUT)               :: cluster_list, cluster_prices
     838              :       INTEGER, INTENT(IN)                                :: nprows
     839              :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: row_distribution
     840              :       INTEGER, INTENT(IN)                                :: npcols
     841              :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: col_distribution
     842              : 
     843              :       CHARACTER(len=*), PARAMETER :: routineN = 'make_basic_distribution'
     844              : 
     845              :       INTEGER                                            :: bin, cluster, cluster_index, &
     846              :                                                             cluster_price, nbins, nclusters, pcol, &
     847              :                                                             pgrid_gcd, prow, timing_handle
     848              :       INTEGER(int_8)                                     :: bin_price
     849              :       LOGICAL                                            :: found
     850              :       TYPE(cp_heap_type)                                 :: bin_heap
     851              : 
     852              : !   ---------------------------------------------------------------------------
     853              : 
     854         8240 :       CALL timeset(routineN, timing_handle)
     855         8240 :       nbins = lcm(nprows, npcols)
     856         8240 :       pgrid_gcd = gcd(nprows, npcols)
     857         8240 :       CALL sort(cluster_prices, SIZE(cluster_list), cluster_list)
     858         8240 :       CALL cp_heap_new(bin_heap, nbins)
     859        39776 :       CALL cp_heap_fill(bin_heap, [(0_int_8, bin=1, nbins)])
     860              :       !
     861         8240 :       nclusters = SIZE(cluster_list)
     862              :       ! Put the most expensive cluster in the bin with the smallest
     863              :       ! price and repeat.
     864        54953 :       DO cluster_index = nclusters, 1, -1
     865        46713 :          cluster = cluster_list(cluster_index)
     866        46713 :          CALL cp_heap_get_first(bin_heap, bin, bin_price, found)
     867        46713 :          IF (.NOT. found) &
     868            0 :             CPABORT("No topmost heap element found.")
     869              :          !
     870        46713 :          prow = INT((bin - 1)*pgrid_gcd/npcols)
     871        46713 :          IF (prow >= nprows) &
     872            0 :             CPABORT("Invalid process row.")
     873        46713 :          pcol = INT((bin - 1)*pgrid_gcd/nprows)
     874        46713 :          IF (pcol >= npcols) &
     875            0 :             CPABORT("Invalid process column.")
     876        46713 :          row_distribution(cluster) = prow + 1
     877        46713 :          col_distribution(cluster) = pcol + 1
     878              :          !
     879        46713 :          cluster_price = cluster_prices(cluster_index)
     880        46713 :          bin_price = bin_price + cluster_price
     881       101666 :          CALL cp_heap_reset_first(bin_heap, bin_price)
     882              :       END DO
     883         8240 :       CALL cp_heap_release(bin_heap)
     884         8240 :       CALL timestop(timing_handle)
     885         8240 :    END SUBROUTINE make_basic_distribution
     886              : 
     887              : ! **************************************************************************************************
     888              : !> \brief Creates a basic spatial distribution
     889              : !>        that tries to make the corresponding blocks as homogeneous as possible
     890              : !> \param pbc_scaled_coords ...
     891              : !> \param costs ...
     892              : !> \param nprows ...
     893              : !> \param row_distribution ...
     894              : !> \param npcols ...
     895              : !> \param col_distribution ...
     896              : !> \par History
     897              : !> - Created 2010-11-11 Joost VandeVondele
     898              : ! **************************************************************************************************
     899          164 :    SUBROUTINE make_basic_spatial_distribution(pbc_scaled_coords, costs, &
     900          164 :                                               nprows, row_distribution, npcols, col_distribution)
     901              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: pbc_scaled_coords
     902              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: costs
     903              :       INTEGER, INTENT(IN)                                :: nprows
     904              :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: row_distribution
     905              :       INTEGER, INTENT(IN)                                :: npcols
     906              :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: col_distribution
     907              : 
     908              :       CHARACTER(len=*), PARAMETER :: routineN = 'make_basic_spatial_distribution'
     909              : 
     910              :       INTEGER                                            :: handle, iatom, natoms, nbins, pgrid_gcd
     911              :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: bin_costs, distribution
     912              : 
     913          164 :       CALL timeset(routineN, handle)
     914              : 
     915          164 :       natoms = SIZE(costs)
     916          164 :       nbins = lcm(nprows, npcols)
     917          164 :       pgrid_gcd = gcd(nprows, npcols)
     918          820 :       ALLOCATE (bin_costs(nbins), distribution(natoms))
     919          492 :       bin_costs = 0
     920              : 
     921         5152 :       CALL spatial_recurse(pbc_scaled_coords, costs, [(iatom, iatom=1, natoms)], bin_costs, distribution, 0)
     922              : 
     923              :       ! WRITE(*, *) "Final bin costs: ", bin_costs
     924              : 
     925              :       ! final row_distribution / col_distribution
     926         2658 :       DO iatom = 1, natoms
     927         2494 :          row_distribution(iatom) = (distribution(iatom) - 1)*pgrid_gcd/npcols + 1
     928         2658 :          col_distribution(iatom) = (distribution(iatom) - 1)*pgrid_gcd/nprows + 1
     929              :       END DO
     930              : 
     931          164 :       DEALLOCATE (bin_costs, distribution)
     932              : 
     933          164 :       CALL timestop(handle)
     934              : 
     935          164 :    END SUBROUTINE make_basic_spatial_distribution
     936              : 
     937              : ! **************************************************************************************************
     938              : !> \brief ...
     939              : !> \param pbc_scaled_coords ...
     940              : !> \param costs ...
     941              : !> \param indices ...
     942              : !> \param bin_costs ...
     943              : !> \param distribution ...
     944              : !> \param level ...
     945              : ! **************************************************************************************************
     946         3168 :    RECURSIVE SUBROUTINE spatial_recurse(pbc_scaled_coords, costs, indices, bin_costs, distribution, level)
     947              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: pbc_scaled_coords
     948              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: costs, indices
     949              :       INTEGER, DIMENSION(:), INTENT(INOUT)               :: bin_costs, distribution
     950              :       INTEGER, INTENT(IN)                                :: level
     951              : 
     952              :       INTEGER                                            :: iatom, ibin, natoms, nbins, nhalf
     953         3168 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: atom_costs_sorted, atom_permutation, &
     954         3168 :                                                             bin_costs_sorted, permutation
     955         3168 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: coord
     956              : 
     957         3168 :       natoms = SIZE(costs)
     958         3168 :       nbins = SIZE(bin_costs)
     959         3168 :       nhalf = (natoms + 1)/2
     960              : 
     961         3168 :       IF (natoms <= nbins) THEN
     962              :          ! assign the most expensive atom to the least costly bin
     963         6664 :          ALLOCATE (bin_costs_sorted(nbins), permutation(nbins))
     964         4998 :          bin_costs_sorted(:) = bin_costs
     965         1666 :          CALL sort(bin_costs_sorted, nbins, permutation)
     966         6664 :          ALLOCATE (atom_costs_sorted(natoms), atom_permutation(natoms))
     967         4160 :          atom_costs_sorted(:) = costs
     968         1666 :          CALL sort(atom_costs_sorted, natoms, atom_permutation)
     969         1666 :          ibin = 0
     970              :          ! WRITE(*, *) "Dealing with a new bunch of atoms "
     971         4160 :          DO iatom = natoms, 1, -1
     972         2494 :             ibin = ibin + 1
     973              :             ! WRITE(*, *) "atom", indices(atom_permutation(iatom)), "cost", atom_costs_sorted(iatom), &
     974              :             !            "bin", permutation(ibin), "its cost", bin_costs(permutation(ibin))
     975              :             ! WRITE(100, '(A, I0, 3F12.6)') "A", permutation(ibin), pbc_scaled_coords(:, atom_permutation(iatom))
     976         2494 :             bin_costs(permutation(ibin)) = bin_costs(permutation(ibin)) + atom_costs_sorted(iatom)
     977         4160 :             distribution(indices(atom_permutation(iatom))) = permutation(ibin)
     978              :          END DO
     979         1666 :          DEALLOCATE (bin_costs_sorted, permutation, atom_costs_sorted, atom_permutation)
     980              :       ELSE
     981              :          ! divide atoms in two subsets, sorting according to their coordinates, alternatively x, y, z
     982              :          ! recursively do this for both subsets
     983         7510 :          ALLOCATE (coord(natoms), permutation(natoms))
     984        12216 :          coord(:) = pbc_scaled_coords(MOD(level, 3) + 1, :)
     985         1502 :          CALL sort(coord, natoms, permutation)
     986              :          CALL spatial_recurse(pbc_scaled_coords(:, permutation(1:nhalf)), costs(permutation(1:nhalf)), &
     987        36098 :                               indices(permutation(1:nhalf)), bin_costs, distribution, level + 1)
     988              :          CALL spatial_recurse(pbc_scaled_coords(:, permutation(nhalf + 1:)), costs(permutation(nhalf + 1:)), &
     989        31190 :                               indices(permutation(nhalf + 1:)), bin_costs, distribution, level + 1)
     990         1502 :          DEALLOCATE (coord, permutation)
     991              :       END IF
     992              : 
     993         3168 :    END SUBROUTINE spatial_recurse
     994              : 
     995              : ! **************************************************************************************************
     996              : !> \brief creates a distribution placing close by atoms into clusters and
     997              : !>        putting them on the same processors. Load balancing is
     998              : !>        performed by balancing sum of the cluster costs per processor
     999              : !> \param coords coordinates of the system
    1000              : !> \param scaled_coords scaled coordinates
    1001              : !> \param cell the cell_type
    1002              : !> \param costs costs per atomic block
    1003              : !> \param nprows number of precessors per row on the 2d grid
    1004              : !> \param row_distribution the resulting distribution over proc_rows of atomic blocks
    1005              : !> \param npcols number of precessors per col on the 2d grid
    1006              : !> \param col_distribution the resulting distribution over proc_cols of atomic blocks
    1007              : ! **************************************************************************************************
    1008            4 :    SUBROUTINE make_cluster_distribution(coords, scaled_coords, cell, costs, &
    1009            4 :                                         nprows, row_distribution, npcols, col_distribution)
    1010              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: coords, scaled_coords
    1011              :       TYPE(cell_type), POINTER                           :: cell
    1012              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: costs
    1013              :       INTEGER, INTENT(IN)                                :: nprows
    1014              :       INTEGER, DIMENSION(:, :), INTENT(OUT)              :: row_distribution
    1015              :       INTEGER, INTENT(IN)                                :: npcols
    1016              :       INTEGER, DIMENSION(:, :), INTENT(OUT)              :: col_distribution
    1017              : 
    1018              :       CHARACTER(len=*), PARAMETER :: routineN = 'make_cluster_distribution'
    1019              : 
    1020              :       INTEGER                                            :: handle, i, icluster, level, natom, &
    1021              :                                                             output_unit
    1022              :       INTEGER(KIND=int_8)                                :: ncluster
    1023              :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: atom_to_cluster, cluster_cost, &
    1024            4 :                                                             cluster_count, cluster_to_col, &
    1025              :                                                             cluster_to_row, piv_cost, proc_cost, &
    1026            4 :                                                             sorted_cost
    1027              :       REAL(KIND=dp)                                      :: fold(3)
    1028            4 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: cluster_center, cluster_high, cluster_low
    1029              : 
    1030            4 :       CALL timeset(routineN, handle)
    1031              : 
    1032            4 :       output_unit = cp_logger_get_default_io_unit()
    1033              : 
    1034            4 :       natom = SIZE(costs)
    1035          118 :       ncluster = dbcsr_distribution_get_num_images(SUM(costs), natom, nprows, npcols)
    1036           12 :       ALLOCATE (atom_to_cluster(natom))
    1037           12 :       ALLOCATE (cluster_cost(ncluster))
    1038            8 :       ALLOCATE (cluster_to_row(ncluster))
    1039            8 :       ALLOCATE (cluster_to_col(ncluster))
    1040            8 :       ALLOCATE (sorted_cost(ncluster))
    1041            8 :       ALLOCATE (piv_cost(ncluster))
    1042           16 :       cluster_cost(:) = 0
    1043              : 
    1044            4 :       icluster = 0
    1045            4 :       CALL cluster_recurse(coords, scaled_coords, cell, costs, atom_to_cluster, ncluster, icluster, cluster_cost)
    1046              : 
    1047           16 :       sorted_cost(:) = cluster_cost(:)
    1048            4 :       CALL sort(sorted_cost, INT(ncluster), piv_cost)
    1049              : 
    1050           12 :       ALLOCATE (proc_cost(nprows))
    1051           12 :       proc_cost = 0; level = 1
    1052            4 :       CALL assign_clusters(cluster_cost, piv_cost, proc_cost, cluster_to_row, nprows)
    1053              : 
    1054           12 :       DEALLOCATE (proc_cost); ALLOCATE (proc_cost(npcols))
    1055            8 :       proc_cost = 0; level = 1
    1056            4 :       CALL assign_clusters(cluster_cost, piv_cost, proc_cost, cluster_to_col, npcols)
    1057              : 
    1058          118 :       DO i = 1, natom
    1059          114 :          row_distribution(i, 1) = cluster_to_row(atom_to_cluster(i))
    1060          114 :          row_distribution(i, 2) = atom_to_cluster(i)
    1061          114 :          col_distribution(i, 1) = cluster_to_col(atom_to_cluster(i))
    1062          118 :          col_distribution(i, 2) = atom_to_cluster(i)
    1063              :       END DO
    1064              : 
    1065              :       ! generate some statistics on clusters
    1066           12 :       ALLOCATE (cluster_center(3, ncluster))
    1067            8 :       ALLOCATE (cluster_low(3, ncluster))
    1068            8 :       ALLOCATE (cluster_high(3, ncluster))
    1069            8 :       ALLOCATE (cluster_count(ncluster))
    1070           16 :       cluster_count = 0
    1071          118 :       DO i = 1, natom
    1072          114 :          cluster_count(atom_to_cluster(i)) = cluster_count(atom_to_cluster(i)) + 1
    1073          460 :          cluster_center(:, atom_to_cluster(i)) = coords(:, i)
    1074              :       END DO
    1075           52 :       cluster_low = HUGE(0.0_dp)/2
    1076           52 :       cluster_high = -HUGE(0.0_dp)/2
    1077          118 :       DO i = 1, natom
    1078          798 :          fold = pbc(coords(:, i) - cluster_center(:, atom_to_cluster(i)), cell) + cluster_center(:, atom_to_cluster(i))
    1079          456 :          cluster_low(:, atom_to_cluster(i)) = MIN(cluster_low(:, atom_to_cluster(i)), fold(:))
    1080          460 :          cluster_high(:, atom_to_cluster(i)) = MAX(cluster_high(:, atom_to_cluster(i)), fold(:))
    1081              :       END DO
    1082            4 :       IF (output_unit > 0) THEN
    1083            2 :          WRITE (output_unit, *)
    1084            2 :          WRITE (output_unit, '(T2,A)') "Cluster distribution information"
    1085            2 :          WRITE (output_unit, '(T2,A,T48,I8)') "Number of atoms", natom
    1086            2 :          WRITE (output_unit, '(T2,A,T48,I8)') "Number of clusters", ncluster
    1087            8 :          WRITE (output_unit, '(T2,A,T48,I8)') "Largest cluster in atoms", MAXVAL(cluster_count)
    1088            8 :          WRITE (output_unit, '(T2,A,T48,I8)') "Smallest cluster in atoms", MINVAL(cluster_count)
    1089            2 :          WRITE (output_unit, '(T2,A,T48,F8.3,I8)') "Largest cartesian extend [a.u.]/cluster x=", &
    1090           10 :             MAXVAL(cluster_high(1, :) - cluster_low(1, :), MASK=(cluster_count > 0)), &
    1091           14 :             MAXLOC(cluster_high(1, :) - cluster_low(1, :), MASK=(cluster_count > 0))
    1092            2 :          WRITE (output_unit, '(T2,A,T48,F8.3,I8)') "Largest cartesian extend [a.u.]/cluster y=", &
    1093           10 :             MAXVAL(cluster_high(2, :) - cluster_low(2, :), MASK=(cluster_count > 0)), &
    1094           14 :             MAXLOC(cluster_high(2, :) - cluster_low(2, :), MASK=(cluster_count > 0))
    1095            2 :          WRITE (output_unit, '(T2,A,T48,F8.3,I8)') "Largest cartesian extend [a.u.]/cluster z=", &
    1096           10 :             MAXVAL(cluster_high(3, :) - cluster_low(3, :), MASK=(cluster_count > 0)), &
    1097           14 :             MAXLOC(cluster_high(3, :) - cluster_low(3, :), MASK=(cluster_count > 0))
    1098              :       END IF
    1099              : 
    1100            4 :       DEALLOCATE (atom_to_cluster, cluster_cost, cluster_to_row, cluster_to_col, sorted_cost, piv_cost, proc_cost)
    1101            4 :       CALL timestop(handle)
    1102              : 
    1103            8 :    END SUBROUTINE make_cluster_distribution
    1104              : 
    1105              : ! **************************************************************************************************
    1106              : !> \brief assigns the clusters to processors, tryimg to balance the cost on the nodes
    1107              : !> \param cluster_cost vector with the cost of each cluster
    1108              : !> \param piv_cost pivoting vector sorting the cluster_cost
    1109              : !> \param proc_cost cost per processor, on input 0 everywhere
    1110              : !> \param cluster_assign assgnment of clusters on proc
    1111              : !> \param nproc number of processor over which clusters are distributed
    1112              : ! **************************************************************************************************
    1113            8 :    SUBROUTINE assign_clusters(cluster_cost, piv_cost, proc_cost, cluster_assign, nproc)
    1114              :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: cluster_cost, piv_cost, proc_cost, &
    1115              :                                                             cluster_assign
    1116              :       INTEGER                                            :: nproc
    1117              : 
    1118              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'assign_clusters'
    1119              : 
    1120              :       INTEGER                                            :: handle, i, ilevel, offset, &
    1121           16 :                                                             piv_pcost(nproc), sort_proc_cost(nproc)
    1122              : 
    1123            8 :       CALL timeset(routineN, handle)
    1124              : 
    1125           26 :       DO ilevel = 1, SIZE(cluster_cost)/nproc
    1126           42 :          sort_proc_cost(:) = proc_cost(:)
    1127           18 :          CALL sort(sort_proc_cost, nproc, piv_pcost)
    1128              : 
    1129           18 :          offset = (SIZE(cluster_cost)/nproc - ilevel + 1)*nproc + 1
    1130           50 :          DO i = 1, nproc
    1131           24 :             cluster_assign(piv_cost(offset - i)) = piv_pcost(i)
    1132           42 :             proc_cost(piv_pcost(i)) = proc_cost(piv_pcost(i)) + cluster_cost(piv_cost(offset - i))
    1133              :          END DO
    1134              :       END DO
    1135              : 
    1136            8 :       CALL timestop(handle)
    1137              : 
    1138            8 :    END SUBROUTINE assign_clusters
    1139              : 
    1140              : ! **************************************************************************************************
    1141              : !> \brief recursive routine to cluster atoms.
    1142              : !>        Low level uses a modified KMEANS algorithm
    1143              : !>        recursion is used to reduce cost.
    1144              : !>        each level will subdivide a cluster into smaller clusters
    1145              : !>        If only a single split is necessary atoms are assigned to the current cluster
    1146              : !> \param coord coordinates of the system
    1147              : !> \param scaled_coord scaled coordinates
    1148              : !> \param cell the cell_type
    1149              : !> \param costs costs per atomic block
    1150              : !> \param cluster_inds the atom_to cluster mapping
    1151              : !> \param ncluster number of clusters still to be created on a given recursion level
    1152              : !> \param icluster the index of the current cluster to be created
    1153              : !> \param fin_cluster_cost total cost of the final clusters
    1154              : ! **************************************************************************************************
    1155           16 :    RECURSIVE SUBROUTINE cluster_recurse(coord, scaled_coord, cell, costs, cluster_inds, ncluster, icluster, fin_cluster_cost)
    1156              :       REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: coord, scaled_coord
    1157              :       TYPE(cell_type), POINTER                           :: cell
    1158              :       INTEGER, DIMENSION(:), INTENT(IN)                  :: costs
    1159              :       INTEGER, DIMENSION(:), INTENT(INOUT)               :: cluster_inds
    1160              :       INTEGER(KIND=int_8), INTENT(INOUT)                 :: ncluster
    1161              :       INTEGER, INTENT(INOUT)                             :: icluster
    1162              :       INTEGER, DIMENSION(:), INTENT(INOUT)               :: fin_cluster_cost
    1163              : 
    1164              :       INTEGER                                            :: i, ibeg, iend, maxv(1), min_seed, &
    1165              :                                                             natoms, nleft, nsplits, seed, tot_cost
    1166           16 :       INTEGER(KIND=int_8), ALLOCATABLE, DIMENSION(:)     :: ncluster_new
    1167           16 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: cluster_cost, inds_tmp, nat_cluster, piv
    1168              :       LOGICAL                                            :: found
    1169              :       REAL(KIND=dp)                                      :: balance, balance_new, conv
    1170              : 
    1171           16 :       natoms = SIZE(coord, 2)
    1172              :       ! This is a bit of an arbitrary choice, simply a try to avoid too many clusters on large systems and too few for balancing on
    1173              :       ! small systems or subclusters
    1174           16 :       IF (natoms <= 1) THEN
    1175            2 :          nsplits = 1
    1176              :       ELSE
    1177           14 :          nsplits = MIN(INT(MIN(INT(MAX(6, INT(60.00/LOG(REAL(natoms, KIND=dp)))), KIND=int_8), ncluster)), natoms)
    1178              :       END IF
    1179           16 :       IF (nsplits == 1) THEN
    1180           12 :          icluster = icluster + 1
    1181          126 :          cluster_inds = icluster
    1182          126 :          fin_cluster_cost(icluster) = SUM(costs)
    1183              :       ELSE
    1184           36 :          ALLOCATE (cluster_cost(nsplits), ncluster_new(nsplits), inds_tmp(natoms), piv(natoms), nat_cluster(nsplits))
    1185              :          ! initialise some values
    1186           16 :          cluster_cost = 0; seed = 300; found = .TRUE.; min_seed = seed
    1187            4 :          CALL kmeans(nsplits, coord, scaled_coord, cell, cluster_inds, nat_cluster, seed, conv)
    1188           36 :          balance = MAXVAL(REAL(nat_cluster, KIND=dp))/MINVAL(REAL(nat_cluster, KIND=dp))
    1189              : 
    1190              :          ! If the system is small enough try to do better in terms of balancing number of atoms per cluster
    1191              :          ! by changing the seed for the initial guess
    1192            4 :          IF (natoms < 1000 .AND. balance > 1.1) THEN
    1193           24 :             found = .FALSE.
    1194           24 :             DO i = 1, 5
    1195           24 :                IF (balance > 1.1) THEN
    1196           20 :                   CALL kmeans(nsplits, coord, scaled_coord, cell, cluster_inds, nat_cluster, seed + i*40, conv)
    1197          160 :                   balance_new = MAXVAL(REAL(nat_cluster, KIND=dp))/MINVAL(REAL(nat_cluster, KIND=dp))
    1198           20 :                   IF (balance_new < balance) THEN
    1199            0 :                      balance = balance_new
    1200            0 :                      min_seed = seed + i*40;
    1201              :                   END IF
    1202              :                ELSE
    1203              :                   found = .TRUE.
    1204              :                   EXIT
    1205              :                END IF
    1206              :             END DO
    1207              :          END IF
    1208              :          !If we do not match the convergence than recompute at least the best assignment
    1209            4 :          IF (.NOT. found) CALL kmeans(nsplits, coord, scaled_coord, cell, cluster_inds, nat_cluster, min_seed, conv)
    1210              : 
    1211              :          ! compute the cost of each cluster to decide how many splits have to be performed on the next lower level
    1212          118 :          DO i = 1, natoms
    1213          118 :             cluster_cost(cluster_inds(i)) = cluster_cost(cluster_inds(i)) + costs(i)
    1214              :          END DO
    1215           16 :          tot_cost = SUM(cluster_cost)
    1216              :          ! compute new splitting, can be done more elegant
    1217           16 :          ncluster_new(:) = ncluster*cluster_cost(:)/tot_cost
    1218           16 :          nleft = INT(ncluster - SUM(ncluster_new))
    1219              :          ! As we won't have empty clusters, we can not have 0 as new size, so we correct for this at first
    1220           16 :          DO i = 1, nsplits
    1221           16 :             IF (ncluster_new(i) == 0) THEN
    1222            6 :                ncluster_new(i) = 1
    1223            6 :                nleft = nleft - 1
    1224              :             END IF
    1225              :          END DO
    1226              :          ! now comes the next part that the number of clusters will not match anymore, so try to correct in a meaningful way without
    1227              :          ! introducing 0 sized blocks again
    1228            4 :          IF (nleft /= 0) THEN
    1229            0 :             DO i = 1, ABS(nleft)
    1230            0 :                IF (nleft < 0) THEN
    1231            0 :                   maxv = MINLOC(cluster_cost/ncluster_new)
    1232            0 :                   IF (ncluster_new(maxv(1)) /= 1) THEN
    1233            0 :                      ncluster_new(maxv) = ncluster_new(maxv) - 1
    1234              :                   ELSE
    1235            0 :                      maxv = MAXLOC(ncluster_new)
    1236            0 :                      ncluster_new(maxv) = ncluster_new(maxv) - 1
    1237              :                   END IF
    1238              :                ELSE
    1239            0 :                   maxv = MAXLOC(cluster_cost/ncluster_new)
    1240            0 :                   ncluster_new(maxv) = ncluster_new(maxv) + 1
    1241              :                END IF
    1242              :             END DO
    1243              :          END IF
    1244              : 
    1245              :          !Now get the permutations to sort the atoms in the nsplits clusters for the next level of iteration
    1246          118 :          inds_tmp(:) = cluster_inds(:)
    1247            4 :          CALL sort(inds_tmp, natoms, piv)
    1248              : 
    1249            4 :          ibeg = 1; iend = 0
    1250           16 :          DO i = 1, nsplits
    1251           12 :             IF (nat_cluster(i) == 0) CYCLE
    1252           12 :             iend = iend + nat_cluster(i)
    1253              :             CALL cluster_recurse(coord(:, piv(ibeg:iend)), scaled_coord(:, piv(ibeg:iend)), cell, costs(piv(ibeg:iend)), &
    1254         1038 :                                  inds_tmp(ibeg:iend), ncluster_new(i), icluster, fin_cluster_cost)
    1255           16 :             ibeg = ibeg + nat_cluster(i)
    1256              :          END DO
    1257              :          ! copy the sorted cluster IDs on the old layout, inds_tmp gets set at the lowest level of recursion
    1258          118 :          cluster_inds(piv(:)) = inds_tmp
    1259            4 :          DEALLOCATE (cluster_cost, ncluster_new, inds_tmp, piv, nat_cluster)
    1260              : 
    1261              :       END IF
    1262              : 
    1263           16 :    END SUBROUTINE cluster_recurse
    1264              : 
    1265              : ! **************************************************************************************************
    1266              : !> \brief A modified version of the kmeans algorithm.
    1267              : !>        The assignment has a penalty function in case clusters become
    1268              : !>        larger than average. Like this more even sized clusters are created
    1269              : !>        trading it for locality
    1270              : !> \param ncent number of centers to be created
    1271              : !> \param coord coordinates
    1272              : !> \param scaled_coord scaled coord
    1273              : !> \param cell the cell_type
    1274              : !> \param cluster atom to cluster assignment
    1275              : !> \param nat_cl atoms per cluster
    1276              : !> \param seed seed for the RNG. Algorithm might need multiple tries to deliver best results
    1277              : !> \param tot_var the total variance of the clusters around the centers
    1278              : ! **************************************************************************************************
    1279           28 :    SUBROUTINE kmeans(ncent, coord, scaled_coord, cell, cluster, nat_cl, seed, tot_var)
    1280              :       INTEGER                                            :: ncent
    1281              :       REAL(KIND=dp), DIMENSION(:, :)                     :: coord, scaled_coord
    1282              :       TYPE(cell_type), POINTER                           :: cell
    1283              :       INTEGER, DIMENSION(:)                              :: cluster, nat_cl
    1284              :       INTEGER                                            :: seed
    1285              :       REAL(KIND=dp)                                      :: tot_var
    1286              : 
    1287              :       CHARACTER(len=*), PARAMETER                        :: routineN = 'kmeans'
    1288              : 
    1289              :       INTEGER                                            :: handle, i, ind, itn, j, nat, oldc
    1290              :       LOGICAL                                            :: changed
    1291           56 :       REAL(KIND=dp) :: average(3, ncent, 2), cent_coord(3, ncent), devi, deviat(ncent), dist, &
    1292           56 :          dvec(3), old_var, rn, scaled_cent(3, ncent), var_cl(ncent)
    1293           28 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: dmat
    1294              :       REAL(KIND=dp), DIMENSION(3, 2)                     :: initial_seed
    1295              :       TYPE(rng_stream_type)                              :: rng_stream
    1296              : 
    1297           28 :       CALL timeset(routineN, handle)
    1298              : 
    1299          252 :       initial_seed = REAL(seed, dp); nat = SIZE(coord, 2)
    1300          112 :       ALLOCATE (dmat(ncent, nat))
    1301              : 
    1302              :       rng_stream = rng_stream_type(name="kmeans uniform distribution [0,1]", &
    1303           28 :                                    distribution_type=UNIFORM, seed=initial_seed)
    1304              : 
    1305              : ! try to find a clever initial guess with centers being somewhat distributed
    1306           28 :       rn = rng_stream%next()
    1307           28 :       ind = CEILING(rn*nat)
    1308          112 :       cent_coord(:, 1) = coord(:, ind)
    1309           84 :       DO i = 2, ncent
    1310           28 :          DO
    1311          928 :             rn = rng_stream%next()
    1312          928 :             ind = CEILING(rn*nat)
    1313         3712 :             cent_coord(:, i) = coord(:, ind)
    1314              :             devi = HUGE(1.0_dp)
    1315         2004 :             DO j = 1, i - 1
    1316         1076 :                dvec = pbc(cent_coord(:, j), cent_coord(:, i), cell)
    1317         4304 :                dist = SQRT(DOT_PRODUCT(dvec, dvec))
    1318         2004 :                IF (dist < devi) devi = dist
    1319              :             END DO
    1320          928 :             rn = rng_stream%next()
    1321          928 :             IF (rn < devi**2/169.0) EXIT
    1322              :          END DO
    1323              :       END DO
    1324              : 
    1325              : ! Now start the KMEANS but penalise it in case it starts packing too many atoms into a single set
    1326              : ! Unfoirtunatelz as this is dependent on what happened before it cant be parallel
    1327          826 :       cluster = 0; old_var = HUGE(1.0_dp)
    1328          106 :       DO itn = 1, 1000
    1329         1210 :          changed = .FALSE.; var_cl = 0.0_dp; tot_var = 0.0_dp; nat_cl = 0; deviat = 0.0_dp
    1330              : !      !$OMP PARALLEL DO PRIVATE(i,j,dvec)
    1331         4402 :          DO i = 1, nat
    1332        21418 :             DO j = 1, ncent
    1333        17016 :                dvec = pbc(cent_coord(:, j), coord(:, i), cell)
    1334        72360 :                dmat(j, i) = DOT_PRODUCT(dvec, dvec)
    1335              :             END DO
    1336              :          END DO
    1337         4402 :          DO i = 1, nat
    1338         4296 :             devi = HUGE(1.0_dp); oldc = cluster(i)
    1339        21312 :             DO j = 1, ncent
    1340        17016 :                dist = dmat(j, i) + MAX(nat_cl(j)**2/nat*ncent, nat/ncent)
    1341        21312 :                IF (dist < devi) THEN
    1342         8760 :                   devi = dist; cluster(i) = j
    1343              :                END IF
    1344              :             END DO
    1345         4296 :             deviat(cluster(i)) = deviat(cluster(i)) + SQRT(devi)
    1346         4296 :             nat_cl(cluster(i)) = nat_cl(cluster(i)) + 1
    1347         4296 :             tot_var = tot_var + devi
    1348         4402 :             IF (oldc /= cluster(i)) changed = .TRUE.
    1349              :          END DO
    1350              :          ! get the update of the centers done, add a new one in case one center lost all its atoms
    1351              :          ! the algorithm would survive, but its nice to really create what you demand
    1352          106 :          IF (tot_var >= old_var) EXIT
    1353          106 :          IF (changed) THEN
    1354              :             ! Here misery of computing the center of geometry of the clusters in PBC.
    1355              :             ! The mapping on the unit circle allows to circumvent all problems
    1356         2506 :             average = 0.0_dp
    1357         3576 :             DO i = 1, SIZE(coord, 2)
    1358        13992 :                average(:, cluster(i), 1) = average(:, cluster(i), 1) + COS(scaled_coord(:, i)*2.0_dp*pi)
    1359        14070 :                average(:, cluster(i), 2) = average(:, cluster(i), 2) + SIN(scaled_coord(:, i)*2.0_dp*pi)
    1360              :             END DO
    1361              : 
    1362          362 :             DO i = 1, ncent
    1363          362 :                IF (nat_cl(i) == 0) THEN
    1364            0 :                   rn = rng_stream%next()
    1365            0 :                   scaled_cent(:, i) = scaled_coord(:, CEILING(rn*nat))
    1366              :                ELSE
    1367         1136 :                   average(:, i, 1) = average(:, i, 1)/REAL(nat_cl(i), dp)
    1368         1136 :                   average(:, i, 2) = average(:, i, 2)/REAL(nat_cl(i), dp)
    1369         1136 :                   scaled_cent(:, i) = (ATAN2(-average(:, i, 2), -average(:, i, 1)) + pi)/(2.0_dp*pi)
    1370          284 :                   CALL scaled_to_real(cent_coord(:, i), scaled_cent(:, i), cell)
    1371              :                END IF
    1372              :             END DO
    1373              :          ELSE
    1374              :             EXIT
    1375              :          END IF
    1376              :       END DO
    1377              : 
    1378           28 :       CALL timestop(handle)
    1379              : 
    1380          728 :    END SUBROUTINE kmeans
    1381              : 
    1382              : END MODULE distribution_methods
        

Generated by: LCOV version 2.0-1