LCOV - code coverage report
Current view: top level - src - distribution_methods.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:9843133) Lines: 553 599 92.3 %
Date: 2024-05-10 06:53:45 Functions: 9 9 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Distribution methods for atoms, particles, or molecules
      10             : !> \par History
      11             : !>      - 1d-distribution of molecules and particles (Sep. 2003, MK)
      12             : !>      - 2d-distribution for Quickstep updated with molecules (Oct. 2003, MK)
      13             : !> \author MK (22.08.2003)
      14             : ! **************************************************************************************************
      15             : MODULE distribution_methods
      16             :    USE atomic_kind_types,               ONLY: atomic_kind_type,&
      17             :                                               get_atomic_kind,&
      18             :                                               get_atomic_kind_set
      19             :    USE basis_set_types,                 ONLY: get_gto_basis_set,&
      20             :                                               gto_basis_set_type
      21             :    USE cell_types,                      ONLY: cell_type,&
      22             :                                               pbc,&
      23             :                                               real_to_scaled,&
      24             :                                               scaled_to_real
      25             :    USE cp_array_utils,                  ONLY: cp_1d_i_p_type
      26             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_type
      27             :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      28             :                                               cp_logger_get_default_io_unit,&
      29             :                                               cp_logger_get_default_unit_nr,&
      30             :                                               cp_logger_type
      31             :    USE cp_min_heap,                     ONLY: cp_heap_fill,&
      32             :                                               cp_heap_get_first,&
      33             :                                               cp_heap_new,&
      34             :                                               cp_heap_release,&
      35             :                                               cp_heap_reset_first,&
      36             :                                               cp_heap_type
      37             :    USE cp_output_handling,              ONLY: cp_p_file,&
      38             :                                               cp_print_key_finished_output,&
      39             :                                               cp_print_key_should_output,&
      40             :                                               cp_print_key_unit_nr
      41             :    USE dbcsr_api,                       ONLY: dbcsr_distribution_get_num_images
      42             :    USE distribution_1d_types,           ONLY: distribution_1d_create,&
      43             :                                               distribution_1d_type
      44             :    USE distribution_2d_types,           ONLY: distribution_2d_create,&
      45             :                                               distribution_2d_type,&
      46             :                                               distribution_2d_write
      47             :    USE input_constants,                 ONLY: model_block_count,&
      48             :                                               model_block_lmax
      49             :    USE input_section_types,             ONLY: section_vals_get_subs_vals,&
      50             :                                               section_vals_type,&
      51             :                                               section_vals_val_get
      52             :    USE kinds,                           ONLY: dp,&
      53             :                                               int_8
      54             :    USE machine,                         ONLY: m_flush
      55             :    USE mathconstants,                   ONLY: pi
      56             :    USE mathlib,                         ONLY: gcd,&
      57             :                                               lcm
      58             :    USE molecule_kind_types,             ONLY: get_molecule_kind,&
      59             :                                               get_molecule_kind_set,&
      60             :                                               molecule_kind_type
      61             :    USE molecule_types,                  ONLY: molecule_type
      62             :    USE parallel_rng_types,              ONLY: UNIFORM,&
      63             :                                               rng_stream_type
      64             :    USE particle_types,                  ONLY: particle_type
      65             :    USE qs_kind_types,                   ONLY: get_qs_kind,&
      66             :                                               qs_kind_type
      67             :    USE util,                            ONLY: sort
      68             : #include "./base/base_uses.f90"
      69             : 
      70             :    IMPLICIT NONE
      71             : 
      72             :    PRIVATE
      73             : 
      74             : ! *** Global parameters (in this module) ***
      75             : 
      76             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'distribution_methods'
      77             : 
      78             : ! *** Public subroutines ***
      79             : 
      80             :    PUBLIC :: distribute_molecules_1d, &
      81             :              distribute_molecules_2d
      82             : 
      83             : CONTAINS
      84             : 
      85             : ! **************************************************************************************************
      86             : !> \brief Distribute molecules and particles
      87             : !> \param atomic_kind_set particle (atomic) kind information
      88             : !> \param particle_set particle information
      89             : !> \param local_particles distribution of particles created by this routine
      90             : !> \param molecule_kind_set molecule kind information
      91             : !> \param molecule_set molecule information
      92             : !> \param local_molecules distribution of molecules created by this routine
      93             : !> \param force_env_section ...
      94             : !> \param prev_molecule_kind_set previous molecule kind information, used with
      95             : !>        prev_local_molecules
      96             : !> \param prev_local_molecules previous distribution of molecules, new one will
      97             : !>        be identical if all the prev_* arguments are present and associated
      98             : !> \par History
      99             : !>      none
     100             : !> \author MK (Jun. 2003)
     101             : ! **************************************************************************************************
     102       10187 :    SUBROUTINE distribute_molecules_1d(atomic_kind_set, particle_set, &
     103             :                                       local_particles, &
     104             :                                       molecule_kind_set, molecule_set, &
     105             :                                       local_molecules, force_env_section, &
     106             :                                       prev_molecule_kind_set, &
     107             :                                       prev_local_molecules)
     108             : 
     109             :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     110             :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     111             :       TYPE(distribution_1d_type), POINTER                :: local_particles
     112             :       TYPE(molecule_kind_type), DIMENSION(:), POINTER    :: molecule_kind_set
     113             :       TYPE(molecule_type), DIMENSION(:), POINTER         :: molecule_set
     114             :       TYPE(distribution_1d_type), POINTER                :: local_molecules
     115             :       TYPE(section_vals_type), POINTER                   :: force_env_section
     116             :       TYPE(molecule_kind_type), DIMENSION(:), OPTIONAL, &
     117             :          POINTER                                         :: prev_molecule_kind_set
     118             :       TYPE(distribution_1d_type), OPTIONAL, POINTER      :: prev_local_molecules
     119             : 
     120             :       CHARACTER(len=*), PARAMETER :: routineN = 'distribute_molecules_1d'
     121             : 
     122             :       INTEGER :: atom_a, bin, handle, iatom, imolecule, imolecule_kind, imolecule_local, &
     123             :          imolecule_prev_kind, iparticle_kind, ipe, iw, kind_a, molecule_a, n, natom, nbins, nload, &
     124             :          nmolecule, nmolecule_kind, nparticle_kind, nsgf, output_unit
     125             :       INTEGER(int_8)                                     :: bin_price
     126             :       INTEGER(int_8), ALLOCATABLE, DIMENSION(:)          :: workload_count, workload_fill
     127       10187 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: nmolecule_local, nparticle_local, work
     128       10187 :       INTEGER, DIMENSION(:), POINTER                     :: molecule_list
     129             :       LOGICAL                                            :: found, has_prev_subsys_info, is_local
     130       10187 :       TYPE(cp_1d_i_p_type), ALLOCATABLE, DIMENSION(:)    :: local_molecule
     131             :       TYPE(cp_heap_type)                                 :: bin_heap_count, bin_heap_fill
     132             :       TYPE(cp_logger_type), POINTER                      :: logger
     133             :       TYPE(molecule_kind_type), POINTER                  :: molecule_kind
     134             : 
     135       10187 :       CALL timeset(routineN, handle)
     136             : 
     137       10187 :       has_prev_subsys_info = .FALSE.
     138       10187 :       IF (PRESENT(prev_local_molecules) .AND. &
     139             :           PRESENT(prev_molecule_kind_set)) THEN
     140        2639 :          IF (ASSOCIATED(prev_local_molecules) .AND. &
     141             :              ASSOCIATED(prev_molecule_kind_set)) THEN
     142          44 :             has_prev_subsys_info = .TRUE.
     143             :          END IF
     144             :       END IF
     145             : 
     146       10187 :       logger => cp_get_default_logger()
     147             : 
     148             :       ASSOCIATE (group => logger%para_env, mype => logger%para_env%mepos + 1, &
     149             :                  npe => logger%para_env%num_pe)
     150             : 
     151       30561 :          ALLOCATE (workload_count(npe))
     152       29574 :          workload_count(:) = 0
     153             : 
     154       20374 :          ALLOCATE (workload_fill(npe))
     155       29574 :          workload_fill(:) = 0
     156             : 
     157       10187 :          nmolecule_kind = SIZE(molecule_kind_set)
     158             : 
     159       30561 :          ALLOCATE (nmolecule_local(nmolecule_kind))
     160      145016 :          nmolecule_local(:) = 0
     161             : 
     162      175577 :          ALLOCATE (local_molecule(nmolecule_kind))
     163             : 
     164       10187 :          nparticle_kind = SIZE(atomic_kind_set)
     165             : 
     166       30561 :          ALLOCATE (nparticle_local(nparticle_kind))
     167       35999 :          nparticle_local(:) = 0
     168             : 
     169       10187 :          nbins = npe
     170             : 
     171       10187 :          CALL cp_heap_new(bin_heap_count, nbins)
     172       10187 :          CALL cp_heap_fill(bin_heap_count, workload_count)
     173             : 
     174       10187 :          CALL cp_heap_new(bin_heap_fill, nbins)
     175       10187 :          CALL cp_heap_fill(bin_heap_fill, workload_fill)
     176             : 
     177      145016 :          DO imolecule_kind = 1, nmolecule_kind
     178             : 
     179      134829 :             molecule_kind => molecule_kind_set(imolecule_kind)
     180             : 
     181      134829 :             NULLIFY (molecule_list)
     182             : 
     183             : !     *** Get the number of molecules and the number of ***
     184             : !     *** atoms in each molecule of that molecular kind ***
     185             : 
     186             :             CALL get_molecule_kind(molecule_kind=molecule_kind, &
     187             :                                    molecule_list=molecule_list, &
     188             :                                    natom=natom, &
     189      134829 :                                    nsgf=nsgf)
     190             : 
     191             : !     *** Consider the number of atoms or basis ***
     192             : !     *** functions which depends on the method ***
     193             : 
     194      134829 :             nload = MAX(natom, nsgf)
     195      134829 :             nmolecule = SIZE(molecule_list)
     196             : 
     197             : !     *** Get the number of local molecules of the current molecule kind ***
     198             : 
     199      437611 :             DO imolecule = 1, nmolecule
     200      437611 :                IF (has_prev_subsys_info) THEN
     201      297660 :                   DO imolecule_prev_kind = 1, SIZE(prev_molecule_kind_set)
     202     3188004 :                      IF (ANY(prev_local_molecules%list(imolecule_prev_kind)%array( &
     203        5060 :                              1:prev_local_molecules%n_el(imolecule_prev_kind)) == molecule_list(imolecule))) THEN
     204             :                         ! molecule used to be local
     205        2530 :                         nmolecule_local(imolecule_kind) = nmolecule_local(imolecule_kind) + 1
     206             :                      END IF
     207             :                   END DO
     208             :                ELSE
     209      297722 :                   CALL cp_heap_get_first(bin_heap_count, bin, bin_price, found)
     210      297722 :                   IF (.NOT. found) &
     211           0 :                      CPABORT("No topmost heap element found.")
     212             : 
     213      297722 :                   ipe = bin
     214      297722 :                   IF (bin_price /= workload_count(ipe)) &
     215           0 :                      CPABORT("inconsistent heap")
     216             : 
     217      297722 :                   workload_count(ipe) = workload_count(ipe) + nload
     218      297722 :                   IF (ipe == mype) THEN
     219      163942 :                      nmolecule_local(imolecule_kind) = nmolecule_local(imolecule_kind) + 1
     220             :                   END IF
     221             : 
     222      297722 :                   bin_price = workload_count(ipe)
     223      297722 :                   CALL cp_heap_reset_first(bin_heap_count, bin_price)
     224             :                END IF
     225             :             END DO
     226             : 
     227             : !     *** Distribute the molecules ***
     228      134829 :             n = nmolecule_local(imolecule_kind)
     229             : 
     230      134829 :             IF (n > 0) THEN
     231      217383 :                ALLOCATE (local_molecule(imolecule_kind)%array(n))
     232             :             ELSE
     233       62368 :                NULLIFY (local_molecule(imolecule_kind)%array)
     234             :             END IF
     235             : 
     236             :             imolecule_local = 0
     237      582627 :             DO imolecule = 1, nmolecule
     238      302782 :                is_local = .FALSE.
     239      302782 :                IF (has_prev_subsys_info) THEN
     240      297660 :                   DO imolecule_prev_kind = 1, SIZE(prev_molecule_kind_set)
     241     3188004 :                      IF (ANY(prev_local_molecules%list(imolecule_prev_kind)%array( &
     242        5060 :                              1:prev_local_molecules%n_el(imolecule_prev_kind)) == molecule_list(imolecule))) THEN
     243        2530 :                         is_local = .TRUE.
     244             :                      END IF
     245             :                   END DO
     246             :                ELSE
     247      297722 :                   CALL cp_heap_get_first(bin_heap_fill, bin, bin_price, found)
     248      297722 :                   IF (.NOT. found) &
     249           0 :                      CPABORT("No topmost heap element found.")
     250             : 
     251      297722 :                   ipe = bin
     252      297722 :                   IF (bin_price /= workload_fill(ipe)) &
     253           0 :                      CPABORT("inconsistent heap")
     254             : 
     255      297722 :                   workload_fill(ipe) = workload_fill(ipe) + nload
     256      297722 :                   is_local = (ipe == mype)
     257             :                END IF
     258      302782 :                IF (is_local) THEN
     259      166472 :                   imolecule_local = imolecule_local + 1
     260      166472 :                   molecule_a = molecule_list(imolecule)
     261      166472 :                   local_molecule(imolecule_kind)%array(imolecule_local) = molecule_a
     262      579549 :                   DO iatom = 1, natom
     263      413077 :                      atom_a = molecule_set(molecule_a)%first_atom + iatom - 1
     264             : 
     265             :                      CALL get_atomic_kind(atomic_kind=particle_set(atom_a)%atomic_kind, &
     266      413077 :                                           kind_number=kind_a)
     267      579549 :                      nparticle_local(kind_a) = nparticle_local(kind_a) + 1
     268             :                   END DO
     269             :                END IF
     270      437611 :                IF (.NOT. has_prev_subsys_info) THEN
     271      297722 :                   bin_price = workload_fill(ipe)
     272      297722 :                   CALL cp_heap_reset_first(bin_heap_fill, bin_price)
     273             :                END IF
     274             :             END DO
     275             : 
     276             :          END DO
     277             : 
     278       29574 :          IF (ANY(workload_fill .NE. workload_count)) &
     279           0 :             CPABORT("Inconsistent heaps encountered")
     280             : 
     281       10187 :          CALL cp_heap_release(bin_heap_count)
     282       10187 :          CALL cp_heap_release(bin_heap_fill)
     283             : 
     284             : !   *** Create the local molecule structure ***
     285             : 
     286             :          CALL distribution_1d_create(local_molecules, &
     287             :                                      n_el=nmolecule_local, &
     288       10187 :                                      para_env=logger%para_env)
     289             : 
     290             : !   *** Create the local particle structure ***
     291             : 
     292             :          CALL distribution_1d_create(local_particles, &
     293             :                                      n_el=nparticle_local, &
     294       10187 :                                      para_env=logger%para_env)
     295             : 
     296             : !   *** Store the generated local molecule and particle distributions ***
     297             : 
     298       35999 :          nparticle_local(:) = 0
     299             : 
     300      145016 :          DO imolecule_kind = 1, nmolecule_kind
     301             : 
     302      134829 :             IF (nmolecule_local(imolecule_kind) == 0) CYCLE
     303             : 
     304             :             local_molecules%list(imolecule_kind)%array(:) = &
     305      238933 :                local_molecule(imolecule_kind)%array(:)
     306             : 
     307       72461 :             molecule_kind => molecule_kind_set(imolecule_kind)
     308             : 
     309             :             CALL get_molecule_kind(molecule_kind=molecule_kind, &
     310       72461 :                                    natom=natom)
     311             : 
     312      249120 :             DO imolecule = 1, nmolecule_local(imolecule_kind)
     313      166472 :                molecule_a = local_molecule(imolecule_kind)%array(imolecule)
     314      714378 :                DO iatom = 1, natom
     315      413077 :                   atom_a = molecule_set(molecule_a)%first_atom + iatom - 1
     316             :                   CALL get_atomic_kind(atomic_kind=particle_set(atom_a)%atomic_kind, &
     317      413077 :                                        kind_number=kind_a)
     318      413077 :                   nparticle_local(kind_a) = nparticle_local(kind_a) + 1
     319      579549 :                   local_particles%list(kind_a)%array(nparticle_local(kind_a)) = atom_a
     320             :                END DO
     321             :             END DO
     322             : 
     323             :          END DO
     324             : 
     325             : !   *** Print distribution, if requested ***
     326             : 
     327       10187 :          IF (BTEST(cp_print_key_should_output(logger%iter_info, &
     328       30561 :                                               force_env_section, "PRINT%DISTRIBUTION1D"), cp_p_file)) THEN
     329             : 
     330             :             output_unit = cp_print_key_unit_nr(logger, force_env_section, "PRINT%DISTRIBUTION1D", &
     331         130 :                                                extension=".Log")
     332             : 
     333         130 :             iw = output_unit
     334         130 :             IF (output_unit < 0) iw = cp_logger_get_default_unit_nr(logger, LOCAL=.TRUE.)
     335             : 
     336             : !     *** Print molecule distribution ***
     337             : 
     338         390 :             ALLOCATE (work(npe))
     339         390 :             work(:) = 0
     340             : 
     341         416 :             work(mype) = SUM(nmolecule_local)
     342         130 :             CALL group%sum(work)
     343             : 
     344         130 :             IF (output_unit > 0) THEN
     345             :                WRITE (UNIT=output_unit, &
     346             :                       FMT="(/, T2, A, T51, A, /, (T52, I6, T73, I8))") &
     347          65 :                   "DISTRIBUTION OF THE MOLECULES", &
     348          65 :                   "Process    Number of molecules", &
     349         260 :                   (ipe - 1, work(ipe), ipe=1, npe)
     350             :                WRITE (UNIT=output_unit, FMT="(T55, A3, T73, I8)") &
     351         195 :                   "Sum", SUM(work)
     352          65 :                CALL m_flush(output_unit)
     353             :             END IF
     354             : 
     355         130 :             CALL group%sync()
     356             : 
     357         390 :             DO ipe = 1, npe
     358         260 :                IF (ipe == mype) THEN
     359             :                   WRITE (UNIT=iw, FMT="(/, T3, A)") &
     360         130 :                      "Process   Kind   Local molecules (global indices)"
     361         416 :                   DO imolecule_kind = 1, nmolecule_kind
     362         416 :                      IF (imolecule_kind == 1) THEN
     363             :                         WRITE (UNIT=iw, FMT="(T4, I6, 2X, I5, (T21, 10I6))") &
     364         130 :                            ipe - 1, imolecule_kind, &
     365         330 :                            (local_molecules%list(imolecule_kind)%array(imolecule), &
     366         590 :                             imolecule=1, nmolecule_local(imolecule_kind))
     367             :                      ELSE
     368             :                         WRITE (UNIT=iw, FMT="(T12, I5, (T21, 10I6))") &
     369         156 :                            imolecule_kind, &
     370         234 :                            (local_molecules%list(imolecule_kind)%array(imolecule), &
     371         546 :                             imolecule=1, nmolecule_local(imolecule_kind))
     372             :                      END IF
     373             :                   END DO
     374             :                END IF
     375         260 :                CALL m_flush(iw)
     376         390 :                CALL group%sync()
     377             :             END DO
     378             : 
     379             : !     *** Print particle distribution ***
     380             : 
     381         390 :             work(:) = 0
     382             : 
     383         436 :             work(mype) = SUM(nparticle_local)
     384         130 :             CALL group%sum(work)
     385             : 
     386         130 :             IF (output_unit > 0) THEN
     387             :                WRITE (UNIT=output_unit, &
     388             :                       FMT="(/, T2, A, T51, A, /, (T52, I6, T73, I8))") &
     389          65 :                   "DISTRIBUTION OF THE PARTICLES", &
     390          65 :                   "Process    Number of particles", &
     391         260 :                   (ipe - 1, work(ipe), ipe=1, npe)
     392             :                WRITE (UNIT=output_unit, FMT="(T55, A3, T73, I8)") &
     393         195 :                   "Sum", SUM(work)
     394          65 :                CALL m_flush(output_unit)
     395             :             END IF
     396             : 
     397         130 :             CALL group%sync()
     398             : 
     399         390 :             DO ipe = 1, npe
     400         260 :                IF (ipe == mype) THEN
     401             :                   WRITE (UNIT=iw, FMT="(/, T3, A)") &
     402         130 :                      "Process   Kind   Local particles (global indices)"
     403         436 :                   DO iparticle_kind = 1, nparticle_kind
     404         436 :                      IF (iparticle_kind == 1) THEN
     405             :                         WRITE (UNIT=iw, FMT="(T4, I6, 2X, I5, (T20, 10I6))") &
     406         130 :                            ipe - 1, iparticle_kind, &
     407         586 :                            (local_particles%list(iparticle_kind)%array(iatom), &
     408         846 :                             iatom=1, nparticle_local(iparticle_kind))
     409             :                      ELSE
     410             :                         WRITE (UNIT=iw, FMT="(T12, I5, (T20, 10I6))") &
     411         176 :                            iparticle_kind, &
     412        1084 :                            (local_particles%list(iparticle_kind)%array(iatom), &
     413        1436 :                             iatom=1, nparticle_local(iparticle_kind))
     414             :                      END IF
     415             :                   END DO
     416             :                END IF
     417         260 :                CALL m_flush(iw)
     418         390 :                CALL group%sync()
     419             :             END DO
     420         130 :             DEALLOCATE (work)
     421             : 
     422             :             CALL cp_print_key_finished_output(output_unit, logger, force_env_section, &
     423         130 :                                               "PRINT%DISTRIBUTION1D")
     424             :          END IF
     425             :       END ASSOCIATE
     426             : !   *** Release work storage ***
     427             : 
     428       10187 :       DEALLOCATE (workload_count)
     429             : 
     430       10187 :       DEALLOCATE (workload_fill)
     431             : 
     432       10187 :       DEALLOCATE (nmolecule_local)
     433             : 
     434       10187 :       DEALLOCATE (nparticle_local)
     435             : 
     436      145016 :       DO imolecule_kind = 1, nmolecule_kind
     437      145016 :          IF (ASSOCIATED(local_molecule(imolecule_kind)%array)) THEN
     438       72461 :             DEALLOCATE (local_molecule(imolecule_kind)%array)
     439             :          END IF
     440             :       END DO
     441       10187 :       DEALLOCATE (local_molecule)
     442             : 
     443       10187 :       CALL timestop(handle)
     444             : 
     445       20374 :    END SUBROUTINE distribute_molecules_1d
     446             : 
     447             : ! **************************************************************************************************
     448             : !> \brief Distributes the particle pairs creating a 2d distribution optimally
     449             : !>      suited for quickstep
     450             : !> \param cell ...
     451             : !> \param atomic_kind_set ...
     452             : !> \param particle_set ...
     453             : !> \param qs_kind_set ...
     454             : !> \param molecule_kind_set ...
     455             : !> \param molecule_set ...
     456             : !> \param distribution_2d the distribution that will be created by this
     457             : !>                         method
     458             : !> \param blacs_env the parallel environment at the basis of the
     459             : !>                   distribution
     460             : !> \param force_env_section ...
     461             : !> \par History
     462             : !>      - local_rows & cols blocksize optimizations (Aug. 2003, MK)
     463             : !>      - cleanup of distribution_2d (Sep. 2003, fawzi)
     464             : !>      - update for molecules (Oct. 2003, MK)
     465             : !> \author fawzi (Feb. 2003)
     466             : !> \note
     467             : !>      Intermediate generation of a 2d distribution of the molecules, but
     468             : !>      only the corresponding particle (atomic) distribution is currently
     469             : !>      used. The 2d distribution of the molecules is deleted, but may easily
     470             : !>      be recovered (MK).
     471             : ! **************************************************************************************************
     472        7368 :    SUBROUTINE distribute_molecules_2d(cell, atomic_kind_set, particle_set, &
     473             :                                       qs_kind_set, molecule_kind_set, molecule_set, &
     474             :                                       distribution_2d, blacs_env, force_env_section)
     475             :       TYPE(cell_type), POINTER                           :: cell
     476             :       TYPE(atomic_kind_type), DIMENSION(:), POINTER      :: atomic_kind_set
     477             :       TYPE(particle_type), DIMENSION(:), POINTER         :: particle_set
     478             :       TYPE(qs_kind_type), DIMENSION(:), POINTER          :: qs_kind_set
     479             :       TYPE(molecule_kind_type), DIMENSION(:), POINTER    :: molecule_kind_set
     480             :       TYPE(molecule_type), DIMENSION(:), POINTER         :: molecule_set
     481             :       TYPE(distribution_2d_type), POINTER                :: distribution_2d
     482             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
     483             :       TYPE(section_vals_type), POINTER                   :: force_env_section
     484             : 
     485             :       CHARACTER(len=*), PARAMETER :: routineN = 'distribute_molecules_2d'
     486             : 
     487             :       INTEGER :: cluster_price, cost_model, handle, iatom, iatom_mol, iatom_one, ikind, imol, &
     488             :          imolecule, imolecule_kind, iparticle_kind, ipcol, iprow, iw, kind_a, n, natom, natom_mol, &
     489             :          nclusters, nmolecule, nmolecule_kind, nparticle_kind, nsgf, output_unit
     490        7368 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: cluster_list, cluster_prices, &
     491        7368 :                                                             nparticle_local_col, &
     492        7368 :                                                             nparticle_local_row, work
     493        7368 :       INTEGER, DIMENSION(:), POINTER                     :: lmax_basis, molecule_list
     494        7368 :       INTEGER, DIMENSION(:, :), POINTER                  :: cluster_col_distribution, &
     495        7368 :                                                             cluster_row_distribution, &
     496        7368 :                                                             col_distribution, row_distribution
     497             :       LOGICAL :: basic_cluster_optimization, basic_optimization, basic_spatial_optimization, &
     498             :          molecular_distribution, skip_optimization
     499        7368 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: coords, pbc_scaled_coords
     500             :       REAL(KIND=dp), DIMENSION(3)                        :: center
     501        7368 :       TYPE(cp_1d_i_p_type), DIMENSION(:), POINTER        :: local_particle_col, local_particle_row
     502             :       TYPE(cp_logger_type), POINTER                      :: logger
     503             :       TYPE(gto_basis_set_type), POINTER                  :: orb_basis_set
     504             :       TYPE(molecule_kind_type), POINTER                  :: molecule_kind
     505             :       TYPE(section_vals_type), POINTER                   :: distribution_section
     506             : 
     507             : !...
     508             : 
     509        7368 :       CALL timeset(routineN, handle)
     510             : 
     511        7368 :       logger => cp_get_default_logger()
     512             : 
     513        7368 :       distribution_section => section_vals_get_subs_vals(force_env_section, "DFT%QS%DISTRIBUTION")
     514             : 
     515        7368 :       CALL section_vals_val_get(distribution_section, "2D_MOLECULAR_DISTRIBUTION", l_val=molecular_distribution)
     516        7368 :       CALL section_vals_val_get(distribution_section, "SKIP_OPTIMIZATION", l_val=skip_optimization)
     517        7368 :       CALL section_vals_val_get(distribution_section, "BASIC_OPTIMIZATION", l_val=basic_optimization)
     518        7368 :       CALL section_vals_val_get(distribution_section, "BASIC_SPATIAL_OPTIMIZATION", l_val=basic_spatial_optimization)
     519        7368 :       CALL section_vals_val_get(distribution_section, "BASIC_CLUSTER_OPTIMIZATION", l_val=basic_cluster_optimization)
     520             : 
     521        7368 :       CALL section_vals_val_get(distribution_section, "COST_MODEL", i_val=cost_model)
     522             :       !
     523             : 
     524             :       ASSOCIATE (group => blacs_env%para_env, myprow => blacs_env%mepos(1) + 1, mypcol => blacs_env%mepos(2) + 1, &
     525             :                  nprow => blacs_env%num_pe(1), npcol => blacs_env%num_pe(2))
     526             : 
     527        7368 :          nmolecule_kind = SIZE(molecule_kind_set)
     528        7368 :          CALL get_molecule_kind_set(molecule_kind_set, nmolecule=nmolecule)
     529             : 
     530        7368 :          nparticle_kind = SIZE(atomic_kind_set)
     531        7368 :          CALL get_atomic_kind_set(atomic_kind_set=atomic_kind_set, natom=natom)
     532             : 
     533             :          !
     534             :          ! we need to generate two representations of the distribution, one as a straight array with global particles
     535             :          ! one ordered wrt to kinds and only listing the local particles
     536             :          !
     537       22104 :          ALLOCATE (row_distribution(natom, 2))
     538       14736 :          ALLOCATE (col_distribution(natom, 2))
     539             :          ! Initialize the distributions to -1, as the second dimension only gets set with cluster optimization
     540             :          ! but the information is needed by dbcsr
     541      218532 :          row_distribution = -1; col_distribution = -1
     542             : 
     543       36079 :          ALLOCATE (local_particle_col(nparticle_kind))
     544       28711 :          ALLOCATE (local_particle_row(nparticle_kind))
     545       22104 :          ALLOCATE (nparticle_local_row(nparticle_kind))
     546       14736 :          ALLOCATE (nparticle_local_col(nparticle_kind))
     547             : 
     548        7368 :          IF (basic_optimization .OR. basic_spatial_optimization .OR. basic_cluster_optimization) THEN
     549             : 
     550        7368 :             IF (molecular_distribution) THEN
     551           2 :                nclusters = nmolecule
     552             :             ELSE
     553             :                nclusters = natom
     554             :             END IF
     555             : 
     556       22104 :             ALLOCATE (cluster_list(nclusters))
     557       14736 :             ALLOCATE (cluster_prices(nclusters))
     558       22104 :             ALLOCATE (cluster_row_distribution(nclusters, 2))
     559       14736 :             ALLOCATE (cluster_col_distribution(nclusters, 2))
     560      225868 :             cluster_row_distribution = -1; cluster_col_distribution = -1
     561             : 
     562             :             ! Fill in the clusters and their prices
     563        7368 :             CALL section_vals_val_get(distribution_section, "COST_MODEL", i_val=cost_model)
     564        7368 :             IF (.NOT. molecular_distribution) THEN
     565       52777 :                DO iatom = 1, natom
     566       45411 :                   IF (iatom .GT. nclusters) &
     567           0 :                      CPABORT("Bounds error")
     568       45411 :                   CALL get_atomic_kind(particle_set(iatom)%atomic_kind, kind_number=ikind)
     569       45411 :                   cluster_list(iatom) = iatom
     570       45411 :                   SELECT CASE (cost_model)
     571             :                   CASE (model_block_count)
     572       45411 :                      CALL get_qs_kind(qs_kind_set(ikind), nsgf=nsgf)
     573       45411 :                      cluster_price = nsgf
     574             :                   CASE (model_block_lmax)
     575           0 :                      CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set)
     576           0 :                      CALL get_gto_basis_set(orb_basis_set, lmax=lmax_basis)
     577           0 :                      cluster_price = MAXVAL(lmax_basis)
     578             :                   CASE default
     579           0 :                      CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set)
     580           0 :                      CALL get_gto_basis_set(orb_basis_set, lmax=lmax_basis)
     581       45411 :                      cluster_price = 8 + (MAXVAL(lmax_basis)**2)
     582             :                   END SELECT
     583       98188 :                   cluster_prices(iatom) = cluster_price
     584             :                END DO
     585             :             ELSE
     586             :                imol = 0
     587           4 :                DO imolecule_kind = 1, nmolecule_kind
     588           2 :                   molecule_kind => molecule_kind_set(imolecule_kind)
     589           2 :                   CALL get_molecule_kind(molecule_kind=molecule_kind, molecule_list=molecule_list, natom=natom_mol)
     590           8 :                   DO imolecule = 1, SIZE(molecule_list)
     591           4 :                      imol = imol + 1
     592           4 :                      cluster_list(imol) = imol
     593           4 :                      cluster_price = 0
     594          16 :                      DO iatom_mol = 1, natom_mol
     595          12 :                         iatom = molecule_set(molecule_list(imolecule))%first_atom + iatom_mol - 1
     596          12 :                         CALL get_atomic_kind(particle_set(iatom)%atomic_kind, kind_number=ikind)
     597          16 :                         SELECT CASE (cost_model)
     598             :                         CASE (model_block_count)
     599          12 :                            CALL get_qs_kind(qs_kind_set(ikind), nsgf=nsgf)
     600          12 :                            cluster_price = cluster_price + nsgf
     601             :                         CASE (model_block_lmax)
     602           0 :                            CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set)
     603           0 :                            CALL get_gto_basis_set(orb_basis_set, lmax=lmax_basis)
     604           0 :                            cluster_price = cluster_price + MAXVAL(lmax_basis)
     605             :                         CASE default
     606           0 :                            CALL get_qs_kind(qs_kind_set(ikind), basis_set=orb_basis_set)
     607           0 :                            CALL get_gto_basis_set(orb_basis_set, lmax=lmax_basis)
     608          12 :                            cluster_price = cluster_price + 8 + (MAXVAL(lmax_basis)**2)
     609             :                         END SELECT
     610             :                      END DO
     611           6 :                      cluster_prices(imol) = cluster_price
     612             :                   END DO
     613             :                END DO
     614             :             END IF
     615             : 
     616             :             ! And distribute
     617        7368 :             IF (basic_optimization) THEN
     618             :                CALL make_basic_distribution(cluster_list, cluster_prices, &
     619        7198 :                                             nprow, cluster_row_distribution(:, 1), npcol, cluster_col_distribution(:, 1))
     620             :             ELSE
     621         170 :                IF (basic_cluster_optimization) THEN
     622           4 :                   IF (molecular_distribution) &
     623           0 :                      CPABORT("clustering and molecular blocking NYI")
     624          16 :                   ALLOCATE (pbc_scaled_coords(3, natom), coords(3, natom))
     625         118 :                   DO iatom = 1, natom
     626         114 :                      CALL real_to_scaled(pbc_scaled_coords(:, iatom), pbc(particle_set(iatom)%r(:), cell), cell)
     627         118 :                      coords(:, iatom) = pbc(particle_set(iatom)%r(:), cell)
     628             :                   END DO
     629             :                   CALL make_cluster_distribution(coords, pbc_scaled_coords, cell, cluster_prices, &
     630           4 :                                                  nprow, cluster_row_distribution, npcol, cluster_col_distribution)
     631             :                ELSE ! basic_spatial_optimization
     632         498 :                   ALLOCATE (pbc_scaled_coords(3, nclusters))
     633         166 :                   IF (.NOT. molecular_distribution) THEN
     634             :                      ! just scaled coords
     635        2696 :                      DO iatom = 1, natom
     636        2696 :                         CALL real_to_scaled(pbc_scaled_coords(:, iatom), pbc(particle_set(iatom)%r(:), cell), cell)
     637             :                      END DO
     638             :                   ELSE
     639             :                      ! use scaled coords of geometric center, folding when appropriate
     640             :                      imol = 0
     641           0 :                      DO imolecule_kind = 1, nmolecule_kind
     642           0 :                         molecule_kind => molecule_kind_set(imolecule_kind)
     643           0 :                         CALL get_molecule_kind(molecule_kind=molecule_kind, molecule_list=molecule_list, natom=natom_mol)
     644           0 :                         DO imolecule = 1, SIZE(molecule_list)
     645           0 :                            imol = imol + 1
     646           0 :                            iatom_one = molecule_set(molecule_list(imolecule))%first_atom
     647           0 :                            center = 0.0_dp
     648           0 :                            DO iatom_mol = 1, natom_mol
     649           0 :                               iatom = molecule_set(molecule_list(imolecule))%first_atom + iatom_mol - 1
     650             :                               center = center + &
     651           0 :                                    pbc(particle_set(iatom)%r(:) - particle_set(iatom_one)%r(:), cell) + particle_set(iatom_one)%r(:)
     652             :                            END DO
     653           0 :                            center = center/natom_mol
     654           0 :                            CALL real_to_scaled(pbc_scaled_coords(:, imol), pbc(center, cell), cell)
     655             :                         END DO
     656             :                      END DO
     657             :                   END IF
     658             : 
     659             :                   CALL make_basic_spatial_distribution(pbc_scaled_coords, cluster_prices, &
     660         166 :                                                        nprow, cluster_row_distribution(:, 1), npcol, cluster_col_distribution(:, 1))
     661             : 
     662         166 :                   DEALLOCATE (pbc_scaled_coords)
     663             :                END IF
     664             :             END IF
     665             : 
     666             :             ! And assign back
     667        7368 :             IF (.NOT. molecular_distribution) THEN
     668      225840 :                row_distribution = cluster_row_distribution
     669      225840 :                col_distribution = cluster_col_distribution
     670             :             ELSE
     671             :                imol = 0
     672           4 :                DO imolecule_kind = 1, nmolecule_kind
     673           2 :                   molecule_kind => molecule_kind_set(imolecule_kind)
     674           2 :                   CALL get_molecule_kind(molecule_kind=molecule_kind, molecule_list=molecule_list, natom=natom_mol)
     675           8 :                   DO imolecule = 1, SIZE(molecule_list)
     676           4 :                      imol = imol + 1
     677          18 :                      DO iatom_mol = 1, natom_mol
     678          12 :                         iatom = molecule_set(molecule_list(imolecule))%first_atom + iatom_mol - 1
     679          72 :                         row_distribution(iatom, :) = cluster_row_distribution(imol, :)
     680          76 :                         col_distribution(iatom, :) = cluster_col_distribution(imol, :)
     681             :                      END DO
     682             :                   END DO
     683             :                END DO
     684             :             END IF
     685             : 
     686             :             ! cleanup
     687        7368 :             DEALLOCATE (cluster_list)
     688        7368 :             DEALLOCATE (cluster_prices)
     689        7368 :             DEALLOCATE (cluster_row_distribution)
     690       14736 :             DEALLOCATE (cluster_col_distribution)
     691             : 
     692             :          ELSE
     693             :             ! expects nothing else
     694           0 :             CPABORT("")
     695             :          END IF
     696             : 
     697             :          ! prepare the lists of local particles
     698             : 
     699             :          ! count local particles of a given kind
     700       21343 :          nparticle_local_col = 0
     701       21343 :          nparticle_local_row = 0
     702       52791 :          DO iatom = 1, natom
     703       45423 :             CALL get_atomic_kind(atomic_kind=particle_set(iatom)%atomic_kind, kind_number=kind_a)
     704       45423 :             IF (row_distribution(iatom, 1) == myprow) nparticle_local_row(kind_a) = nparticle_local_row(kind_a) + 1
     705       98214 :             IF (col_distribution(iatom, 1) == mypcol) nparticle_local_col(kind_a) = nparticle_local_col(kind_a) + 1
     706             :          END DO
     707             : 
     708             :          ! allocate space
     709       21343 :          DO iparticle_kind = 1, nparticle_kind
     710       13975 :             n = nparticle_local_row(iparticle_kind)
     711       37900 :             ALLOCATE (local_particle_row(iparticle_kind)%array(n))
     712             : 
     713       13975 :             n = nparticle_local_col(iparticle_kind)
     714       49293 :             ALLOCATE (local_particle_col(iparticle_kind)%array(n))
     715             :          END DO
     716             : 
     717             :          ! store
     718       21343 :          nparticle_local_col = 0
     719       21343 :          nparticle_local_row = 0
     720       52791 :          DO iatom = 1, natom
     721       45423 :             CALL get_atomic_kind(atomic_kind=particle_set(iatom)%atomic_kind, kind_number=kind_a)
     722       45423 :             IF (row_distribution(iatom, 1) == myprow) THEN
     723       23635 :                nparticle_local_row(kind_a) = nparticle_local_row(kind_a) + 1
     724       23635 :                local_particle_row(kind_a)%array(nparticle_local_row(kind_a)) = iatom
     725             :             END IF
     726       98214 :             IF (col_distribution(iatom, 1) == mypcol) THEN
     727       45423 :                nparticle_local_col(kind_a) = nparticle_local_col(kind_a) + 1
     728       45423 :                local_particle_col(kind_a)%array(nparticle_local_col(kind_a)) = iatom
     729             :             END IF
     730             :          END DO
     731             : 
     732             : !   *** Generate the 2d distribution structure  but take care of the zero offsets required
     733       52791 :          row_distribution(:, 1) = row_distribution(:, 1) - 1
     734       52791 :          col_distribution(:, 1) = col_distribution(:, 1) - 1
     735             :          CALL distribution_2d_create(distribution_2d, &
     736             :                                      row_distribution_ptr=row_distribution, &
     737             :                                      col_distribution_ptr=col_distribution, &
     738             :                                      local_rows_ptr=local_particle_row, &
     739             :                                      local_cols_ptr=local_particle_col, &
     740        7368 :                                      blacs_env=blacs_env)
     741             : 
     742        7368 :          NULLIFY (local_particle_row)
     743        7368 :          NULLIFY (local_particle_col)
     744        7368 :          NULLIFY (row_distribution)
     745        7368 :          NULLIFY (col_distribution)
     746             : 
     747             : !   *** Print distribution, if requested ***
     748        7368 :          IF (BTEST(cp_print_key_should_output(logger%iter_info, &
     749        7368 :                                               force_env_section, "PRINT%DISTRIBUTION"), cp_p_file)) THEN
     750             : 
     751             :             output_unit = cp_print_key_unit_nr(logger, force_env_section, "PRINT%DISTRIBUTION", &
     752          80 :                                                extension=".Log")
     753             : 
     754             : !     *** Print row distribution ***
     755             : 
     756         240 :             ALLOCATE (work(nprow))
     757         236 :             work(:) = 0
     758             : 
     759         228 :             IF (mypcol == 1) work(myprow) = SUM(distribution_2d%n_local_rows)
     760             : 
     761          80 :             CALL group%sum(work)
     762             : 
     763          80 :             IF (output_unit > 0) THEN
     764             :                WRITE (UNIT=output_unit, &
     765             :                       FMT="(/, T2, A, /, T15, A, /, (T16, I10, T41, I10, T71, I10))") &
     766          40 :                   "DISTRIBUTION OF THE PARTICLES (ROWS)", &
     767          40 :                   "Process row      Number of particles         Number of matrix rows", &
     768         158 :                   (iprow - 1, work(iprow), -1, iprow=1, nprow)
     769             :                WRITE (UNIT=output_unit, FMT="(T23, A3, T41, I10, T71, I10)") &
     770         118 :                   "Sum", SUM(work), -1
     771          40 :                CALL m_flush(output_unit)
     772             :             END IF
     773             : 
     774          80 :             DEALLOCATE (work)
     775             : 
     776             : !     *** Print column distribution ***
     777             : 
     778         240 :             ALLOCATE (work(npcol))
     779         160 :             work(:) = 0
     780             : 
     781         156 :             IF (myprow == 1) work(mypcol) = SUM(distribution_2d%n_local_cols)
     782             : 
     783          80 :             CALL group%sum(work)
     784             : 
     785          80 :             IF (output_unit > 0) THEN
     786             :                WRITE (UNIT=output_unit, &
     787             :                       FMT="(/, T2, A, /, T15, A, /, (T16, I10, T41, I10, T71, I10))") &
     788          40 :                   "DISTRIBUTION OF THE PARTICLES (COLUMNS)", &
     789          40 :                   "Process col      Number of particles      Number of matrix columns", &
     790         120 :                   (ipcol - 1, work(ipcol), -1, ipcol=1, npcol)
     791             :                WRITE (UNIT=output_unit, FMT="(T23, A3, T41, I10, T71, I10)") &
     792          80 :                   "Sum", SUM(work), -1
     793          40 :                CALL m_flush(output_unit)
     794             :             END IF
     795             : 
     796          80 :             DEALLOCATE (work)
     797             : 
     798             :             CALL cp_print_key_finished_output(output_unit, logger, force_env_section, &
     799          80 :                                               "PRINT%DISTRIBUTION")
     800             :          END IF
     801             :       END ASSOCIATE
     802             : 
     803        7368 :       IF (BTEST(cp_print_key_should_output(logger%iter_info, &
     804             :                                            force_env_section, "PRINT%DISTRIBUTION2D"), cp_p_file)) THEN
     805             : 
     806          70 :          iw = cp_logger_get_default_unit_nr(logger, LOCAL=.TRUE.)
     807             :          CALL distribution_2d_write(distribution_2d, &
     808             :                                     unit_nr=iw, &
     809             :                                     local=.TRUE., &
     810          70 :                                     long_description=.TRUE.)
     811             : 
     812             :       END IF
     813             : 
     814             : !   *** Release work storage ***
     815             : 
     816        7368 :       DEALLOCATE (nparticle_local_row)
     817             : 
     818        7368 :       DEALLOCATE (nparticle_local_col)
     819             : 
     820        7368 :       CALL timestop(handle)
     821             : 
     822       22104 :    END SUBROUTINE distribute_molecules_2d
     823             : 
     824             : ! **************************************************************************************************
     825             : !> \brief Creates a basic distribution
     826             : !> \param cluster_list ...
     827             : !> \param cluster_prices ...
     828             : !> \param nprows ...
     829             : !> \param row_distribution ...
     830             : !> \param npcols ...
     831             : !> \param col_distribution ...
     832             : !> \par History
     833             : !> - Created 2010-08-06 UB
     834             : ! **************************************************************************************************
     835        7198 :    SUBROUTINE make_basic_distribution(cluster_list, cluster_prices, &
     836        7198 :                                       nprows, row_distribution, npcols, col_distribution)
     837             :       INTEGER, DIMENSION(:), INTENT(INOUT)               :: cluster_list, cluster_prices
     838             :       INTEGER, INTENT(IN)                                :: nprows
     839             :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: row_distribution
     840             :       INTEGER, INTENT(IN)                                :: npcols
     841             :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: col_distribution
     842             : 
     843             :       CHARACTER(len=*), PARAMETER :: routineN = 'make_basic_distribution'
     844             : 
     845             :       INTEGER                                            :: bin, cluster, cluster_index, &
     846             :                                                             cluster_price, nbins, nclusters, pcol, &
     847             :                                                             pgrid_gcd, prow, timing_handle
     848             :       INTEGER(int_8)                                     :: bin_price
     849             :       LOGICAL                                            :: found
     850             :       TYPE(cp_heap_type)                                 :: bin_heap
     851             : 
     852             : !   ---------------------------------------------------------------------------
     853             : 
     854        7198 :       CALL timeset(routineN, timing_handle)
     855        7198 :       nbins = lcm(nprows, npcols)
     856        7198 :       pgrid_gcd = gcd(nprows, npcols)
     857        7198 :       CALL sort(cluster_prices, SIZE(cluster_list), cluster_list)
     858        7198 :       CALL cp_heap_new(bin_heap, nbins)
     859       34714 :       CALL cp_heap_fill(bin_heap, (/(0_int_8, bin=1, nbins)/))
     860             :       !
     861        7198 :       nclusters = SIZE(cluster_list)
     862             :       ! Put the most expensive cluster in the bin with the smallest
     863             :       ! price and repeat.
     864       49969 :       DO cluster_index = nclusters, 1, -1
     865       42771 :          cluster = cluster_list(cluster_index)
     866       42771 :          CALL cp_heap_get_first(bin_heap, bin, bin_price, found)
     867       42771 :          IF (.NOT. found) &
     868           0 :             CPABORT("No topmost heap element found.")
     869             :          !
     870       42771 :          prow = INT((bin - 1)*pgrid_gcd/npcols)
     871       42771 :          IF (prow .GE. nprows) &
     872           0 :             CPABORT("Invalid process row.")
     873       42771 :          pcol = INT((bin - 1)*pgrid_gcd/nprows)
     874       42771 :          IF (pcol .GE. npcols) &
     875           0 :             CPABORT("Invalid process column.")
     876       42771 :          row_distribution(cluster) = prow + 1
     877       42771 :          col_distribution(cluster) = pcol + 1
     878             :          !
     879       42771 :          cluster_price = cluster_prices(cluster_index)
     880       42771 :          bin_price = bin_price + cluster_price
     881       92740 :          CALL cp_heap_reset_first(bin_heap, bin_price)
     882             :       END DO
     883        7198 :       CALL cp_heap_release(bin_heap)
     884        7198 :       CALL timestop(timing_handle)
     885        7198 :    END SUBROUTINE make_basic_distribution
     886             : 
     887             : ! **************************************************************************************************
     888             : !> \brief Creates a basic spatial distribution
     889             : !>        that tries to make the corresponding blocks as homogeneous as possible
     890             : !> \param pbc_scaled_coords ...
     891             : !> \param costs ...
     892             : !> \param nprows ...
     893             : !> \param row_distribution ...
     894             : !> \param npcols ...
     895             : !> \param col_distribution ...
     896             : !> \par History
     897             : !> - Created 2010-11-11 Joost VandeVondele
     898             : ! **************************************************************************************************
     899         166 :    SUBROUTINE make_basic_spatial_distribution(pbc_scaled_coords, costs, &
     900         166 :                                               nprows, row_distribution, npcols, col_distribution)
     901             :       REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: pbc_scaled_coords
     902             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: costs
     903             :       INTEGER, INTENT(IN)                                :: nprows
     904             :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: row_distribution
     905             :       INTEGER, INTENT(IN)                                :: npcols
     906             :       INTEGER, DIMENSION(:), INTENT(OUT)                 :: col_distribution
     907             : 
     908             :       CHARACTER(len=*), PARAMETER :: routineN = 'make_basic_spatial_distribution'
     909             : 
     910             :       INTEGER                                            :: handle, iatom, natoms, nbins, pgrid_gcd
     911             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: bin_costs, distribution
     912             : 
     913         166 :       CALL timeset(routineN, handle)
     914             : 
     915         166 :       natoms = SIZE(costs)
     916         166 :       nbins = lcm(nprows, npcols)
     917         166 :       pgrid_gcd = gcd(nprows, npcols)
     918         830 :       ALLOCATE (bin_costs(nbins), distribution(natoms))
     919         498 :       bin_costs = 0
     920             : 
     921        5226 :       CALL spatial_recurse(pbc_scaled_coords, costs, (/(iatom, iatom=1, natoms)/), bin_costs, distribution, 0)
     922             : 
     923             :       ! WRITE(*, *) "Final bin costs: ", bin_costs
     924             : 
     925             :       ! final row_distribution / col_distribution
     926        2696 :       DO iatom = 1, natoms
     927        2530 :          row_distribution(iatom) = (distribution(iatom) - 1)*pgrid_gcd/npcols + 1
     928        2696 :          col_distribution(iatom) = (distribution(iatom) - 1)*pgrid_gcd/nprows + 1
     929             :       END DO
     930             : 
     931         166 :       DEALLOCATE (bin_costs, distribution)
     932             : 
     933         166 :       CALL timestop(handle)
     934             : 
     935         166 :    END SUBROUTINE make_basic_spatial_distribution
     936             : 
     937             : ! **************************************************************************************************
     938             : !> \brief ...
     939             : !> \param pbc_scaled_coords ...
     940             : !> \param costs ...
     941             : !> \param indices ...
     942             : !> \param bin_costs ...
     943             : !> \param distribution ...
     944             : !> \param level ...
     945             : ! **************************************************************************************************
     946        3206 :    RECURSIVE SUBROUTINE spatial_recurse(pbc_scaled_coords, costs, indices, bin_costs, distribution, level)
     947             :       REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: pbc_scaled_coords
     948             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: costs, indices
     949             :       INTEGER, DIMENSION(:), INTENT(INOUT)               :: bin_costs, distribution
     950             :       INTEGER, INTENT(IN)                                :: level
     951             : 
     952             :       INTEGER                                            :: iatom, ibin, natoms, nbins, nhalf
     953        3206 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: atom_costs_sorted, atom_permutation, &
     954        3206 :                                                             bin_costs_sorted, permutation
     955        3206 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: coord
     956             : 
     957        3206 :       natoms = SIZE(costs)
     958        3206 :       nbins = SIZE(bin_costs)
     959        3206 :       nhalf = (natoms + 1)/2
     960             : 
     961        3206 :       IF (natoms <= nbins) THEN
     962             :          ! assign the most expensive atom to the least costly bin
     963        6744 :          ALLOCATE (bin_costs_sorted(nbins), permutation(nbins))
     964        5058 :          bin_costs_sorted(:) = bin_costs
     965        1686 :          CALL sort(bin_costs_sorted, nbins, permutation)
     966        6744 :          ALLOCATE (atom_costs_sorted(natoms), atom_permutation(natoms))
     967        4216 :          atom_costs_sorted(:) = costs
     968        1686 :          CALL sort(atom_costs_sorted, natoms, atom_permutation)
     969        1686 :          ibin = 0
     970             :          ! WRITE(*, *) "Dealing with a new bunch of atoms "
     971        4216 :          DO iatom = natoms, 1, -1
     972        2530 :             ibin = ibin + 1
     973             :             ! WRITE(*, *) "atom", indices(atom_permutation(iatom)), "cost", atom_costs_sorted(iatom), &
     974             :             !            "bin", permutation(ibin), "its cost", bin_costs(permutation(ibin))
     975             :             ! WRITE(100, '(A, I0, 3F12.6)') "A", permutation(ibin), pbc_scaled_coords(:, atom_permutation(iatom))
     976        2530 :             bin_costs(permutation(ibin)) = bin_costs(permutation(ibin)) + atom_costs_sorted(iatom)
     977        4216 :             distribution(indices(atom_permutation(iatom))) = permutation(ibin)
     978             :          END DO
     979        1686 :          DEALLOCATE (bin_costs_sorted, permutation, atom_costs_sorted, atom_permutation)
     980             :       ELSE
     981             :          ! divide atoms in two subsets, sorting according to their coordinates, alternatively x, y, z
     982             :          ! recursively do this for both subsets
     983        7600 :          ALLOCATE (coord(natoms), permutation(natoms))
     984       12354 :          coord(:) = pbc_scaled_coords(MOD(level, 3) + 1, :)
     985        1520 :          CALL sort(coord, natoms, permutation)
     986             :          CALL spatial_recurse(pbc_scaled_coords(:, permutation(1:nhalf)), costs(permutation(1:nhalf)), &
     987       36512 :                               indices(permutation(1:nhalf)), bin_costs, distribution, level + 1)
     988             :          CALL spatial_recurse(pbc_scaled_coords(:, permutation(nhalf + 1:)), costs(permutation(nhalf + 1:)), &
     989       31532 :                               indices(permutation(nhalf + 1:)), bin_costs, distribution, level + 1)
     990        1520 :          DEALLOCATE (coord, permutation)
     991             :       END IF
     992             : 
     993        3206 :    END SUBROUTINE spatial_recurse
     994             : 
     995             : ! **************************************************************************************************
     996             : !> \brief creates a distribution placing close by atoms into clusters and
     997             : !>        putting them on the same processors. Load balancing is
     998             : !>        performed by balancing sum of the cluster costs per processor
     999             : !> \param coords coordinates of the system
    1000             : !> \param scaled_coords scaled coordinates
    1001             : !> \param cell the cell_type
    1002             : !> \param costs costs per atomic block
    1003             : !> \param nprows number of precessors per row on the 2d grid
    1004             : !> \param row_distribution the resulting distribution over proc_rows of atomic blocks
    1005             : !> \param npcols number of precessors per col on the 2d grid
    1006             : !> \param col_distribution the resulting distribution over proc_cols of atomic blocks
    1007             : ! **************************************************************************************************
    1008           4 :    SUBROUTINE make_cluster_distribution(coords, scaled_coords, cell, costs, &
    1009           4 :                                         nprows, row_distribution, npcols, col_distribution)
    1010             :       REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: coords, scaled_coords
    1011             :       TYPE(cell_type), POINTER                           :: cell
    1012             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: costs
    1013             :       INTEGER, INTENT(IN)                                :: nprows
    1014             :       INTEGER, DIMENSION(:, :), INTENT(OUT)              :: row_distribution
    1015             :       INTEGER, INTENT(IN)                                :: npcols
    1016             :       INTEGER, DIMENSION(:, :), INTENT(OUT)              :: col_distribution
    1017             : 
    1018             :       CHARACTER(len=*), PARAMETER :: routineN = 'make_cluster_distribution'
    1019             : 
    1020             :       INTEGER                                            :: handle, i, icluster, level, natom, &
    1021             :                                                             output_unit
    1022             :       INTEGER(KIND=int_8)                                :: ncluster
    1023             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: atom_to_cluster, cluster_cost, &
    1024           4 :                                                             cluster_count, cluster_to_col, &
    1025             :                                                             cluster_to_row, piv_cost, proc_cost, &
    1026           4 :                                                             sorted_cost
    1027             :       REAL(KIND=dp)                                      :: fold(3)
    1028           4 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: cluster_center, cluster_high, cluster_low
    1029             : 
    1030           4 :       CALL timeset(routineN, handle)
    1031             : 
    1032           4 :       output_unit = cp_logger_get_default_io_unit()
    1033             : 
    1034           4 :       natom = SIZE(costs)
    1035         118 :       ncluster = dbcsr_distribution_get_num_images(SUM(costs), natom, nprows, npcols)
    1036          12 :       ALLOCATE (atom_to_cluster(natom))
    1037          12 :       ALLOCATE (cluster_cost(ncluster))
    1038           8 :       ALLOCATE (cluster_to_row(ncluster))
    1039           8 :       ALLOCATE (cluster_to_col(ncluster))
    1040           8 :       ALLOCATE (sorted_cost(ncluster))
    1041           8 :       ALLOCATE (piv_cost(ncluster))
    1042          16 :       cluster_cost(:) = 0
    1043             : 
    1044           4 :       icluster = 0
    1045           4 :       CALL cluster_recurse(coords, scaled_coords, cell, costs, atom_to_cluster, ncluster, icluster, cluster_cost)
    1046             : 
    1047          16 :       sorted_cost(:) = cluster_cost(:)
    1048           4 :       CALL sort(sorted_cost, INT(ncluster), piv_cost)
    1049             : 
    1050          12 :       ALLOCATE (proc_cost(nprows))
    1051          12 :       proc_cost = 0; level = 1
    1052           4 :       CALL assign_clusters(cluster_cost, piv_cost, proc_cost, cluster_to_row, nprows)
    1053             : 
    1054          12 :       DEALLOCATE (proc_cost); ALLOCATE (proc_cost(npcols))
    1055           8 :       proc_cost = 0; level = 1
    1056           4 :       CALL assign_clusters(cluster_cost, piv_cost, proc_cost, cluster_to_col, npcols)
    1057             : 
    1058         118 :       DO i = 1, natom
    1059         114 :          row_distribution(i, 1) = cluster_to_row(atom_to_cluster(i))
    1060         114 :          row_distribution(i, 2) = atom_to_cluster(i)
    1061         114 :          col_distribution(i, 1) = cluster_to_col(atom_to_cluster(i))
    1062         118 :          col_distribution(i, 2) = atom_to_cluster(i)
    1063             :       END DO
    1064             : 
    1065             :       ! generate some statistics on clusters
    1066          12 :       ALLOCATE (cluster_center(3, ncluster))
    1067           8 :       ALLOCATE (cluster_low(3, ncluster))
    1068           8 :       ALLOCATE (cluster_high(3, ncluster))
    1069           8 :       ALLOCATE (cluster_count(ncluster))
    1070          16 :       cluster_count = 0
    1071         118 :       DO i = 1, natom
    1072         114 :          cluster_count(atom_to_cluster(i)) = cluster_count(atom_to_cluster(i)) + 1
    1073         460 :          cluster_center(:, atom_to_cluster(i)) = coords(:, i)
    1074             :       END DO
    1075          52 :       cluster_low = HUGE(0.0_dp)/2
    1076          52 :       cluster_high = -HUGE(0.0_dp)/2
    1077         118 :       DO i = 1, natom
    1078         798 :          fold = pbc(coords(:, i) - cluster_center(:, atom_to_cluster(i)), cell) + cluster_center(:, atom_to_cluster(i))
    1079         456 :          cluster_low(:, atom_to_cluster(i)) = MIN(cluster_low(:, atom_to_cluster(i)), fold(:))
    1080         460 :          cluster_high(:, atom_to_cluster(i)) = MAX(cluster_high(:, atom_to_cluster(i)), fold(:))
    1081             :       END DO
    1082           4 :       IF (output_unit > 0) THEN
    1083           2 :          WRITE (output_unit, *)
    1084           2 :          WRITE (output_unit, '(T2,A)') "Cluster distribution information"
    1085           2 :          WRITE (output_unit, '(T2,A,T48,I8)') "Number of atoms", natom
    1086           2 :          WRITE (output_unit, '(T2,A,T48,I8)') "Number of clusters", ncluster
    1087           8 :          WRITE (output_unit, '(T2,A,T48,I8)') "Largest cluster in atoms", MAXVAL(cluster_count)
    1088           8 :          WRITE (output_unit, '(T2,A,T48,I8)') "Smallest cluster in atoms", MINVAL(cluster_count)
    1089           2 :          WRITE (output_unit, '(T2,A,T48,F8.3,I8)') "Largest cartesian extend [a.u.]/cluster x=", &
    1090          10 :             MAXVAL(cluster_high(1, :) - cluster_low(1, :), MASK=(cluster_count > 0)), &
    1091          14 :             MAXLOC(cluster_high(1, :) - cluster_low(1, :), MASK=(cluster_count > 0))
    1092           2 :          WRITE (output_unit, '(T2,A,T48,F8.3,I8)') "Largest cartesian extend [a.u.]/cluster y=", &
    1093          10 :             MAXVAL(cluster_high(2, :) - cluster_low(2, :), MASK=(cluster_count > 0)), &
    1094          14 :             MAXLOC(cluster_high(2, :) - cluster_low(2, :), MASK=(cluster_count > 0))
    1095           2 :          WRITE (output_unit, '(T2,A,T48,F8.3,I8)') "Largest cartesian extend [a.u.]/cluster z=", &
    1096          10 :             MAXVAL(cluster_high(3, :) - cluster_low(3, :), MASK=(cluster_count > 0)), &
    1097          14 :             MAXLOC(cluster_high(3, :) - cluster_low(3, :), MASK=(cluster_count > 0))
    1098             :       END IF
    1099             : 
    1100           4 :       DEALLOCATE (atom_to_cluster, cluster_cost, cluster_to_row, cluster_to_col, sorted_cost, piv_cost, proc_cost)
    1101           4 :       CALL timestop(handle)
    1102             : 
    1103           8 :    END SUBROUTINE make_cluster_distribution
    1104             : 
    1105             : ! **************************************************************************************************
    1106             : !> \brief assigns the clusters to processors, tryimg to balance the cost on the nodes
    1107             : !> \param cluster_cost vector with the cost of each cluster
    1108             : !> \param piv_cost pivoting vector sorting the cluster_cost
    1109             : !> \param proc_cost cost per processor, on input 0 everywhere
    1110             : !> \param cluster_assign assgnment of clusters on proc
    1111             : !> \param nproc number of processor over which clusters are distributed
    1112             : ! **************************************************************************************************
    1113           8 :    SUBROUTINE assign_clusters(cluster_cost, piv_cost, proc_cost, cluster_assign, nproc)
    1114             :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: cluster_cost, piv_cost, proc_cost, &
    1115             :                                                             cluster_assign
    1116             :       INTEGER                                            :: nproc
    1117             : 
    1118             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'assign_clusters'
    1119             : 
    1120             :       INTEGER                                            :: handle, i, ilevel, offset, &
    1121          16 :                                                             piv_pcost(nproc), sort_proc_cost(nproc)
    1122             : 
    1123           8 :       CALL timeset(routineN, handle)
    1124             : 
    1125          26 :       DO ilevel = 1, SIZE(cluster_cost)/nproc
    1126          42 :          sort_proc_cost(:) = proc_cost(:)
    1127          18 :          CALL sort(sort_proc_cost, nproc, piv_pcost)
    1128             : 
    1129          18 :          offset = (SIZE(cluster_cost)/nproc - ilevel + 1)*nproc + 1
    1130          50 :          DO i = 1, nproc
    1131          24 :             cluster_assign(piv_cost(offset - i)) = piv_pcost(i)
    1132          42 :             proc_cost(piv_pcost(i)) = proc_cost(piv_pcost(i)) + cluster_cost(piv_cost(offset - i))
    1133             :          END DO
    1134             :       END DO
    1135             : 
    1136           8 :       CALL timestop(handle)
    1137             : 
    1138           8 :    END SUBROUTINE assign_clusters
    1139             : 
    1140             : ! **************************************************************************************************
    1141             : !> \brief recursive routine to cluster atoms.
    1142             : !>        Low level uses a modified KMEANS algorithm
    1143             : !>        recursion is used to reduce cost.
    1144             : !>        each level will subdivide a cluster into smaller clusters
    1145             : !>        If only a single split is necessary atoms are assigned to the current cluster
    1146             : !> \param coord coordinates of the system
    1147             : !> \param scaled_coord scaled coordinates
    1148             : !> \param cell the cell_type
    1149             : !> \param costs costs per atomic block
    1150             : !> \param cluster_inds the atom_to cluster mapping
    1151             : !> \param ncluster number of clusters still to be created on a given recursion level
    1152             : !> \param icluster the index of the current cluster to be created
    1153             : !> \param fin_cluster_cost total cost of the final clusters
    1154             : ! **************************************************************************************************
    1155          16 :    RECURSIVE SUBROUTINE cluster_recurse(coord, scaled_coord, cell, costs, cluster_inds, ncluster, icluster, fin_cluster_cost)
    1156             :       REAL(KIND=dp), DIMENSION(:, :), INTENT(IN)         :: coord, scaled_coord
    1157             :       TYPE(cell_type), POINTER                           :: cell
    1158             :       INTEGER, DIMENSION(:), INTENT(IN)                  :: costs
    1159             :       INTEGER, DIMENSION(:), INTENT(INOUT)               :: cluster_inds
    1160             :       INTEGER(KIND=int_8), INTENT(INOUT)                 :: ncluster
    1161             :       INTEGER, INTENT(INOUT)                             :: icluster
    1162             :       INTEGER, DIMENSION(:), INTENT(INOUT)               :: fin_cluster_cost
    1163             : 
    1164             :       INTEGER                                            :: i, ibeg, iend, maxv(1), min_seed, &
    1165             :                                                             natoms, nleft, nsplits, seed, tot_cost
    1166          16 :       INTEGER(KIND=int_8), ALLOCATABLE, DIMENSION(:)     :: ncluster_new
    1167          16 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: cluster_cost, inds_tmp, nat_cluster, piv
    1168             :       LOGICAL                                            :: found
    1169             :       REAL(KIND=dp)                                      :: balance, balance_new, conv
    1170             : 
    1171          16 :       natoms = SIZE(coord, 2)
    1172             :       ! This is a bit of an arbitrary choice, simply a try to avoid too many clusters on large systems and too few for balancing on
    1173             :       ! small systems or subclusters
    1174          16 :       IF (natoms .LE. 1) THEN
    1175           2 :          nsplits = 1
    1176             :       ELSE
    1177          14 :          nsplits = MIN(INT(MIN(INT(MAX(6, INT(60.00/LOG(REAL(natoms, KIND=dp)))), KIND=int_8), ncluster)), natoms)
    1178             :       END IF
    1179          16 :       IF (nsplits == 1) THEN
    1180          12 :          icluster = icluster + 1
    1181         126 :          cluster_inds = icluster
    1182         126 :          fin_cluster_cost(icluster) = SUM(costs)
    1183             :       ELSE
    1184          36 :          ALLOCATE (cluster_cost(nsplits), ncluster_new(nsplits), inds_tmp(natoms), piv(natoms), nat_cluster(nsplits))
    1185             :          ! initialise some values
    1186          16 :          cluster_cost = 0; seed = 300; found = .TRUE.; min_seed = seed
    1187           4 :          CALL kmeans(nsplits, coord, scaled_coord, cell, cluster_inds, nat_cluster, seed, conv)
    1188          36 :          balance = MAXVAL(REAL(nat_cluster, KIND=dp))/MINVAL(REAL(nat_cluster, KIND=dp))
    1189             : 
    1190             :          ! If the system is small enough try to do better in terms of balancing number of atoms per cluster
    1191             :          ! by changing the seed for the initial guess
    1192           4 :          IF (natoms .LT. 1000 .AND. balance .GT. 1.1) THEN
    1193          24 :             found = .FALSE.
    1194          24 :             DO i = 1, 5
    1195          24 :                IF (balance .GT. 1.1) THEN
    1196          20 :                   CALL kmeans(nsplits, coord, scaled_coord, cell, cluster_inds, nat_cluster, seed + i*40, conv)
    1197         160 :                   balance_new = MAXVAL(REAL(nat_cluster, KIND=dp))/MINVAL(REAL(nat_cluster, KIND=dp))
    1198          20 :                   IF (balance_new .LT. balance) THEN
    1199           0 :                      balance = balance_new
    1200           0 :                      min_seed = seed + i*40; 
    1201             :                   END IF
    1202             :                ELSE
    1203             :                   found = .TRUE.
    1204             :                   EXIT
    1205             :                END IF
    1206             :             END DO
    1207             :          END IF
    1208             :          !If we do not match the convergence than recompute at least the best assignment
    1209           4 :          IF (.NOT. found) CALL kmeans(nsplits, coord, scaled_coord, cell, cluster_inds, nat_cluster, min_seed, conv)
    1210             : 
    1211             :          ! compute the cost of each cluster to decide how many splits have to be performed on the next lower level
    1212         118 :          DO i = 1, natoms
    1213         118 :             cluster_cost(cluster_inds(i)) = cluster_cost(cluster_inds(i)) + costs(i)
    1214             :          END DO
    1215          16 :          tot_cost = SUM(cluster_cost)
    1216             :          ! compute new splitting, can be done more elegant
    1217          16 :          ncluster_new(:) = ncluster*cluster_cost(:)/tot_cost
    1218          16 :          nleft = INT(ncluster - SUM(ncluster_new))
    1219             :          ! As we won't have empty clusters, we can not have 0 as new size, so we correct for this at first
    1220          16 :          DO i = 1, nsplits
    1221          16 :             IF (ncluster_new(i) == 0) THEN
    1222           6 :                ncluster_new(i) = 1
    1223           6 :                nleft = nleft - 1
    1224             :             END IF
    1225             :          END DO
    1226             :          ! now comes the next part that the number of clusters will not match anymore, so try to correct in a meaningful way without
    1227             :          ! introducing 0 sized blocks again
    1228           4 :          IF (nleft .NE. 0) THEN
    1229           0 :             DO i = 1, ABS(nleft)
    1230           0 :                IF (nleft < 0) THEN
    1231           0 :                   maxv = MINLOC(cluster_cost/ncluster_new)
    1232           0 :                   IF (ncluster_new(maxv(1)) .NE. 1) THEN
    1233           0 :                      ncluster_new(maxv) = ncluster_new(maxv) - 1
    1234             :                   ELSE
    1235           0 :                      maxv = MAXLOC(ncluster_new)
    1236           0 :                      ncluster_new(maxv) = ncluster_new(maxv) - 1
    1237             :                   END IF
    1238             :                ELSE
    1239           0 :                   maxv = MAXLOC(cluster_cost/ncluster_new)
    1240           0 :                   ncluster_new(maxv) = ncluster_new(maxv) + 1
    1241             :                END IF
    1242             :             END DO
    1243             :          END IF
    1244             : 
    1245             :          !Now get the permutations to sort the atoms in the nsplits clusters for the next level of iteration
    1246         118 :          inds_tmp(:) = cluster_inds(:)
    1247           4 :          CALL sort(inds_tmp, natoms, piv)
    1248             : 
    1249           4 :          ibeg = 1; iend = 0
    1250          16 :          DO i = 1, nsplits
    1251          12 :             IF (nat_cluster(i) == 0) CYCLE
    1252          12 :             iend = iend + nat_cluster(i)
    1253             :             CALL cluster_recurse(coord(:, piv(ibeg:iend)), scaled_coord(:, piv(ibeg:iend)), cell, costs(piv(ibeg:iend)), &
    1254        1038 :                                  inds_tmp(ibeg:iend), ncluster_new(i), icluster, fin_cluster_cost)
    1255          16 :             ibeg = ibeg + nat_cluster(i)
    1256             :          END DO
    1257             :          ! copy the sorted cluster IDs on the old layout, inds_tmp gets set at the lowest level of recursion
    1258         118 :          cluster_inds(piv(:)) = inds_tmp
    1259           4 :          DEALLOCATE (cluster_cost, ncluster_new, inds_tmp, piv, nat_cluster)
    1260             : 
    1261             :       END IF
    1262             : 
    1263          16 :    END SUBROUTINE cluster_recurse
    1264             : 
    1265             : ! **************************************************************************************************
    1266             : !> \brief A modified version of the kmeans algorithm.
    1267             : !>        The assignment has a penalty function in case clusters become
    1268             : !>        larger than average. Like this more even sized clusters are created
    1269             : !>        trading it for locality
    1270             : !> \param ncent number of centers to be created
    1271             : !> \param coord coordinates
    1272             : !> \param scaled_coord scaled coord
    1273             : !> \param cell the cell_type
    1274             : !> \param cluster atom to cluster assignment
    1275             : !> \param nat_cl atoms per cluster
    1276             : !> \param seed seed for the RNG. Algorithm might need multiple tries to deliver best results
    1277             : !> \param tot_var the total variance of the clusters around the centers
    1278             : ! **************************************************************************************************
    1279          28 :    SUBROUTINE kmeans(ncent, coord, scaled_coord, cell, cluster, nat_cl, seed, tot_var)
    1280             :       INTEGER                                            :: ncent
    1281             :       REAL(KIND=dp), DIMENSION(:, :)                     :: coord, scaled_coord
    1282             :       TYPE(cell_type), POINTER                           :: cell
    1283             :       INTEGER, DIMENSION(:)                              :: cluster, nat_cl
    1284             :       INTEGER                                            :: seed
    1285             :       REAL(KIND=dp)                                      :: tot_var
    1286             : 
    1287             :       CHARACTER(len=*), PARAMETER                        :: routineN = 'kmeans'
    1288             : 
    1289             :       INTEGER                                            :: handle, i, ind, itn, j, nat, oldc
    1290             :       LOGICAL                                            :: changed
    1291          56 :       REAL(KIND=dp) :: average(3, ncent, 2), cent_coord(3, ncent), devi, deviat(ncent), dist, &
    1292          56 :          dvec(3), old_var, rn, scaled_cent(3, ncent), var_cl(ncent)
    1293          28 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: dmat
    1294             :       REAL(KIND=dp), DIMENSION(3, 2)                     :: initial_seed
    1295             :       TYPE(rng_stream_type)                              :: rng_stream
    1296             : 
    1297          28 :       CALL timeset(routineN, handle)
    1298             : 
    1299         252 :       initial_seed = REAL(seed, dp); nat = SIZE(coord, 2)
    1300         112 :       ALLOCATE (dmat(ncent, nat))
    1301             : 
    1302             :       rng_stream = rng_stream_type(name="kmeans uniform distribution [0,1]", &
    1303          28 :                                    distribution_type=UNIFORM, seed=initial_seed)
    1304             : 
    1305             : ! try to find a clever initial guess with centers being somewhat distributed
    1306          28 :       rn = rng_stream%next()
    1307          28 :       ind = CEILING(rn*nat)
    1308         112 :       cent_coord(:, 1) = coord(:, ind)
    1309          84 :       DO i = 2, ncent
    1310          28 :          DO
    1311         928 :             rn = rng_stream%next()
    1312         928 :             ind = CEILING(rn*nat)
    1313        3712 :             cent_coord(:, i) = coord(:, ind)
    1314             :             devi = HUGE(1.0_dp)
    1315        2004 :             DO j = 1, i - 1
    1316        1076 :                dvec = pbc(cent_coord(:, j), cent_coord(:, i), cell)
    1317        4304 :                dist = SQRT(DOT_PRODUCT(dvec, dvec))
    1318        2004 :                IF (dist .LT. devi) devi = dist
    1319             :             END DO
    1320         928 :             rn = rng_stream%next()
    1321         928 :             IF (rn .LT. devi**2/169.0) EXIT
    1322             :          END DO
    1323             :       END DO
    1324             : 
    1325             : ! Now start the KMEANS but penalise it in case it starts packing too many atoms into a single set
    1326             : ! Unfoirtunatelz as this is dependent on what happened before it cant be parallel
    1327         826 :       cluster = 0; old_var = HUGE(1.0_dp)
    1328         106 :       DO itn = 1, 1000
    1329        1210 :          changed = .FALSE.; var_cl = 0.0_dp; tot_var = 0.0_dp; nat_cl = 0; deviat = 0.0_dp
    1330             : !      !$OMP PARALLEL DO PRIVATE(i,j,dvec)
    1331        4402 :          DO i = 1, nat
    1332       21418 :             DO j = 1, ncent
    1333       17016 :                dvec = pbc(cent_coord(:, j), coord(:, i), cell)
    1334       72360 :                dmat(j, i) = DOT_PRODUCT(dvec, dvec)
    1335             :             END DO
    1336             :          END DO
    1337        4402 :          DO i = 1, nat
    1338        4296 :             devi = HUGE(1.0_dp); oldc = cluster(i)
    1339       21312 :             DO j = 1, ncent
    1340       17016 :                dist = dmat(j, i) + MAX(nat_cl(j)**2/nat*ncent, nat/ncent)
    1341       21312 :                IF (dist .LT. devi) THEN
    1342        8760 :                   devi = dist; cluster(i) = j
    1343             :                END IF
    1344             :             END DO
    1345        4296 :             deviat(cluster(i)) = deviat(cluster(i)) + SQRT(devi)
    1346        4296 :             nat_cl(cluster(i)) = nat_cl(cluster(i)) + 1
    1347        4296 :             tot_var = tot_var + devi
    1348        4402 :             IF (oldc .NE. cluster(i)) changed = .TRUE.
    1349             :          END DO
    1350             :          ! get the update of the centers done, add a new one in case one center lost all its atoms
    1351             :          ! the algorithm would survive, but its nice to really create what you demand
    1352         106 :          IF (tot_var .GE. old_var) EXIT
    1353         106 :          IF (changed) THEN
    1354             :             ! Here misery of computing the center of geometry of the clusters in PBC.
    1355             :             ! The mapping on the unit circle allows to circumvent all problems
    1356        2506 :             average = 0.0_dp
    1357        3576 :             DO i = 1, SIZE(coord, 2)
    1358       13992 :                average(:, cluster(i), 1) = average(:, cluster(i), 1) + COS(scaled_coord(:, i)*2.0_dp*pi)
    1359       14070 :                average(:, cluster(i), 2) = average(:, cluster(i), 2) + SIN(scaled_coord(:, i)*2.0_dp*pi)
    1360             :             END DO
    1361             : 
    1362         362 :             DO i = 1, ncent
    1363         362 :                IF (nat_cl(i) == 0) THEN
    1364           0 :                   rn = rng_stream%next()
    1365           0 :                   scaled_cent(:, i) = scaled_coord(:, CEILING(rn*nat))
    1366             :                ELSE
    1367        1136 :                   average(:, i, 1) = average(:, i, 1)/REAL(nat_cl(i), dp)
    1368        1136 :                   average(:, i, 2) = average(:, i, 2)/REAL(nat_cl(i), dp)
    1369        1136 :                   scaled_cent(:, i) = (ATAN2(-average(:, i, 2), -average(:, i, 1)) + pi)/(2.0_dp*pi)
    1370         284 :                   CALL scaled_to_real(cent_coord(:, i), scaled_cent(:, i), cell)
    1371             :                END IF
    1372             :             END DO
    1373             :          ELSE
    1374             :             EXIT
    1375             :          END IF
    1376             :       END DO
    1377             : 
    1378          28 :       CALL timestop(handle)
    1379             : 
    1380         728 :    END SUBROUTINE kmeans
    1381             : 
    1382             : END MODULE distribution_methods

Generated by: LCOV version 1.15