LCOV - code coverage report
Current view: top level - src/offload - offload_mempool.c (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:ca6acae) Lines: 100.0 % 137 137
Test Date: 2026-01-02 06:29:53 Functions: 100.0 % 14 14

            Line data    Source code
       1              : /*----------------------------------------------------------------------------*/
       2              : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3              : /*  Copyright 2000-2026 CP2K developers group <https://cp2k.org>              */
       4              : /*                                                                            */
       5              : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6              : /*----------------------------------------------------------------------------*/
       7              : #include "offload_mempool.h"
       8              : #include "../mpiwrap/cp_mpi.h"
       9              : #include "offload_library.h"
      10              : #include "offload_runtime.h"
      11              : 
      12              : #include <assert.h>
      13              : #include <inttypes.h>
      14              : #include <omp.h>
      15              : #include <stdbool.h>
      16              : #include <stdio.h>
      17              : #include <stdlib.h>
      18              : #include <string.h>
      19              : 
      20              : #if defined(__parallel)
      21              : #include <mpi.h>
      22              : #endif
      23              : 
      24              : #define OFFLOAD_MEMPOOL_PRINT(FN, MSG, OUTPUT_UNIT)                            \
      25              :   ((FN)(MSG, (int)strlen(MSG), OUTPUT_UNIT))
      26              : #define OFFLOAD_MEMPOOL_OMPALLOC 1
      27              : 
      28              : /*******************************************************************************
      29              :  * \brief Private struct for storing a chunk of memory.
      30              :  * \author Ole Schuett
      31              :  ******************************************************************************/
      32              : typedef struct offload_memchunk {
      33              :   void *mem; // first: allows to cast memchunk into mem-ptr...
      34              :   struct offload_memchunk *next;
      35              :   size_t size, used;
      36              : } offload_memchunk_t;
      37              : 
      38              : /*******************************************************************************
      39              :  * \brief Private struct for storing a memory pool.
      40              :  * \author Ole Schuett
      41              :  ******************************************************************************/
      42              : typedef struct offload_mempool {
      43              :   offload_memchunk_t *available_head, *allocated_head; // single-linked lists
      44              : } offload_mempool_t;
      45              : 
      46              : /*******************************************************************************
      47              :  * \brief Private pools for host and device memory.
      48              :  * \author Ole Schuett
      49              :  ******************************************************************************/
      50              : static offload_mempool_t mempool_host = {0}, mempool_device = {0};
      51              : 
      52              : /*******************************************************************************
      53              :  * \brief Private some counters for statistics.
      54              :  * \author Hans Pabst
      55              :  ******************************************************************************/
      56              : static uint64_t host_malloc_counter = 0, device_malloc_counter = 0;
      57              : 
      58              : /*******************************************************************************
      59              :  * \brief Private routine for actually allocating system memory.
      60              :  * \author Ole Schuett
      61              :  ******************************************************************************/
      62       131452 : static void *actual_malloc(const size_t size, const bool on_device) {
      63       131452 :   if (size == 0) {
      64              :     return NULL;
      65              :   }
      66              : 
      67       131452 :   void *memory = NULL;
      68              : 
      69              : #if defined(__OFFLOAD)
      70              :   if (on_device) {
      71              :     offload_activate_chosen_device();
      72              :     offloadMalloc(&memory, size);
      73              :   } else {
      74              :     offload_activate_chosen_device();
      75              :     offloadMallocHost(&memory, size);
      76              :   }
      77              : #elif OFFLOAD_MEMPOOL_OMPALLOC && (201811 /*v5.0*/ <= _OPENMP)
      78              :   memory = omp_alloc(size, omp_null_allocator);
      79              : #elif defined(__parallel) && !OFFLOAD_MEMPOOL_OMPALLOC
      80              :   if (MPI_SUCCESS != MPI_Alloc_mem((MPI_Aint)size, MPI_INFO_NULL, &memory)) {
      81              :     fprintf(stderr, "ERROR: MPI_Alloc_mem failed at %s:%i\n", name, __FILE__,
      82              :             __LINE__);
      83              :     MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
      84              :   }
      85              : #else
      86       131452 :   memory = malloc(size);
      87              : #endif
      88              : 
      89              :   // Update statistics.
      90       131452 :   if (on_device) {
      91        44109 : #pragma omp atomic
      92              :     ++device_malloc_counter;
      93              :   } else {
      94        87343 : #pragma omp atomic
      95              :     ++host_malloc_counter;
      96              :   }
      97              : 
      98       131452 :   assert(memory != NULL);
      99              :   return memory;
     100              : }
     101              : 
     102              : /*******************************************************************************
     103              :  * \brief Private routine for actually freeing system memory.
     104              :  * \author Ole Schuett
     105              :  ******************************************************************************/
     106       240548 : static void actual_free(void *memory, const bool on_device) {
     107       240548 :   if (NULL == memory) {
     108              :     return;
     109              :   }
     110              : 
     111              : #if defined(__OFFLOAD)
     112              :   if (on_device) {
     113              :     offload_activate_chosen_device();
     114              :     offloadFree(memory);
     115              :   } else {
     116              :     offload_activate_chosen_device();
     117              :     offloadFreeHost(memory);
     118              :   }
     119              : #elif OFFLOAD_MEMPOOL_OMPALLOC && (201811 /*v5.0*/ <= _OPENMP)
     120              :   (void)on_device; // mark used
     121              :   omp_free(memory, omp_null_allocator);
     122              : #elif defined(__parallel) && !OFFLOAD_MEMPOOL_OMPALLOC
     123              :   (void)on_device; // mark used
     124              :   if (MPI_SUCCESS != MPI_Free_mem(memory)) {
     125              :     fprintf(stderr, "ERROR: MPI_Free_mem failed at %s:%i\n", name, __FILE__,
     126              :             __LINE__);
     127              :     MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
     128              :   }
     129              : #else
     130       131436 :   (void)on_device; // mark used
     131       131436 :   free(memory);
     132              : #endif
     133              : }
     134              : 
     135              : /*******************************************************************************
     136              :  * \brief Private routine for allocating host or device memory from the pool.
     137              :  * \author Ole Schuett and Hans Pabst
     138              :  ******************************************************************************/
     139      3679162 : static void *internal_mempool_malloc(offload_mempool_t *pool, const size_t size,
     140              :                                      const bool on_device) {
     141      3679162 :   if (size == 0) {
     142              :     return NULL;
     143              :   }
     144              : 
     145      3624296 :   offload_memchunk_t *chunk;
     146              : 
     147      7248592 : #pragma omp critical(offload_mempool_modify)
     148              :   {
     149              :     // Find a possible chunk to reuse or reclaim in available list.
     150      3624296 :     offload_memchunk_t **reuse = NULL,
     151      3624296 :                        **reclaim = NULL; // ** for easy list removal
     152      3624296 :     offload_memchunk_t **indirect = &pool->available_head;
     153     68737362 :     while (*indirect != NULL) {
     154     66048949 :       const size_t s = (*indirect)->size;
     155     66048949 :       if (size <= s && (reuse == NULL || s < (*reuse)->size)) {
     156      5245463 :         reuse = indirect; // reuse smallest suitable chunk
     157      5245463 :         if (s == size) {
     158              :           break; // perfect match, exit early
     159              :         }
     160     60803486 :       } else if (reclaim == NULL || (*reclaim)->size < s) {
     161      6564603 :         reclaim = indirect; // reclaim largest unsuitable chunk
     162              :       }
     163     65113066 :       indirect = &(*indirect)->next;
     164              :     }
     165              : 
     166              :     // Select an existing chunk or allocate a new one.
     167      3624296 :     if (reuse != NULL) {
     168              :       // Reusing an exising chunk that's already large enough.
     169      3492844 :       chunk = *reuse;
     170      3492844 :       *reuse = chunk->next; // remove chunk from available list.
     171       131452 :     } else if (reclaim != NULL) {
     172              :       // Reclaiming an existing chunk (resize will happen outside crit. region).
     173        22340 :       chunk = *reclaim;
     174        22340 :       *reclaim = chunk->next; // remove chunk from available list.
     175              :     } else {
     176              :       // Found no available chunk, allocate a new one.
     177       109112 :       chunk = calloc(1, sizeof(offload_memchunk_t));
     178       109112 :       assert(chunk != NULL);
     179              :     }
     180              :   }
     181              : 
     182              :   // Resize chunk outside of critical region before adding it to allocated list.
     183      3624296 :   if (chunk->size < size) {
     184       131452 :     actual_free(chunk->mem, on_device);
     185       131452 :     chunk->mem = actual_malloc(size, on_device);
     186       131452 :     chunk->size = size;
     187              :   }
     188              : 
     189      3624296 :   chunk->used = size; // for statistics
     190              : 
     191              :   // Insert chunk into allocated list.
     192      3624296 : #pragma omp critical(offload_mempool_modify)
     193              :   {
     194      3624296 :     chunk->next = pool->allocated_head;
     195      3624296 :     pool->allocated_head = chunk;
     196              :   }
     197              : 
     198      3624296 :   return chunk->mem;
     199              : }
     200              : 
     201              : /*******************************************************************************
     202              :  * \brief Internal routine for allocating host memory from the pool.
     203              :  * \author Ole Schuett
     204              :  ******************************************************************************/
     205      3404220 : void *offload_mempool_host_malloc(const size_t size) {
     206      3404220 :   return internal_mempool_malloc(&mempool_host, size, false);
     207              : }
     208              : 
     209              : /*******************************************************************************
     210              :  * \brief Internal routine for allocating device memory from the pool
     211              :  * \author Ole Schuett
     212              :  ******************************************************************************/
     213       274942 : void *offload_mempool_device_malloc(const size_t size) {
     214       274942 :   return internal_mempool_malloc(&mempool_device, size, true);
     215              : }
     216              : 
     217              : /*******************************************************************************
     218              :  * \brief Private routine for releasing memory back to the pool.
     219              :  * \author Ole Schuett
     220              :  ******************************************************************************/
     221      4296337 : static void internal_mempool_free(offload_mempool_t *pool, const void *mem) {
     222      4296337 :   if (mem == NULL) {
     223              :     return;
     224              :   }
     225              : 
     226      7248592 : #pragma omp critical(offload_mempool_modify)
     227              :   {
     228              :     // Find chunk in allocated list.
     229      3624296 :     offload_memchunk_t **indirect = &pool->allocated_head;
     230     13413686 :     while (*indirect != NULL && (*indirect)->mem != mem) {
     231      9789390 :       indirect = &(*indirect)->next;
     232              :     }
     233      3624296 :     offload_memchunk_t *chunk = *indirect;
     234      3624296 :     assert(chunk != NULL && chunk->mem == mem);
     235              : 
     236              :     // Remove chunk from allocated list.
     237      3624296 :     *indirect = chunk->next;
     238              : 
     239              :     // Add chunk to available list.
     240      3624296 :     chunk->next = pool->available_head;
     241      3624296 :     pool->available_head = chunk;
     242              :   }
     243              : }
     244              : 
     245              : /*******************************************************************************
     246              :  * \brief Internal routine for releasing memory back to the pool.
     247              :  * \author Ole Schuett
     248              :  ******************************************************************************/
     249      4021395 : void offload_mempool_host_free(const void *memory) {
     250      4021395 :   internal_mempool_free(&mempool_host, memory);
     251      4021395 : }
     252              : 
     253              : /*******************************************************************************
     254              :  * \brief Internal routine for releasing memory back to the pool.
     255              :  * \author Ole Schuett
     256              :  ******************************************************************************/
     257       274942 : void offload_mempool_device_free(const void *memory) {
     258       274942 :   internal_mempool_free(&mempool_device, memory);
     259       274942 : }
     260              : 
     261              : /*******************************************************************************
     262              :  * \brief Private routine for freeing all memory in the pool.
     263              :  * \author Ole Schuett
     264              :  ******************************************************************************/
     265        18600 : static void internal_mempool_clear(offload_mempool_t *pool,
     266              :                                    const bool on_device) {
     267        37200 : #pragma omp critical(offload_mempool_modify)
     268              :   {
     269              :     // Check for leaks, i.e. that the allocated list is empty.
     270        18600 :     assert(pool->allocated_head == NULL);
     271              : 
     272              :     // Free all chunks in available list.
     273       127696 :     while (pool->available_head != NULL) {
     274       109096 :       offload_memchunk_t *chunk = pool->available_head;
     275       109096 :       pool->available_head = chunk->next; // remove chunk
     276       109096 :       actual_free(chunk->mem, on_device);
     277       109096 :       free(chunk);
     278              :     }
     279              :   }
     280        18600 : }
     281              : 
     282              : /*******************************************************************************
     283              :  * \brief Internal routine for freeing all memory in the pool.
     284              :  * \author Ole Schuett
     285              :  ******************************************************************************/
     286         9300 : void offload_mempool_clear(void) {
     287         9300 :   internal_mempool_clear(&mempool_host, false);
     288         9300 :   internal_mempool_clear(&mempool_device, true);
     289         9300 : }
     290              : 
     291              : /*******************************************************************************
     292              :  * \brief Private routine for summing alloc sizes of all chunks in given list.
     293              :  * \author Ole Schuett
     294              :  ******************************************************************************/
     295        37664 : static uint64_t sum_chunks_size(const offload_memchunk_t *head) {
     296        37664 :   uint64_t size_sum = 0;
     297       147620 :   for (const offload_memchunk_t *chunk = head; chunk != NULL;
     298       109956 :        chunk = chunk->next) {
     299       109956 :     size_sum += chunk->size;
     300              :   }
     301        37664 :   return size_sum;
     302              : }
     303              : 
     304              : /*******************************************************************************
     305              :  * \brief Private routine for summing used sizes of all chunks in given list.
     306              :  * \author Ole Schuett
     307              :  ******************************************************************************/
     308        37664 : static uint64_t sum_chunks_used(const offload_memchunk_t *head) {
     309        37664 :   uint64_t used_sum = 0;
     310       147620 :   for (const offload_memchunk_t *chunk = head; chunk != NULL;
     311       109956 :        chunk = chunk->next) {
     312       109956 :     used_sum += chunk->used;
     313              :   }
     314        37664 :   return used_sum;
     315              : }
     316              : 
     317              : /*******************************************************************************
     318              :  * \brief Internal routine to query statistics.
     319              :  * \author Hans Pabst
     320              :  ******************************************************************************/
     321         9416 : void offload_mempool_stats_get(offload_mempool_stats_t *memstats) {
     322         9416 :   assert(NULL != memstats);
     323        18832 : #pragma omp critical(offload_mempool_modify)
     324              :   {
     325         9416 :     memstats->host_mallocs = host_malloc_counter;
     326         9416 :     memstats->host_used = sum_chunks_used(mempool_host.available_head) +
     327         9416 :                           sum_chunks_used(mempool_host.allocated_head);
     328         9416 :     memstats->host_size = sum_chunks_size(mempool_host.available_head) +
     329         9416 :                           sum_chunks_size(mempool_host.allocated_head);
     330              : 
     331         9416 :     memstats->device_mallocs = device_malloc_counter;
     332         9416 :     memstats->device_used = sum_chunks_used(mempool_device.available_head) +
     333         9416 :                             sum_chunks_used(mempool_device.allocated_head);
     334         9416 :     memstats->device_size = sum_chunks_size(mempool_device.available_head) +
     335         9416 :                             sum_chunks_size(mempool_device.allocated_head);
     336              :   }
     337         9416 : }
     338              : 
     339              : /*******************************************************************************
     340              :  * \brief Print allocation statistics..
     341              :  * \author Hans Pabst
     342              :  ******************************************************************************/
     343         9416 : void offload_mempool_stats_print(int fortran_comm,
     344              :                                  void (*print_func)(const char *, int, int),
     345              :                                  int output_unit) {
     346         9416 :   assert(omp_get_num_threads() == 1);
     347              : 
     348         9416 :   char buffer[100];
     349         9416 :   const cp_mpi_comm_t comm = cp_mpi_comm_f2c(fortran_comm);
     350         9416 :   offload_mempool_stats_t memstats;
     351         9416 :   offload_mempool_stats_get(&memstats);
     352         9416 :   cp_mpi_max_uint64(&memstats.device_mallocs, 1, comm);
     353         9416 :   cp_mpi_max_uint64(&memstats.host_mallocs, 1, comm);
     354              : 
     355         9416 :   if (0 != memstats.device_mallocs || 0 != memstats.host_mallocs) {
     356         8042 :     OFFLOAD_MEMPOOL_PRINT(print_func, "\n", output_unit);
     357         8042 :     OFFLOAD_MEMPOOL_PRINT(
     358              :         print_func,
     359              :         " ----------------------------------------------------------------"
     360              :         "---------------\n",
     361              :         output_unit);
     362         8042 :     OFFLOAD_MEMPOOL_PRINT(
     363              :         print_func,
     364              :         " -                                                               "
     365              :         "              -\n",
     366              :         output_unit);
     367              : 
     368         8042 :     OFFLOAD_MEMPOOL_PRINT(
     369              :         print_func,
     370              :         " -                          OFFLOAD MEMPOOL STATISTICS           "
     371              :         "              -\n",
     372              :         output_unit);
     373         8042 :     OFFLOAD_MEMPOOL_PRINT(
     374              :         print_func,
     375              :         " -                                                               "
     376              :         "              -\n",
     377              :         output_unit);
     378         8042 :     OFFLOAD_MEMPOOL_PRINT(
     379              :         print_func,
     380              :         " ----------------------------------------------------------------"
     381              :         "---------------\n",
     382              :         output_unit);
     383         8042 :     OFFLOAD_MEMPOOL_PRINT(print_func,
     384              :                           " Memory consumption               "
     385              :                           " Number of allocations  Used [MiB]  Size [MiB]\n",
     386              :                           output_unit);
     387              :   }
     388         9416 :   if (0 < memstats.device_mallocs) {
     389         8028 :     cp_mpi_max_uint64(&memstats.device_size, 1, comm);
     390         8028 :     snprintf(buffer, sizeof(buffer),
     391              :              " Device                            "
     392              :              " %20" PRIuPTR "  %10" PRIuPTR "  %10" PRIuPTR "\n",
     393         8028 :              (uintptr_t)memstats.device_mallocs,
     394         8028 :              (uintptr_t)((memstats.device_used + (512U << 10)) >> 20),
     395         8028 :              (uintptr_t)((memstats.device_size + (512U << 10)) >> 20));
     396         8028 :     OFFLOAD_MEMPOOL_PRINT(print_func, buffer, output_unit);
     397              :   }
     398         9416 :   if (0 < memstats.host_mallocs) {
     399         8042 :     cp_mpi_max_uint64(&memstats.host_size, 1, comm);
     400         8042 :     snprintf(buffer, sizeof(buffer),
     401              :              " Host                              "
     402              :              " %20" PRIuPTR "  %10" PRIuPTR "  %10" PRIuPTR "\n",
     403         8042 :              (uintptr_t)memstats.host_mallocs,
     404         8042 :              (uintptr_t)((memstats.host_used + (512U << 10)) >> 20),
     405         8042 :              (uintptr_t)((memstats.host_size + (512U << 10)) >> 20));
     406         8042 :     OFFLOAD_MEMPOOL_PRINT(print_func, buffer, output_unit);
     407              :   }
     408         9416 :   if (0 < memstats.device_mallocs || 0 < memstats.host_mallocs) {
     409         8042 :     OFFLOAD_MEMPOOL_PRINT(
     410              :         print_func,
     411              :         " ----------------------------------------------------------------"
     412              :         "---------------\n",
     413              :         output_unit);
     414              :   }
     415         9416 : }
     416              : 
     417              : // EOF
        

Generated by: LCOV version 2.0-1