blob: d5b3191b9b10ceeb9e6e963d390d0708bb2dd296 [file] [log] [blame]
/*
 * include/common/memory.h
 * Memory management definitions.
 *
 * Copyright (C) 2000-2014 Willy Tarreau - w@1wt.eu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
Willy Tarreaubaaee002006-06-26 02:48:02 +020021
Willy Tarreau2dd0d472006-06-29 17:53:05 +020022#ifndef _COMMON_MEMORY_H
23#define _COMMON_MEMORY_H
Willy Tarreaubaaee002006-06-26 02:48:02 +020024
Willy Tarreau158fa752017-11-22 15:47:29 +010025#include <sys/mman.h>
26
Willy Tarreaubaaee002006-06-26 02:48:02 +020027#include <stdlib.h>
Willy Tarreaue430e772014-12-23 14:13:16 +010028#include <string.h>
Willy Tarreaua7280a12018-11-26 19:41:40 +010029#include <unistd.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020030
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020031#include <haproxy/api.h>
Willy Tarreau853b2972020-05-27 18:01:47 +020032#include <haproxy/list.h>
Willy Tarreau3f567e42020-05-28 15:29:19 +020033#include <haproxy/thread.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020034
/* On architectures supporting threads and double-word CAS, we can implement
 * lock-less memory pools. This isn't supported for debugging modes however:
 * defining any of DEBUG_NO_LOCKLESS_POOLS, DEBUG_UAF or DEBUG_FAIL_ALLOC keeps
 * CONFIG_HAP_LOCKLESS_POOLS undefined.
 */
#if defined(USE_THREAD) && defined(HA_HAVE_CAS_DW) && !defined(DEBUG_NO_LOCKLESS_POOLS) && !defined(DEBUG_UAF) && !defined(DEBUG_FAIL_ALLOC)
#define CONFIG_HAP_LOCKLESS_POOLS
#endif
41
/* Pool flags (stored in pool_head.flags and passed to create_pool()).
 * When DEBUG_DONT_SHARE_POOLS is defined, MEM_F_SHARED is 0 so that any
 * "flags & MEM_F_SHARED" test evaluates to zero.
 */
#ifndef DEBUG_DONT_SHARE_POOLS
#define MEM_F_SHARED    0x1
#else
#define MEM_F_SHARED    0
#endif
#define MEM_F_EXACT     0x2
Willy Tarreau50e608d2007-05-13 18:26:08 +020048
/* With DEBUG_MEMORY_POOLS, an extra void* is reserved at the end of each
 * object (POOL_EXTRA bytes) and POOL_LINK() designates that trailing slot.
 * Otherwise no extra room is reserved and POOL_LINK() designates the first
 * pointer-sized word of the object itself. Both forms yield a void** that the
 * callers dereference. Arguments and expansions are fully parenthesized so
 * that any expression may safely be passed.
 */
#ifdef DEBUG_MEMORY_POOLS
#define POOL_EXTRA (sizeof(void *))
#define POOL_LINK(pool, item) ((void **)(((char *)(item)) + ((pool)->size)))
#else
#define POOL_EXTRA (0)
#define POOL_LINK(pool, item) ((void **)(item))
#endif
57
/* number of entries in pool_base_start[] and in each row of pool_cache[][] */
#define MAX_BASE_POOLS 32
59
Willy Tarreaue18db9e2018-10-16 10:28:54 +020060struct pool_cache_head {
61 struct list list; /* head of objects in this pool */
62 size_t size; /* size of an object */
63 unsigned int count; /* number of objects in this pool */
64};
65
66struct pool_cache_item {
67 struct list by_pool; /* link to objects in this pool */
68 struct list by_lru; /* link to objects by LRU order */
69};
70
Willy Tarreau7f0165e2018-11-26 17:09:46 +010071extern struct pool_cache_head pool_cache[][MAX_BASE_POOLS];
Willy Tarreaue18db9e2018-10-16 10:28:54 +020072extern THREAD_LOCAL size_t pool_cache_bytes; /* total cache size */
73extern THREAD_LOCAL size_t pool_cache_count; /* #cache objects */
74
#ifdef CONFIG_HAP_LOCKLESS_POOLS
/* {free_list, seq} pair used as the compare/new operands of the double-word
 * CAS performed on the head of a lockless pool.
 */
struct pool_free_list {
	void **free_list;
	uintptr_t seq;
};
#endif
81
Willy Tarreau21072b92020-05-29 17:23:05 +020082/* Note below, in case of lockless pools, we still need the lock only for
83 * the flush() operation.
84 */
Willy Tarreau50e608d2007-05-13 18:26:08 +020085struct pool_head {
Willy Tarreau1ca1b702017-11-26 10:50:36 +010086 void **free_list;
Willy Tarreauf161d0f2018-02-22 14:05:55 +010087#ifdef CONFIG_HAP_LOCKLESS_POOLS
Olivier Houchardcf975d42018-01-24 18:38:31 +010088 uintptr_t seq;
Olivier Houchardcf975d42018-01-24 18:38:31 +010089#endif
Willy Tarreauaf613e82020-06-05 08:40:51 +020090 __decl_thread(HA_SPINLOCK_T lock); /* the spin lock */
Willy Tarreau50e608d2007-05-13 18:26:08 +020091 unsigned int used; /* how many chunks are currently in use */
Willy Tarreaua1e4f8c2020-05-08 08:31:56 +020092 unsigned int needed_avg;/* floating indicator between used and allocated */
Willy Tarreau50e608d2007-05-13 18:26:08 +020093 unsigned int allocated; /* how many chunks have been allocated */
94 unsigned int limit; /* hard limit on the number of chunks */
95 unsigned int minavail; /* how many chunks are expected to be used */
96 unsigned int size; /* chunk size */
97 unsigned int flags; /* MEM_F_* */
Willy Tarreau7dcd46d2007-05-14 00:16:13 +020098 unsigned int users; /* number of pools sharing this zone */
Willy Tarreau58102cf2015-10-28 16:24:21 +010099 unsigned int failed; /* failed allocations */
Olivier Houchardcf975d42018-01-24 18:38:31 +0100100 struct list list; /* list of all known pools */
Willy Tarreau7dcd46d2007-05-14 00:16:13 +0200101 char name[12]; /* name of the pool */
Willy Tarreau1ca1b702017-11-26 10:50:36 +0100102} __attribute__((aligned(64)));
Willy Tarreau50e608d2007-05-13 18:26:08 +0200103
Willy Tarreau0a93b642018-10-16 07:58:39 +0200104
105extern struct pool_head pool_base_start[MAX_BASE_POOLS];
106extern unsigned int pool_base_count;
107
/* poison each newly allocated area with this byte if >= 0 */
extern int mem_poison_byte;
Willy Tarreau50e608d2007-05-13 18:26:08 +0200110
/* Allocates new entries for pool <pool> until there are at least <avail> + 1
 * available, then returns the last one for immediate use, so that at least
 * <avail> are left available in the pool upon return. NULL is returned if the
 * last entry could not be allocated. It's important to note that at least one
 * allocation is always performed even if there are enough entries in the pool.
 * A call to the garbage collector is performed at most once in case malloc()
 * returns an error, before returning NULL.
 */
void *__pool_refill_alloc(struct pool_head *pool, unsigned int avail);
void *pool_refill_alloc(struct pool_head *pool, unsigned int avail);
Willy Tarreau50e608d2007-05-13 18:26:08 +0200121
/* Try to find an existing shared pool with the same characteristics and
 * returns it, otherwise creates this one. NULL is returned if no memory
 * is available for a new creation.
 */
struct pool_head *create_pool(char *name, unsigned int size, unsigned int flags);

/* Callback form used by REGISTER_POOL(): receives the address of the pool
 * pointer to fill in <ptr> along with the pool's <name> and object <size>.
 */
void create_pool_callback(struct pool_head **ptr, char *name, unsigned int size);
128
/* This registers a call to create_pool_callback(ptr, name, size) */
#define REGISTER_POOL(ptr, name, size)  \
	INITCALL3(STG_POOL, create_pool_callback, (ptr), (name), (size))

/* This macro declares a pool head <ptr> and registers its creation */
#define DECLARE_POOL(ptr, name, size)   \
	struct pool_head *(ptr) = NULL; \
	REGISTER_POOL(&(ptr), name, size)

/* This macro declares a static pool head <ptr> and registers its creation */
#define DECLARE_STATIC_POOL(ptr, name, size) \
	static struct pool_head *(ptr);      \
	REGISTER_POOL(&(ptr), name, size)
Willy Tarreau50e608d2007-05-13 18:26:08 +0200142
/* Dump statistics on pools usage, and report global pool counters. All of
 * these take no argument; they are declared with (void) so that the compiler
 * checks calls against a real prototype.
 */
void dump_pools_to_trash(void);
void dump_pools(void);
int pool_total_failures(void);
unsigned long pool_total_allocated(void);
unsigned long pool_total_used(void);
Willy Tarreau50e608d2007-05-13 18:26:08 +0200150
/*
 * This function frees whatever can be freed in pool <pool>.
 */
void pool_flush(struct pool_head *pool);

/*
 * This function frees whatever can be freed in all pools, but respecting
 * the minimum thresholds imposed by owners.
 *
 * <pool_ctx> is used when pool_gc is called to release resources to allocate
 * an element in __pool_refill_alloc. It is important because <pool_ctx> is
 * already locked, so we need to skip the lock here.
 */
void pool_gc(struct pool_head *pool_ctx);

/*
 * This function destroys a pool by freeing it completely.
 * This should be called only under extreme circumstances.
 */
void *pool_destroy(struct pool_head *pool);

/* Destroys all pools; takes no argument. */
void pool_destroy_all(void);
Willy Tarreaue6ce59d2007-05-13 19:38:49 +0200172
Willy Tarreau0a93b642018-10-16 07:58:39 +0200173/* returns the pool index for pool <pool>, or -1 if this pool has no index */
174static inline ssize_t pool_get_index(const struct pool_head *pool)
175{
176 size_t idx;
177
178 idx = pool - pool_base_start;
179 if (idx >= MAX_BASE_POOLS)
180 return -1;
181 return idx;
182}
183
/* The averaging functions below were copied from freq_ctr.h's swrate_add,
 * impossible to use here due to include dependency hell again!
 * POOL_AVG_SAMPLES is the size of their sliding window.
 */
#define POOL_AVG_SAMPLES 1024
188
189static inline unsigned int pool_avg_add(unsigned int *sum, unsigned int v)
190{
191 unsigned int new_sum, old_sum;
192 unsigned int n = POOL_AVG_SAMPLES;
193
194 old_sum = *sum;
195 do {
196 new_sum = old_sum - (old_sum + n - 1) / n + v;
197 } while (!_HA_ATOMIC_CAS(sum, &old_sum, new_sum));
198 return new_sum;
199}
200
201/* make the new value <v> count for 1/4 of the total sum */
202static inline unsigned int pool_avg_bump(unsigned int *sum, unsigned int v)
203{
204 unsigned int new_sum, old_sum;
205 unsigned int n = POOL_AVG_SAMPLES;
206
207 old_sum = *sum;
208 do {
209 new_sum = old_sum - (old_sum + 3) / 4;
210 new_sum += (n * v + 3) / 4;
211 } while (!_HA_ATOMIC_CAS(sum, &old_sum, new_sum));
212 return new_sum;
213}
214
215static inline unsigned int pool_avg(unsigned int sum)
216{
217 unsigned int n = POOL_AVG_SAMPLES;
218
219 return (sum + n - 1) / n;
220}
221
Willy Tarreau63a87382020-05-08 08:38:24 +0200222/* returns true if the pool is considered to have too many free objects */
223static inline int pool_is_crowded(const struct pool_head *pool)
224{
225 return pool->allocated >= pool_avg(pool->needed_avg + pool->needed_avg / 4) &&
226 (int)(pool->allocated - pool->used) >= pool->minavail;
227}
228
Willy Tarreauf161d0f2018-02-22 14:05:55 +0100229#ifdef CONFIG_HAP_LOCKLESS_POOLS
Willy Tarreaue18db9e2018-10-16 10:28:54 +0200230
231/* Tries to retrieve an object from the local pool cache corresponding to pool
232 * <pool>. Returns NULL if none is available.
233 */
234static inline void *__pool_get_from_cache(struct pool_head *pool)
235{
236 ssize_t idx = pool_get_index(pool);
237 struct pool_cache_item *item;
Willy Tarreau7f0165e2018-11-26 17:09:46 +0100238 struct pool_cache_head *ph;
Willy Tarreaue18db9e2018-10-16 10:28:54 +0200239
240 /* pool not in cache */
241 if (idx < 0)
242 return NULL;
243
Willy Tarreau7f0165e2018-11-26 17:09:46 +0100244 ph = &pool_cache[tid][idx];
245 if (LIST_ISEMPTY(&ph->list))
246 return NULL; // empty
Willy Tarreaue18db9e2018-10-16 10:28:54 +0200247
Willy Tarreau7f0165e2018-11-26 17:09:46 +0100248 item = LIST_NEXT(&ph->list, typeof(item), by_pool);
249 ph->count--;
250 pool_cache_bytes -= ph->size;
Willy Tarreaue18db9e2018-10-16 10:28:54 +0200251 pool_cache_count--;
252 LIST_DEL(&item->by_pool);
253 LIST_DEL(&item->by_lru);
Willy Tarreau8e9f4532018-10-28 20:09:12 +0100254#ifdef DEBUG_MEMORY_POOLS
255 /* keep track of where the element was allocated from */
256 *POOL_LINK(pool, item) = (void *)pool;
257#endif
Willy Tarreaue18db9e2018-10-16 10:28:54 +0200258 return item;
259}
260
Olivier Houchardcf975d42018-01-24 18:38:31 +0100261/*
262 * Returns a pointer to type <type> taken from the pool <pool_type> if
263 * available, otherwise returns NULL. No malloc() is attempted, and poisonning
264 * is never performed. The purpose is to get the fastest possible allocation.
265 */
266static inline void *__pool_get_first(struct pool_head *pool)
267{
268 struct pool_free_list cmp, new;
Willy Tarreaue18db9e2018-10-16 10:28:54 +0200269 void *ret = __pool_get_from_cache(pool);
270
271 if (ret)
272 return ret;
Olivier Houchardcf975d42018-01-24 18:38:31 +0100273
274 cmp.seq = pool->seq;
275 __ha_barrier_load();
276
277 cmp.free_list = pool->free_list;
278 do {
Olivier Houchard899fb8a2020-03-18 15:48:29 +0100279 if (cmp.free_list == NULL)
Olivier Houchardcf975d42018-01-24 18:38:31 +0100280 return NULL;
281 new.seq = cmp.seq + 1;
282 __ha_barrier_load();
283 new.free_list = *POOL_LINK(pool, cmp.free_list);
Willy Tarreau6a38b322019-05-11 18:04:24 +0200284 } while (HA_ATOMIC_DWCAS((void *)&pool->free_list, (void *)&cmp, (void *)&new) == 0);
Olivier Houchard20872762019-03-08 18:53:35 +0100285 __ha_barrier_atomic_store();
Tim Duesterhus05f6a432018-02-20 00:49:46 +0100286
Olivier Houchard20872762019-03-08 18:53:35 +0100287 _HA_ATOMIC_ADD(&pool->used, 1);
Olivier Houchardcf975d42018-01-24 18:38:31 +0100288#ifdef DEBUG_MEMORY_POOLS
289 /* keep track of where the element was allocated from */
290 *POOL_LINK(pool, cmp.free_list) = (void *)pool;
291#endif
292 return cmp.free_list;
293}
294
/* Lockless variant: simply forwards to __pool_get_first(), no lock is taken.
 * Returns an available object of pool <pool> or NULL.
 */
static inline void *pool_get_first(struct pool_head *pool)
{
	return __pool_get_first(pool);
}

/*
 * Returns an object taken from pool <pool>, or a freshly allocated one
 * obtained from __pool_refill_alloc(pool, 0) when the pool has none
 * available. No memory poisoning is ever performed on the returned area. The
 * result is whatever __pool_refill_alloc() returned in the second case, which
 * may be NULL.
 */
static inline void *pool_alloc_dirty(struct pool_head *pool)
{
	void *p;

	if ((p = __pool_get_first(pool)) == NULL)
		p = __pool_refill_alloc(pool, 0);
	return p;
}
316
Willy Tarreaue6ce59d2007-05-13 19:38:49 +0200317/*
Olivier Houchardcf975d42018-01-24 18:38:31 +0100318 * Returns a pointer to type <type> taken from the pool <pool_type> or
319 * dynamically allocated. In the first case, <pool_type> is updated to point to
320 * the next element in the list. Memory poisonning is performed if enabled.
321 */
322static inline void *pool_alloc(struct pool_head *pool)
323{
324 void *p;
325
326 p = pool_alloc_dirty(pool);
Olivier Houchardcf975d42018-01-24 18:38:31 +0100327 if (p && mem_poison_byte >= 0) {
328 memset(p, mem_poison_byte, pool->size);
329 }
330
331 return p;
332}
333
Willy Tarreau146794d2018-10-16 08:55:15 +0200334/* Locklessly add item <ptr> to pool <pool>, then update the pool used count.
335 * Both the pool and the pointer must be valid. Use pool_free() for normal
336 * operations.
337 */
338static inline void __pool_free(struct pool_head *pool, void *ptr)
339{
Willy Tarreau7a6ad882018-10-20 17:37:38 +0200340 void **free_list = pool->free_list;
Willy Tarreau146794d2018-10-16 08:55:15 +0200341
Olivier Houchard20872762019-03-08 18:53:35 +0100342 _HA_ATOMIC_SUB(&pool->used, 1);
Willy Tarreau63a87382020-05-08 08:38:24 +0200343
344 if (unlikely(pool_is_crowded(pool))) {
345 free(ptr);
346 _HA_ATOMIC_SUB(&pool->allocated, 1);
347 } else {
348 do {
349 *POOL_LINK(pool, ptr) = (void *)free_list;
350 __ha_barrier_store();
351 } while (!_HA_ATOMIC_CAS(&pool->free_list, &free_list, ptr));
352 __ha_barrier_atomic_store();
353 }
Willy Tarreaua1e4f8c2020-05-08 08:31:56 +0200354 pool_avg_add(&pool->needed_avg, pool->used);
Willy Tarreau146794d2018-10-16 08:55:15 +0200355}
356
Willy Tarreaue18db9e2018-10-16 10:28:54 +0200357/* frees an object to the local cache, possibly pushing oldest objects to the
358 * global pool.
359 */
360void __pool_put_to_cache(struct pool_head *pool, void *ptr, ssize_t idx);
361static inline void pool_put_to_cache(struct pool_head *pool, void *ptr)
362{
363 ssize_t idx = pool_get_index(pool);
364
365 /* pool not in cache or too many objects for this pool (more than
366 * half of the cache is used and this pool uses more than 1/8 of
367 * the cache size).
368 */
369 if (idx < 0 ||
370 (pool_cache_bytes > CONFIG_HAP_POOL_CACHE_SIZE * 3 / 4 &&
Willy Tarreau7f0165e2018-11-26 17:09:46 +0100371 pool_cache[tid][idx].count >= 16 + pool_cache_count / 8)) {
Willy Tarreaue18db9e2018-10-16 10:28:54 +0200372 __pool_free(pool, ptr);
373 return;
374 }
375 __pool_put_to_cache(pool, ptr, idx);
376}
377
Olivier Houchardcf975d42018-01-24 18:38:31 +0100378/*
379 * Puts a memory area back to the corresponding pool.
380 * Items are chained directly through a pointer that
381 * is written in the beginning of the memory area, so
382 * there's no need for any carrier cell. This implies
383 * that each memory area is at least as big as one
384 * pointer. Just like with the libc's free(), nothing
385 * is done if <ptr> is NULL.
386 */
387static inline void pool_free(struct pool_head *pool, void *ptr)
388{
389 if (likely(ptr != NULL)) {
Olivier Houchardcf975d42018-01-24 18:38:31 +0100390#ifdef DEBUG_MEMORY_POOLS
391 /* we'll get late corruption if we refill to the wrong pool or double-free */
392 if (*POOL_LINK(pool, ptr) != (void *)pool)
Willy Tarreaue4d42552020-03-14 11:08:16 +0100393 *DISGUISE((volatile int *)0) = 0;
Olivier Houchardcf975d42018-01-24 18:38:31 +0100394#endif
Willy Tarreauda520352019-11-15 06:59:54 +0100395 if (mem_poison_byte >= 0)
396 memset(ptr, mem_poison_byte, pool->size);
Willy Tarreaue18db9e2018-10-16 10:28:54 +0200397 pool_put_to_cache(pool, ptr);
Olivier Houchardcf975d42018-01-24 18:38:31 +0100398 }
399}
400
Willy Tarreauf161d0f2018-02-22 14:05:55 +0100401#else /* CONFIG_HAP_LOCKLESS_POOLS */
Olivier Houchardcf975d42018-01-24 18:38:31 +0100402/*
Willy Tarreau02622412014-12-08 16:35:23 +0100403 * Returns a pointer to type <type> taken from the pool <pool_type> if
404 * available, otherwise returns NULL. No malloc() is attempted, and poisonning
405 * is never performed. The purpose is to get the fastest possible allocation.
Willy Tarreau50e608d2007-05-13 18:26:08 +0200406 */
Christopher Fauletb349e482017-08-29 09:52:38 +0200407static inline void *__pool_get_first(struct pool_head *pool)
Willy Tarreaue430e772014-12-23 14:13:16 +0100408{
409 void *p;
410
Willy Tarreau02622412014-12-08 16:35:23 +0100411 if ((p = pool->free_list) != NULL) {
Willy Tarreauac421112015-10-28 15:09:29 +0100412 pool->free_list = *POOL_LINK(pool, p);
Willy Tarreaue430e772014-12-23 14:13:16 +0100413 pool->used++;
Willy Tarreaude30a682015-10-28 15:23:51 +0100414#ifdef DEBUG_MEMORY_POOLS
415 /* keep track of where the element was allocated from */
416 *POOL_LINK(pool, p) = (void *)pool;
417#endif
Willy Tarreaue430e772014-12-23 14:13:16 +0100418 }
419 return p;
420}
Willy Tarreau50e608d2007-05-13 18:26:08 +0200421
Christopher Fauletb349e482017-08-29 09:52:38 +0200422static inline void *pool_get_first(struct pool_head *pool)
423{
424 void *ret;
425
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100426 HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
Christopher Fauletb349e482017-08-29 09:52:38 +0200427 ret = __pool_get_first(pool);
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100428 HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
Christopher Fauletb349e482017-08-29 09:52:38 +0200429 return ret;
430}
Willy Tarreau50e608d2007-05-13 18:26:08 +0200431/*
Willy Tarreau02622412014-12-08 16:35:23 +0100432 * Returns a pointer to type <type> taken from the pool <pool_type> or
433 * dynamically allocated. In the first case, <pool_type> is updated to point to
434 * the next element in the list. No memory poisonning is ever performed on the
435 * returned area.
436 */
437static inline void *pool_alloc_dirty(struct pool_head *pool)
438{
439 void *p;
440
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100441 HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
Christopher Fauletb349e482017-08-29 09:52:38 +0200442 if ((p = __pool_get_first(pool)) == NULL)
443 p = __pool_refill_alloc(pool, 0);
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100444 HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
Willy Tarreau02622412014-12-08 16:35:23 +0100445 return p;
446}
447
Willy Tarreau158fa752017-11-22 15:47:29 +0100448#ifndef DEBUG_UAF /* normal allocator */
449
/* allocates an area of size <size> and returns it. The semantics are similar
 * to those of malloc(): this normal (non-DEBUG_UAF) version is a plain call
 * to it, and the area must be released using pool_free_area().
 */
static inline void *pool_alloc_area(size_t size)
{
	return malloc(size);
}
457
458/* frees an area <area> of size <size> allocated by pool_alloc_area(). The
459 * semantics are identical to free() except that the size is specified and
460 * may be ignored.
461 */
462static inline void pool_free_area(void *area, size_t __maybe_unused size)
463{
464 free(area);
465}
466
Willy Tarreau158fa752017-11-22 15:47:29 +0100467#else /* use-after-free detector */
468
469/* allocates an area of size <size> and returns it. The semantics are similar
470 * to those of malloc(). However the allocation is rounded up to 4kB so that a
471 * full page is allocated. This ensures the object can be freed alone so that
472 * future dereferences are easily detected. The returned object is always
Willy Tarreau364d7452018-02-22 14:14:23 +0100473 * 16-bytes aligned to avoid issues with unaligned structure objects. In case
474 * some padding is added, the area's start address is copied at the end of the
475 * padding to help detect underflows.
Willy Tarreau158fa752017-11-22 15:47:29 +0100476 */
Olivier Houchard62975a72018-10-21 01:33:11 +0200477#include <errno.h>
Willy Tarreau158fa752017-11-22 15:47:29 +0100478static inline void *pool_alloc_area(size_t size)
479{
480 size_t pad = (4096 - size) & 0xFF0;
Willy Tarreau229e7392019-08-08 07:38:19 +0200481 int isolated;
Willy Tarreau5a9cce42018-02-22 11:39:23 +0100482 void *ret;
Willy Tarreau158fa752017-11-22 15:47:29 +0100483
Willy Tarreau229e7392019-08-08 07:38:19 +0200484 isolated = thread_isolated();
485 if (!isolated)
486 thread_harmless_now();
Olivier Houchard62975a72018-10-21 01:33:11 +0200487 ret = mmap(NULL, (size + 4095) & -4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
Willy Tarreau85b2cae2019-07-04 16:18:23 +0200488 if (ret != MAP_FAILED) {
489 /* let's dereference the page before returning so that the real
490 * allocation in the system is performed without holding the lock.
491 */
492 *(int *)ret = 0;
493 if (pad >= sizeof(void *))
494 *(void **)(ret + pad - sizeof(void *)) = ret + pad;
495 ret += pad;
496 } else {
497 ret = NULL;
498 }
Willy Tarreau229e7392019-08-08 07:38:19 +0200499 if (!isolated)
500 thread_harmless_end();
Willy Tarreau85b2cae2019-07-04 16:18:23 +0200501 return ret;
Willy Tarreau158fa752017-11-22 15:47:29 +0100502}
503
504/* frees an area <area> of size <size> allocated by pool_alloc_area(). The
505 * semantics are identical to free() except that the size must absolutely match
Willy Tarreau364d7452018-02-22 14:14:23 +0100506 * the one passed to pool_alloc_area(). In case some padding is added, the
507 * area's start address is compared to the one at the end of the padding, and
508 * a segfault is triggered if they don't match, indicating an underflow.
Willy Tarreau158fa752017-11-22 15:47:29 +0100509 */
510static inline void pool_free_area(void *area, size_t size)
511{
512 size_t pad = (4096 - size) & 0xFF0;
513
Willy Tarreau364d7452018-02-22 14:14:23 +0100514 if (pad >= sizeof(void *) && *(void **)(area - sizeof(void *)) != area)
Willy Tarreaue4d42552020-03-14 11:08:16 +0100515 *DISGUISE((volatile int *)0) = 0;
Willy Tarreau364d7452018-02-22 14:14:23 +0100516
Willy Tarreau85b2cae2019-07-04 16:18:23 +0200517 thread_harmless_now();
Willy Tarreau158fa752017-11-22 15:47:29 +0100518 munmap(area - pad, (size + 4095) & -4096);
Willy Tarreau85b2cae2019-07-04 16:18:23 +0200519 thread_harmless_end();
Willy Tarreau158fa752017-11-22 15:47:29 +0100520}
521
522#endif /* DEBUG_UAF */
523
Willy Tarreau02622412014-12-08 16:35:23 +0100524/*
525 * Returns a pointer to type <type> taken from the pool <pool_type> or
526 * dynamically allocated. In the first case, <pool_type> is updated to point to
527 * the next element in the list. Memory poisonning is performed if enabled.
528 */
Willy Tarreaubafbe012017-11-24 17:34:44 +0100529static inline void *pool_alloc(struct pool_head *pool)
Willy Tarreau02622412014-12-08 16:35:23 +0100530{
531 void *p;
532
533 p = pool_alloc_dirty(pool);
Willy Tarreaude30a682015-10-28 15:23:51 +0100534 if (p && mem_poison_byte >= 0) {
Willy Tarreau02622412014-12-08 16:35:23 +0100535 memset(p, mem_poison_byte, pool->size);
Willy Tarreaude30a682015-10-28 15:23:51 +0100536 }
537
Willy Tarreau02622412014-12-08 16:35:23 +0100538 return p;
539}
540
541/*
Willy Tarreau50e608d2007-05-13 18:26:08 +0200542 * Puts a memory area back to the corresponding pool.
543 * Items are chained directly through a pointer that
544 * is written in the beginning of the memory area, so
545 * there's no need for any carrier cell. This implies
546 * that each memory area is at least as big as one
Willy Tarreau48d63db2008-08-03 17:41:33 +0200547 * pointer. Just like with the libc's free(), nothing
548 * is done if <ptr> is NULL.
Willy Tarreau50e608d2007-05-13 18:26:08 +0200549 */
Willy Tarreaubafbe012017-11-24 17:34:44 +0100550static inline void pool_free(struct pool_head *pool, void *ptr)
Willy Tarreaue430e772014-12-23 14:13:16 +0100551{
552 if (likely(ptr != NULL)) {
Willy Tarreaude30a682015-10-28 15:23:51 +0100553#ifdef DEBUG_MEMORY_POOLS
554 /* we'll get late corruption if we refill to the wrong pool or double-free */
555 if (*POOL_LINK(pool, ptr) != (void *)pool)
Willy Tarreaue4d42552020-03-14 11:08:16 +0100556 *DISGUISE((volatile int *)0) = 0;
Willy Tarreaude30a682015-10-28 15:23:51 +0100557#endif
Willy Tarreau158fa752017-11-22 15:47:29 +0100558
559#ifndef DEBUG_UAF /* normal pool behaviour */
Willy Tarreau3e853ea2019-07-04 11:30:00 +0200560 HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
Willy Tarreau3e853ea2019-07-04 11:30:00 +0200561 pool->used--;
Willy Tarreau63a87382020-05-08 08:38:24 +0200562 if (pool_is_crowded(pool)) {
563 free(ptr);
564 pool->allocated--;
565 } else {
566 *POOL_LINK(pool, ptr) = (void *)pool->free_list;
567 pool->free_list = (void *)ptr;
568 }
Willy Tarreaua1e4f8c2020-05-08 08:31:56 +0200569 pool_avg_add(&pool->needed_avg, pool->used);
Willy Tarreau3e853ea2019-07-04 11:30:00 +0200570 HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
Willy Tarreau158fa752017-11-22 15:47:29 +0100571#else /* release the entry for real to detect use after free */
572 /* ensure we crash on double free or free of a const area*/
573 *(uint32_t *)ptr = 0xDEADADD4;
574 pool_free_area(ptr, pool->size + POOL_EXTRA);
Willy Tarreau3e853ea2019-07-04 11:30:00 +0200575 HA_SPIN_LOCK(POOL_LOCK, &pool->lock);
Willy Tarreau158fa752017-11-22 15:47:29 +0100576 pool->allocated--;
Willy Tarreau3e853ea2019-07-04 11:30:00 +0200577 pool->used--;
Willy Tarreaua1e4f8c2020-05-08 08:31:56 +0200578 pool_avg_add(&pool->needed_avg, pool->used);
Christopher Faulet2a944ee2017-11-07 10:42:54 +0100579 HA_SPIN_UNLOCK(POOL_LOCK, &pool->lock);
Willy Tarreau3e853ea2019-07-04 11:30:00 +0200580#endif /* DEBUG_UAF */
Willy Tarreaue430e772014-12-23 14:13:16 +0100581 }
582}
Willy Tarreauf161d0f2018-02-22 14:05:55 +0100583#endif /* CONFIG_HAP_LOCKLESS_POOLS */
Willy Tarreau2dd0d472006-06-29 17:53:05 +0200584#endif /* _COMMON_MEMORY_H */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200585
586/*
587 * Local variables:
588 * c-indent-level: 8
589 * c-basic-offset: 8
590 * End:
591 */